wget 命令详解#
wget 是 Linux 系统中用于从网络下载文件的命令行工具,支持 HTTP、HTTPS 和 FTP 协议。与 curl 不同,wget 专注于文件下载,具有递归下载、断点续传、后台下载等强大功能,是系统管理员和开发人员常用的下载工具。
入门#
基本用法#
# Download a file
wget https://example.com/file.zip
# Download and save under a chosen file name
wget -O newname.zip https://example.com/file.zip
# Download into a specific directory
wget -P /path/to/directory https://example.com/file.zip
# Download in the background
wget -b https://example.com/largefile.zip

常用选项#
| 选项 | 说明 |
|---|---|
| -O | 指定输出文件名 |
| -P | 指定下载目录 |
| -b | 后台下载 |
| -c | 断点续传 |
| -r | 递归下载 |
| -l | 设置递归深度 |
| -k | 转换链接为本地链接 |
| -p | 下载页面所需的所有资源 |
| -q | 静默模式 |
| -t | 设置重试次数 |
基本示例#
# Download a single file
wget https://example.com/file.zip
# Download and rename the output
wget -O myfile.zip https://example.com/file.zip
# Download into a specific directory
wget -P /tmp https://example.com/file.zip
# Download in the background (progress goes to ./wget-log)
wget -b https://example.com/largefile.zip
# Watch the progress of a background download
tail -f wget-log

中级#
断点续传#
# Resume a partially downloaded file
wget -c https://example.com/largefile.zip
# Re-run the same command to continue after an interruption
wget -c https://example.com/largefile.zip
# Limit the number of retries
wget -t 3 https://example.com/file.zip
# Retry forever (0 = unlimited)
wget -t 0 https://example.com/file.zip

递归下载#
# Recursively download a website
wget -r https://example.com/
# Limit recursion depth
wget -r -l 2 https://example.com/
# Download a page with all of its required resources (images, CSS, ...)
wget -p https://example.com/page.html
# Convert links for local browsing
wget -k https://example.com/page.html
# Recursive download with link conversion
wget -r -k https://example.com/

限速和带宽控制#
# Limit download speed (suffix k = KB/s)
wget --limit-rate=200k https://example.com/file.zip
# Limit download speed (upper-case K is equivalent)
wget --limit-rate=200K https://example.com/file.zip
# Limit download speed in MB/s
wget --limit-rate=2M https://example.com/file.zip
# Wait a fixed number of seconds between retrievals
wget --wait=5 https://example.com/file.zip
# Randomize the wait time (helps avoid being rate-limited)
wget --random-wait https://example.com/file.zip

高级#
高级下载选项#
# Set the User-Agent header
wget -U "Mozilla/5.0" https://example.com/file.zip
# Add a request header
wget --header="Accept: application/json" https://example.com/file.zip
# Add several request headers
wget --header="Accept: application/json" --header="Authorization: Bearer token" https://example.com/file.zip
# Use a proxy (-e passes a .wgetrc-style setting on the command line)
wget -e "https_proxy=http://proxy.example.com:8080" https://example.com/file.zip
# Skip SSL certificate verification (insecure; only for trusted test hosts)
wget --no-check-certificate https://example.com/file.zip
# Use a client certificate
wget --certificate=client.crt --private-key=client.key https://example.com/file.zip

认证和登录#
# HTTP 基本认证
wget --user=username --password=password https://example.com/file.zip
# 从文件读取密码
wget --user=username --password-file=password.txt https://example.com/file.zip
# FTP 认证
wget --ftp-user=username --ftp-password=password ftp://example.com/file.zip
# 使用配置文件
echo "user=username:password" > ~/.wgetrc
wget https://example.com/file.zip

日志和调试#
# Verbose output
wget -v https://example.com/file.zip
# Quiet mode (no output)
wget -q https://example.com/file.zip
# Write the log to a file (overwrites)
wget -o download.log https://example.com/file.zip
# Append the log to a file
wget -a download.log https://example.com/file.zip
# Print the server response headers
wget -S https://example.com/file.zip
# Debug output
wget -d https://example.com/file.zip

大师#
网站镜像#
#!/bin/bash
# Website mirroring script built on wget --mirror.
SOURCE_URL="https://example.com/"
OUTPUT_DIR="mirror_output"
# Mirror the whole site into $OUTPUT_DIR.
# --mirror enables recursion with timestamping; --wait/--random-wait and
# --limit-rate keep the load on the remote server polite.
# Globals: SOURCE_URL, OUTPUT_DIR (both read).
mirror_website() {
wget \
--mirror \
--convert-links \
--adjust-extension \
--page-requisites \
--no-parent \
--wait=1 \
--random-wait \
--limit-rate=200k \
-P "$OUTPUT_DIR" \
"$SOURCE_URL"
echo "Website mirrored to: $OUTPUT_DIR"
}
# Mirror a single sub-directory of the site.
# $1 - path relative to $SOURCE_URL (e.g. "docs/").
# --no-parent keeps wget from climbing above the given directory.
mirror_directory() {
local directory=$1
wget \
--mirror \
--convert-links \
--adjust-extension \
--page-requisites \
--no-parent \
-P "$OUTPUT_DIR" \
"$SOURCE_URL$directory"
echo "Directory $directory mirrored to: $OUTPUT_DIR"
}
# Mirror the site while skipping files matching a pattern list.
# $1 - comma-separated suffix/pattern list for wget --reject (e.g. "jpg,png").
mirror_with_exclusions() {
local exclusions=$1
wget \
--mirror \
--convert-links \
--adjust-extension \
--page-requisites \
--no-parent \
--reject="$exclusions" \
-P "$OUTPUT_DIR" \
"$SOURCE_URL"
echo "Website mirrored (excluding: $exclusions) to: $OUTPUT_DIR"
}
# Entry point: dispatch on the first CLI argument.
# "directory" and "exclude" need a second argument; validate it instead of
# silently calling the helper with an empty string (which would re-mirror
# the whole site).
main() {
    case "$1" in
        full)
            mirror_website
            ;;
        directory)
            [ -n "$2" ] || { echo "Usage: $0 directory <path>"; exit 1; }
            mirror_directory "$2"
            ;;
        exclude)
            [ -n "$2" ] || { echo "Usage: $0 exclude <patterns>"; exit 1; }
            mirror_with_exclusions "$2"
            ;;
        *)
            echo "Usage: $0 {full|directory|exclude}"
            exit 1
            ;;
    esac
}
main "$@"

批量下载#
#!/bin/bash
# Batch download script: pulls URLs into $OUTPUT_DIR, logging to $LOG_FILE.
# NOTE(review): URL_LIST is defined but the list file is actually passed as
# an argument to "main list <file>" — confirm whether this default is used.
URL_LIST="url_list.txt"
OUTPUT_DIR="downloads"
LOG_FILE="download.log"
mkdir -p "$OUTPUT_DIR"   # quoted: safe if the path ever contains spaces
# Download every URL listed in a file (one per line).
# Blank lines and lines starting with '#' are skipped.
# Globals: OUTPUT_DIR, LOG_FILE (both read).
download_from_list() {
    local src=$1
    local entry
    while IFS= read -r entry; do
        # Skip blanks and comment lines.
        [[ -z "$entry" || "$entry" =~ ^# ]] && continue
        echo "Downloading: $entry"
        wget -P "$OUTPUT_DIR" -a "$LOG_FILE" "$entry"
    done < "$src"
}
# Download a numbered sequence of files.
# $1 base URL, $2 printf pattern (e.g. "file%03d.zip"),
# $3 first index, $4 last index (inclusive).
download_by_pattern() {
    local base_url=$1
    local pattern=$2
    local start=$3
    local end=$4
    local i url
    for ((i = start; i <= end; i++)); do
        # The pattern is deliberately the printf format string so callers
        # can embed %d/%03d placeholders — never pass untrusted input here.
        # Declaration split from assignment so a printf failure isn't masked.
        url=$(printf "${base_url}${pattern}" "$i")
        echo "Downloading: $url"
        wget -P "$OUTPUT_DIR" -a "$LOG_FILE" "$url"
    done
}
# Download URLs concurrently, running at most $1 wget processes at a time.
# Remaining arguments are the URLs.
parallel_download() {
    local limit=$1
    shift
    local active=0
    local link
    for link in "$@"; do
        wget -P "$OUTPUT_DIR" -a "$LOG_FILE" "$link" &
        active=$((active + 1))
        if [ "$active" -ge "$limit" ]; then
            wait          # barrier: let the current batch finish
            active=0
        fi
    done
    wait                  # reap any stragglers from the final batch
}
# Re-try (with resume, -c) every unique URL a previous run logged as failed.
# NOTE(review): assumes the URL is the 3rd whitespace-separated field of each
# "failed" log line — confirm against the actual wget log format.
retry_failed_downloads() {
    local log_file=$1
    # read -r added: without it backslashes in URLs would be mangled.
    grep "failed" "$log_file" | awk '{print $3}' | sort -u | while IFS= read -r url; do
        echo "Retrying: $url"
        wget -c -P "$OUTPUT_DIR" -a "$LOG_FILE" "$url"
    done
}
# Entry point: dispatch the sub-command given as the first argument.
main() {
    if [ "$1" = "list" ]; then
        download_from_list "$2"
    elif [ "$1" = "pattern" ]; then
        download_by_pattern "$2" "$3" "$4" "$5"
    elif [ "$1" = "parallel" ]; then
        parallel_download "$2" "${@:3}"
    elif [ "$1" = "retry" ]; then
        retry_failed_downloads "$2"
    else
        echo "Usage: $0 {list|pattern|parallel|retry}"
        exit 1
    fi
}
main "$@"

定时下载#
#!/bin/bash
# Scheduled download script: runs pipe-delimited tasks from a config file
# and keeps per-task logs under $LOG_DIR.
DOWNLOAD_CONFIG="download_schedule.conf"
LOG_DIR="download_logs"
mkdir -p "$LOG_DIR"   # quoted: safe if the path is ever changed
# Run one download task and log it to a timestamped file in $LOG_DIR.
# $1 task name, $2 URL, $3 output directory, $4 extra wget options.
# Returns 0 when wget succeeds, 1 otherwise.
execute_download_task() {
    local task_name=$1
    local url=$2
    local output_dir=$3
    local options=$4
    local log_file="$LOG_DIR/${task_name}_$(date +%Y%m%d_%H%M%S).log"
    echo "Executing task: $task_name"
    echo "URL: $url"
    echo "Output: $output_dir"
    mkdir -p "$output_dir"
    # $options is intentionally unquoted so "-c -t 3" splits into separate
    # wget arguments. Test the command directly instead of inspecting $?.
    if wget $options -P "$output_dir" -o "$log_file" "$url"; then
        echo "✓ Task $task_name completed successfully"
        return 0
    else
        echo "✗ Task $task_name failed"
        return 1
    fi
}
# Read pipe-delimited tasks from $DOWNLOAD_CONFIG and run each one.
# Config line format: task_name|url|output_dir|options|schedule
# Empty lines and lines starting with '#' are skipped.
# NOTE(review): the trailing "schedule" field is read but never used here —
# confirm whether scheduling was meant to happen elsewhere (e.g. cron).
load_and_execute_tasks() {
if [ ! -f "$DOWNLOAD_CONFIG" ]; then
echo "Config file not found: $DOWNLOAD_CONFIG"
return 1
fi
while IFS='|' read -r task_name url output_dir options schedule; do
if [ -n "$task_name" ] && [[ ! $task_name =~ ^# ]]; then
execute_download_task "$task_name" "$url" "$output_dir" "$options"
fi
done < "$DOWNLOAD_CONFIG"
}
# Print the number of running wget processes once a minute, forever.
monitor_downloads() {
    echo "Monitoring download tasks..."
    local running_downloads
    while true; do
        # pgrep -f matches full command lines like the old
        # "ps aux | grep wget | grep -v grep | wc -l" pipeline did, without
        # the self-match workaround and two extra processes.
        running_downloads=$(pgrep -fc wget)
        if [ "$running_downloads" -gt 0 ]; then
            echo "Active downloads: $running_downloads"
        fi
        sleep 60
    done
}
# Write a daily summary of the download logs to download_report_YYYYMMDD.txt.
# Globals: LOG_DIR (read).
generate_download_report() {
    local report_file="download_report_$(date +%Y%m%d).txt"
    # Group all report lines into one quoted redirection instead of
    # repeating an unquoted ">> $report_file" on every echo.
    {
        echo "Download Report - $(date +%Y-%m-%d)"
        echo "============================="
        echo ""
        echo "Download Statistics:"
        echo "Total downloads: $(ls "$LOG_DIR"/*.log 2>/dev/null | wc -l)"
        echo "Successful: $(grep -l "saved" "$LOG_DIR"/*.log 2>/dev/null | wc -l)"
        echo "Failed: $(grep -l "failed" "$LOG_DIR"/*.log 2>/dev/null | wc -l)"
        echo ""
        echo "Recent Downloads:"
        ls -lt "$LOG_DIR"/*.log 2>/dev/null | head -10
    } > "$report_file"
    echo "Report saved to: $report_file"
}
# Entry point: dispatch on the first CLI argument.
main() {
    if [ "$1" = "execute" ]; then
        load_and_execute_tasks
    elif [ "$1" = "monitor" ]; then
        monitor_downloads
    elif [ "$1" = "report" ]; then
        generate_download_report
    else
        echo "Usage: $0 {execute|monitor|report}"
        exit 1
    fi
}
main "$@"

无敌#
企业级下载管理系统#
#!/bin/bash
# Enterprise download manager: a simple file-backed task queue plus worker
# processes that drain it with wget. Requires root for the system paths below.
CONFIG_DIR="/etc/download_manager"
LOG_DIR="/var/log/download_manager"
DOWNLOAD_DIR="/var/downloads"
QUEUE_FILE="/var/run/download_manager/queue"
STATUS_FILE="/var/run/download_manager/status"
mkdir -p "$CONFIG_DIR" "$LOG_DIR" "$DOWNLOAD_DIR" /var/run/download_manager
# Create the queue file on first run so later greps/sorts don't fail.
init_queue() {
    if [ ! -f "$QUEUE_FILE" ]; then
        touch "$QUEUE_FILE"
    fi
}
# Append a new task to the queue.
# $1 url, $2 output dir, $3 numeric priority (lower sorts first),
# $4 extra wget options.
# Prints the generated task id on stdout; returns 0 on success.
add_download_task() {
    local url=$1
    local output_dir=$2
    local priority=$3
    local options=$4
    local task_id timestamp
    task_id=$(date +%s%N)
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$task_id|$timestamp|$url|$output_dir|$priority|$options|pending" >> "$QUEUE_FILE"
    echo "Task added: $task_id"
    # BUG FIX: the original did "return $task_id". Shell return codes are
    # 0-255, so the nanosecond id was truncated into an arbitrary (usually
    # non-zero) status that made success look like failure. The id is
    # already reported on stdout.
    return 0
}
# Print the pending task with the lowest priority value (field 5).
# Returns 1 when the queue file is missing or nothing is pending.
get_next_task() {
    [ -f "$QUEUE_FILE" ] || return 1
    local candidate
    # Numeric sort on the priority field, then take the first pending row.
    candidate=$(sort -t'|' -k5 -n "$QUEUE_FILE" | grep "|pending$" | head -1)
    [ -n "$candidate" ] || return 1
    echo "$candidate"
}
# Set the status (the last '|' field) of the given task, leaving every
# other field intact.
# BUG FIX: the original sed pattern required the line to already end in the
# *new* status (so a pending->running transition never matched) and its
# replacement blanked out the url/output_dir/priority/options fields.
update_task_status() {
    local task_id=$1
    local status=$2
    # BRE: '|' is literal; replace only the trailing status word.
    sed -i "s/^\($task_id|.*|\)[a-z]*$/\1$status/" "$QUEUE_FILE"
}
# Run one queued download. $1 is a full queue record:
# id|timestamp|url|output_dir|priority|options|status
execute_task() {
    local task=$1
    # One IFS read splits the record instead of four cut(1) invocations.
    local task_id timestamp url output_dir priority options status
    IFS='|' read -r task_id timestamp url output_dir priority options status <<< "$task"
    local log_file="$LOG_DIR/task_${task_id}.log"
    echo "Executing task: $task_id"
    echo "URL: $url"
    echo "Output: $output_dir"
    mkdir -p "$output_dir"
    update_task_status "$task_id" "running"
    # $options is deliberately unquoted so multiple wget flags split apart.
    if wget $options -P "$output_dir" -o "$log_file" "$url"; then
        update_task_status "$task_id" "completed"
        echo "✓ Task $task_id completed successfully"
    else
        update_task_status "$task_id" "failed"
        echo "✗ Task $task_id failed"
    fi
}
# Worker loop: repeatedly claim the next pending task and execute it;
# sleeps 5s when the queue is empty. Runs forever — intended to be
# backgrounded by start_workers.
# NOTE(review): the status is set to "running" both here and again inside
# execute_task — the duplicate write looks redundant. There is also no
# locking around get_next_task, so two workers can claim the same task;
# confirm single-worker use or add an flock-style lock.
download_worker() {
local worker_id=$1
echo "Worker $worker_id started"
while true; do
local task=$(get_next_task)
if [ -n "$task" ]; then
local task_id=$(echo "$task" | cut -d'|' -f1)
update_task_status "$task_id" "running"
execute_task "$task"
else
sleep 5
fi
done
}
# Launch $1 background worker processes and block until they exit.
start_workers() {
    local total=$1
    local n
    for (( n = 1; n <= total; n++ )); do
        download_worker "$n" &
    done
    wait
}
# Show the whole queue, or only the record for task $1 when an id is given.
view_task_status() {
    local wanted=$1
    if [ -n "$wanted" ]; then
        echo "=== Task $wanted ==="
        grep "^$wanted|" "$QUEUE_FILE"
    else
        echo "=== All Tasks ==="
        cat "$QUEUE_FILE"
    fi
}
# Cancel task $1 if it is still pending or running; anything else
# (completed/failed/cancelled/unknown) is refused.
cancel_task() {
    local task_id=$1
    local record state
    record=$(grep "^$task_id|" "$QUEUE_FILE")
    state=$(echo "$record" | cut -d'|' -f7)
    case "$state" in
        pending|running)
            update_task_status "$task_id" "cancelled"
            echo "Task $task_id cancelled"
            ;;
        *)
            echo "Cannot cancel task with status: $state"
            ;;
    esac
}
# Print queue statistics. The success rate is completed/(completed+failed),
# reported as N/A when no task has finished yet.
# BUG FIX: the original piped "completed + failed == 0" straight into bc,
# which printed a divide-by-zero error on a fresh queue.
generate_statistics() {
    echo "=== Download Statistics ==="
    echo ""
    local total pending running completed failed cancelled
    total=$(wc -l < "$QUEUE_FILE")
    # grep -c replaces the "grep | wc -l" pattern.
    pending=$(grep -c "|pending$" "$QUEUE_FILE")
    running=$(grep -c "|running$" "$QUEUE_FILE")
    completed=$(grep -c "|completed$" "$QUEUE_FILE")
    failed=$(grep -c "|failed$" "$QUEUE_FILE")
    cancelled=$(grep -c "|cancelled$" "$QUEUE_FILE")
    echo "Total tasks: $total"
    echo "Pending: $pending"
    echo "Running: $running"
    echo "Completed: $completed"
    echo "Failed: $failed"
    echo "Cancelled: $cancelled"
    echo ""
    if [ $((completed + failed)) -gt 0 ]; then
        echo "Success rate: $(echo "scale=2; $completed * 100 / ($completed + $failed)" | bc)%"
    else
        echo "Success rate: N/A (no finished tasks)"
    fi
}
# Entry point. Always ensure the queue file exists, then dispatch.
main() {
    init_queue
    if [ "$1" = "add" ]; then
        add_download_task "$2" "$3" "$4" "$5"
    elif [ "$1" = "start" ]; then
        start_workers "${2:-3}"
    elif [ "$1" = "status" ]; then
        view_task_status "$2"
    elif [ "$1" = "cancel" ]; then
        cancel_task "$2"
    elif [ "$1" = "stats" ]; then
        generate_statistics
    else
        echo "Usage: $0 {add|start|status|cancel|stats}"
        exit 1
    fi
}
main "$@"

智能下载调度器#
#!/bin/bash
# Smart download scheduler: only downloads when network, load, disk-space
# and time-window checks all pass.
CONFIG_FILE="scheduler.conf"
LOG_FILE="scheduler.log"
STATE_FILE="scheduler.state"
# Source the configuration (expected to define MAX_LOAD, MIN_DISK_SPACE,
# START_HOUR, END_HOUR, CHECK_INTERVAL). Exits the shell when missing.
load_config() {
    if [ -f "$CONFIG_FILE" ]; then
        # shellcheck disable=SC1090 — path is chosen at runtime
        source "$CONFIG_FILE"
    else
        echo "Config file not found: $CONFIG_FILE" >&2
        exit 1
    fi
}
# Return 0 when a single 2-second ping to a well-known host succeeds.
check_network_status() {
    local probe_host="8.8.8.8"
    # The function's status is simply ping's status.
    ping -c 1 -W 2 "$probe_host" > /dev/null 2>&1
}
# Return 0 when the 1-minute load average is below MAX_LOAD (default 2.0).
check_system_load() {
    local load max_load
    # One awk call replaces the original uptime|awk|awk|cut pipeline:
    # take the first number after "load average:" and strip the comma.
    load=$(uptime | awk -F'load average: *' '{split($2, a, ","); print a[1]}')
    max_load=${MAX_LOAD:-2.0}
    # Float comparison in awk — no bc dependency.
    awk -v l="$load" -v m="$max_load" 'BEGIN { exit !(l < m) }'
}
# Return 0 when the filesystem holding $1 has more than MIN_DISK_SPACE
# bytes available (default 1 GiB).
check_disk_space() {
    local download_dir=$1
    local min_space=${MIN_DISK_SPACE:-1073741824} # 1GB
    local available_kb
    # df -Pk forces POSIX one-line-per-filesystem output in KiB so the awk
    # field position is stable across platforms.
    available_kb=$(df -Pk "$download_dir" | awk 'NR==2 {print $4}')
    [ -n "$available_kb" ] || return 1
    # Multiply in shell arithmetic to avoid awk's float formatting.
    [ $((available_kb * 1024)) -gt "$min_space" ]
}
# Return 0 when the current hour lies in [START_HOUR, END_HOUR).
# Defaults to the 00:00-06:00 night window. Generalized: a window that
# wraps midnight (e.g. START_HOUR=22 END_HOUR=6) now also works; the old
# behaviour for START_HOUR <= END_HOUR is unchanged.
check_time_window() {
    # 10# guards against "08"/"09" being rejected as octal in $(( )).
    local current_hour=$((10#$(date +%H)))
    local start_hour=${START_HOUR:-0}
    local end_hour=${END_HOUR:-6}
    if [ "$start_hour" -le "$end_hour" ]; then
        [ "$current_hour" -ge "$start_hour" ] && [ "$current_hour" -lt "$end_hour" ]
    else
        # Overnight window: in [start,24) or [0,end).
        [ "$current_hour" -ge "$start_hour" ] || [ "$current_hour" -lt "$end_hour" ]
    fi
}
# Gate a download behind the four environment checks, postponing (return 1)
# with a specific message when any check fails.
# $1 url, $2 output directory, $3 task name.
# Globals: LOG_FILE (read).
schedule_download() {
local url=$1
local output_dir=$2
local task_name=$3
echo "Scheduling download: $task_name"
# Network reachability first — cheapest check to fail.
if ! check_network_status; then
echo "Network not available, postponing download"
return 1
fi
# System load must be below MAX_LOAD.
if ! check_system_load; then
echo "System load too high, postponing download"
return 1
fi
# Enough free disk under the target directory.
if ! check_disk_space "$output_dir"; then
echo "Insufficient disk space, postponing download"
return 1
fi
# Only download inside the configured time window.
if ! check_time_window; then
echo "Outside allowed time window, postponing download"
return 1
fi
# All checks passed — run the actual download.
echo "Starting download: $task_name"
wget -P "$output_dir" -a "$LOG_FILE" "$url"
if [ $? -eq 0 ]; then
echo "✓ Download completed: $task_name"
return 0
else
echo "✗ Download failed: $task_name"
return 1
fi
}
# Main scheduling loop: re-read the task list and attempt every task, then
# sleep CHECK_INTERVAL seconds (default 300) and repeat forever.
# NOTE(review): $CONFIG_FILE is both source'd by load_config (shell variable
# assignments) and parsed here as pipe-delimited task lines — any variable
# assignment line will be misread as a task name. Confirm whether tasks were
# meant to live in a separate file.
main_scheduler() {
load_config
echo "Starting download scheduler..."
echo "Log file: $LOG_FILE"
while true; do
# Task line format: task_name|url|output_dir|options
while IFS='|' read -r task_name url output_dir options; do
if [ -n "$task_name" ] && [[ ! $task_name =~ ^# ]]; then
schedule_download "$url" "$output_dir" "$task_name"
fi
done < "$CONFIG_FILE"
# Wait before the next sweep.
sleep ${CHECK_INTERVAL:-300}
done
}
# One-shot mode: attempt every configured task exactly once, then return.
# NOTE(review): same config double-use caveat as main_scheduler — the file
# is both source'd and parsed as pipe-delimited task lines.
single_run() {
load_config
while IFS='|' read -r task_name url output_dir options; do
if [ -n "$task_name" ] && [[ ! $task_name =~ ^# ]]; then
schedule_download "$url" "$output_dir" "$task_name"
fi
done < "$CONFIG_FILE"
}
# Entry point: "start" loops forever, "run" executes the tasks once.
main() {
    if [ "$1" = "start" ]; then
        main_scheduler
    elif [ "$1" = "run" ]; then
        single_run
    else
        echo "Usage: $0 {start|run}"
        exit 1
    fi
}
main "$@"

下载内容验证系统#
#!/bin/bash
# Download-content verification system: hash helpers plus download+verify
# workflows.
# NOTE(review): DOWNLOAD_DIR and VERIFY_LOG are defined but never referenced
# below (directories are passed as arguments) — confirm whether they are
# leftovers or meant as defaults.
DOWNLOAD_DIR="downloads"
VERIFY_LOG="verify.log"
# Print the hex digest of file $1 using algorithm $2 (md5|sha1|sha256,
# default md5). Returns 1 on an unsupported algorithm.
calculate_hash() {
    local file=$1
    local algorithm=${2:-md5}
    case "$algorithm" in
        md5)
            md5sum "$file" | awk '{print $1}'
            ;;
        sha1)
            sha1sum "$file" | awk '{print $1}'
            ;;
        sha256)
            sha256sum "$file" | awk '{print $1}'
            ;;
        *)
            # BUG FIX: this diagnostic went to stdout, so callers capturing
            # $(calculate_hash ...) received the error text as a "hash".
            echo "Unsupported algorithm: $algorithm" >&2
            return 1
            ;;
    esac
}
# Compare file $1 against expected digest $2 using algorithm $3 (default
# md5). Prints a pass/fail report; returns 0 on a match, 1 otherwise.
verify_hash() {
    local file=$1
    local expected_hash=$2
    local algorithm=${3:-md5}
    local actual_hash
    actual_hash=$(calculate_hash "$file" "$algorithm")
    if [ "$actual_hash" != "$expected_hash" ]; then
        echo "✗ Hash verification failed for $file"
        echo " Expected: $expected_hash"
        echo " Actual: $actual_hash"
        return 1
    fi
    echo "✓ Hash verification passed for $file"
    return 0
}
# Verify every file listed in a md5sum/sha*sum-style manifest ($1, lines of
# "HASH  FILENAME") against the copies under directory $2, using algorithm
# $3 (default md5). Prints a summary and returns the number of failures.
# NOTE(review): "return $failed" wraps modulo 256, so 256 failures would
# report success — acceptable for interactive use, confirm for automation.
verify_from_hash_file() {
local hash_file=$1
local download_dir=$2
local algorithm=${3:-md5}
if [ ! -f "$hash_file" ]; then
echo "Hash file not found: $hash_file"
return 1
fi
echo "Verifying files from $hash_file..."
local verified=0
local failed=0
# read -r splits into "hash" and "rest of line" (the filename, which may
# contain spaces).
while read -r expected_hash filename; do
local file="$download_dir/$filename"
if [ -f "$file" ]; then
if verify_hash "$file" "$expected_hash" "$algorithm"; then
verified=$((verified + 1))
else
failed=$((failed + 1))
fi
else
echo "✗ File not found: $file"
failed=$((failed + 1))
fi
done < "$hash_file"
echo ""
echo "Verification Summary:"
echo "Verified: $verified"
echo "Failed: $failed"
return $failed
}
# Download $1 to file $2 with wget, then verify its $4-digest (default md5)
# against $3. Returns verify_hash's status on success, 1 on download failure.
download_and_verify() {
    local url=$1
    local output_file=$2
    local expected_hash=$3
    local algorithm=${4:-md5}
    echo "Downloading: $url"
    # Test the command directly instead of inspecting $? afterwards.
    if wget -O "$output_file" "$url"; then
        echo "Download completed, verifying..."
        # verify_hash's status becomes this function's status.
        verify_hash "$output_file" "$expected_hash" "$algorithm"
    else
        echo "✗ Download failed"
        return 1
    fi
}
# Download and verify every entry of a manifest ($1) into directory $2.
# Manifest line format: url|filename|expected_hash|algorithm
# Empty lines and lines starting with '#' are skipped. Prints a summary
# and returns the number of failures (wraps modulo 256 — see note on
# verify_from_hash_file).
batch_download_and_verify() {
local manifest_file=$1
local download_dir=$2
if [ ! -f "$manifest_file" ]; then
echo "Manifest file not found: $manifest_file"
return 1
fi
mkdir -p "$download_dir"
echo "Batch downloading and verifying..."
local success=0
local failed=0
while IFS='|' read -r url filename expected_hash algorithm; do
if [ -n "$url" ] && [[ ! $url =~ ^# ]]; then
local output_file="$download_dir/$filename"
if download_and_verify "$url" "$output_file" "$expected_hash" "$algorithm"; then
success=$((success + 1))
else
failed=$((failed + 1))
fi
fi
done < "$manifest_file"
echo ""
echo "Batch Download Summary:"
echo "Success: $success"
echo "Failed: $failed"
return $failed
}
# Write a report (directory listing plus the md5 of every file) for
# directory $1 to verification_report_<timestamp>.txt.
generate_verification_report() {
    local download_dir=$1
    local report_file="verification_report_$(date +%Y%m%d_%H%M%S).txt"
    {
        echo "Verification Report - $(date)"
        echo "========================="
        echo ""
        echo "Files in $download_dir:"
        ls -lh "$download_dir"
        echo ""
        echo "File Hashes:"
    } > "$report_file"
    # -print0 / read -d '' keeps filenames with spaces intact (the original
    # unquoted "while read file" loop split on whitespace).
    find "$download_dir" -type f -print0 | while IFS= read -r -d '' file; do
        echo "$(calculate_hash "$file" "md5") $(basename "$file")" >> "$report_file"
    done
    echo "Report saved to: $report_file"
}
# Entry point: dispatch the verification sub-command.
main() {
    if [ "$1" = "verify" ]; then
        verify_hash "$2" "$3" "$4"
    elif [ "$1" = "verify-file" ]; then
        verify_from_hash_file "$2" "$3" "$4"
    elif [ "$1" = "download" ]; then
        download_and_verify "$2" "$3" "$4" "$5"
    elif [ "$1" = "batch" ]; then
        batch_download_and_verify "$2" "$3"
    elif [ "$1" = "report" ]; then
        generate_verification_report "$2"
    else
        echo "Usage: $0 {verify|verify-file|download|batch|report}"
        exit 1
    fi
}
main "$@"

最佳实践#
- 使用断点续传:对于大文件下载,始终使用 `-c` 选项
- 限制下载速度:避免对服务器造成过大压力
- 设置合理的等待时间:在批量下载时设置适当的等待时间
- 使用后台下载:对于长时间下载任务使用 `-b` 选项
- 验证下载内容:下载后验证文件完整性
- 记录日志:记录下载过程便于故障排查
- 使用配置文件:对于复杂的下载任务使用配置文件管理
- 遵守 robots.txt:递归下载时遵守网站的 robots.txt 规则
注意事项#
- wget 默认会递归下载,注意设置合适的递归深度
- 在生产环境中使用递归下载时要格外小心
- 注意版权和法律法规,不要下载非法内容
- 大文件下载时注意磁盘空间
- 使用代理时注意代理服务器的安全性
- 下载敏感数据时注意保护认证信息
- 不同版本的 wget 选项可能有所不同
- 在自动化脚本中添加适当的错误处理