wget 命令详解#

wget 是 Linux 系统中用于从网络下载文件的命令行工具,支持 HTTP、HTTPS 和 FTP 协议。与 curl 不同,wget 专注于文件下载,具有递归下载、断点续传、后台下载等强大功能,是系统管理员和开发人员常用的下载工具。

入门#

基本用法#

# 下载文件
wget https://example.com/file.zip

# 下载并指定文件名
wget -O newname.zip https://example.com/file.zip

# 下载到指定目录
wget -P /path/to/directory https://example.com/file.zip

# 后台下载
wget -b https://example.com/largefile.zip

常用选项#

选项  说明
-O    指定输出文件名
-P    指定下载目录
-b    后台下载
-c    断点续传
-r    递归下载
-l    设置递归深度
-k    转换链接为本地链接
-p    下载页面所需的所有资源
-q    静默模式
-t    设置重试次数

基本示例#

# 下载单个文件
wget https://example.com/file.zip

# 下载并重命名
wget -O myfile.zip https://example.com/file.zip

# 下载到指定目录
wget -P /tmp https://example.com/file.zip

# 后台下载
wget -b https://example.com/largefile.zip

# 查看后台下载进度
tail -f wget-log

中级#

断点续传#

# 断点续传下载
wget -c https://example.com/largefile.zip

# 下载中断后继续
wget -c https://example.com/largefile.zip

# 限制重试次数
wget -t 3 https://example.com/file.zip

# 无限重试
wget -t 0 https://example.com/file.zip

递归下载#

# 递归下载网站
wget -r https://example.com/

# 限制递归深度
wget -r -l 2 https://example.com/

# 下载页面及所有资源
wget -p https://example.com/page.html

# 转换链接为本地链接
wget -k https://example.com/page.html

# 递归下载并转换链接
wget -r -k https://example.com/

限速和带宽控制#

# 限制下载速度(字节/秒)
wget --limit-rate=200k https://example.com/file.zip

# 限制下载速度(KB/s)
wget --limit-rate=200K https://example.com/file.zip

# 限制下载速度(MB/s)
wget --limit-rate=2M https://example.com/file.zip

# 设置等待时间
wget --wait=5 https://example.com/file.zip

# 随机等待时间
wget --random-wait https://example.com/file.zip

高级#

高级下载选项#

# 设置用户代理
wget -U "Mozilla/5.0" https://example.com/file.zip

# 添加请求头
wget --header="Accept: application/json" https://example.com/file.zip

# 添加多个请求头
wget --header="Accept: application/json" --header="Authorization: Bearer token" https://example.com/file.zip

# 使用代理
wget -e "https_proxy=http://proxy.example.com:8080" https://example.com/file.zip

# 忽略 SSL 证书验证
wget --no-check-certificate https://example.com/file.zip

# 使用客户端证书
wget --certificate=client.crt --private-key=client.key https://example.com/file.zip

认证和登录#

# HTTP 基本认证
wget --user=username --password=password https://example.com/file.zip

# 从文件读取密码
wget --user=username --password-file=password.txt https://example.com/file.zip

# FTP 认证
wget --ftp-user=username --ftp-password=password ftp://example.com/file.zip

# 使用配置文件
echo "user=username:password" > ~/.wgetrc
wget https://example.com/file.zip

日志和调试#

# 显示详细输出
wget -v https://example.com/file.zip

# 静默模式
wget -q https://example.com/file.zip

# 记录日志
wget -o download.log https://example.com/file.zip

# 追加日志
wget -a download.log https://example.com/file.zip

# 显示服务器响应
wget -S https://example.com/file.zip

# 调试模式
wget -d https://example.com/file.zip

大师#

网站镜像#

#!/bin/bash
# 网站镜像脚本

SOURCE_URL="https://example.com/"
OUTPUT_DIR="mirror_output"

# Mirror the entire site into $OUTPUT_DIR, politely throttled.
# Globals: SOURCE_URL, OUTPUT_DIR (read)
mirror_website() {
    # Collect the wget flags in an array so each one can be annotated.
    local -a opts=(
        --mirror            # recursive download with timestamping
        --convert-links     # rewrite links so the copy browses offline
        --adjust-extension  # add .html etc. based on content type
        --page-requisites   # also fetch images/CSS/JS a page needs
        --no-parent         # never ascend above the start URL
        --wait=1            # pause between requests
        --random-wait       # randomize the pause
        --limit-rate=200k   # cap bandwidth to be server-friendly
    )

    wget "${opts[@]}" -P "$OUTPUT_DIR" "$SOURCE_URL"

    echo "Website mirrored to: $OUTPUT_DIR"
}

# Mirror one sub-directory of the site into $OUTPUT_DIR.
# Globals: SOURCE_URL, OUTPUT_DIR (read)
# Arguments: $1 - directory path appended to SOURCE_URL
mirror_directory() {
    local directory=$1
    
    wget \
        --mirror \
        --convert-links \
        --adjust-extension \
        --page-requisites \
        --no-parent \
        -P "$OUTPUT_DIR" \
        "$SOURCE_URL$directory"
    
    echo "Directory $directory mirrored to: $OUTPUT_DIR"
}

# Mirror the whole site but skip files matching a reject pattern.
# Globals: SOURCE_URL, OUTPUT_DIR (read)
# Arguments: $1 - comma-separated suffix/pattern list for wget --reject
mirror_with_exclusions() {
    local exclusions=$1
    
    wget \
        --mirror \
        --convert-links \
        --adjust-extension \
        --page-requisites \
        --no-parent \
        --reject="$exclusions" \
        -P "$OUTPUT_DIR" \
        "$SOURCE_URL"
    
    echo "Website mirrored (excluding: $exclusions) to: $OUTPUT_DIR"
}

# CLI entry point: dispatch on the first argument.
main() {
    case "$1" in
        full)
            mirror_website
            ;;
        directory)
            mirror_directory "$2"
            ;;
        exclude)
            mirror_with_exclusions "$2"
            ;;
        *)
            # Show the argument each sub-command expects, on stderr.
            echo "Usage: $0 {full|directory <dir>|exclude <pattern>}" >&2
            exit 1
            ;;
    esac
}

main "$@"

批量下载#

#!/bin/bash
# 批量下载脚本

# Configuration: input URL list, output directory, shared log file.
URL_LIST="url_list.txt"
OUTPUT_DIR="downloads"
LOG_FILE="download.log"

# Quote the expansion so a directory name with spaces still works.
mkdir -p "$OUTPUT_DIR"

# Download every URL listed in a file, one per line.
# Blank lines and lines starting with '#' are skipped.
# Globals: OUTPUT_DIR, LOG_FILE (read)
# Arguments: $1 - path to the URL list file
download_from_list() {
    local list_file=$1
    
    # IFS= plus -r preserve whitespace and backslashes in each line.
    while IFS= read -r url; do
        if [ -n "$url" ] && [[ ! $url =~ ^# ]]; then
            echo "Downloading: $url"
            wget -P "$OUTPUT_DIR" -a "$LOG_FILE" "$url"
        fi
    done < "$list_file"
}

# Download a numbered sequence of files, e.g. file001.zip..file100.zip.
# Globals: OUTPUT_DIR, LOG_FILE (read)
# Arguments:
#   $1 - base URL (treated literally, NOT as a printf format)
#   $2 - printf pattern for the file part, e.g. "file%03d.zip"
#   $3 - first index
#   $4 - last index
download_by_pattern() {
    local base_url=$1
    local pattern=$2
    local start=$3
    local end=$4
    local i url

    for ((i = start; i <= end; i++)); do
        # Only $pattern is a printf format; keeping $base_url out of the
        # format string means a literal '%' in the URL can no longer
        # corrupt the expansion (the old code did printf "$base_url$pattern").
        url="$base_url$(printf "$pattern" "$i")"
        echo "Downloading: $url"
        wget -P "$OUTPUT_DIR" -a "$LOG_FILE" "$url"
    done
}

# Download URLs concurrently, at most $1 wget processes at a time.
# Globals: OUTPUT_DIR, LOG_FILE (read)
# Arguments: $1 - max parallel jobs; remaining args - URLs
parallel_download() {
    local max_parallel=$1
    shift
    local urls=("$@")
    
    local count=0
    for url in "${urls[@]}"; do
        wget -P "$OUTPUT_DIR" -a "$LOG_FILE" "$url" &
        count=$((count + 1))
        
        # Once a full batch is in flight, wait for ALL of them before
        # starting the next batch (a simple barrier, not a rolling pool).
        if [ $count -ge $max_parallel ]; then
            wait
            count=0
        fi
    done
    
    wait
}

# Re-run (with resume, -c) every URL found in a failure line of a log.
# Globals: OUTPUT_DIR, LOG_FILE (read)
# Arguments: $1 - log file to scan
# NOTE(review): assumes failure lines contain the word "failed" with the
# URL in the 3rd whitespace-separated field — confirm against the actual
# format wget writes via -a before relying on this.
retry_failed_downloads() {
    local log_file=$1
    
    grep "failed" "$log_file" | awk '{print $3}' | sort -u | while read url; do
        echo "Retrying: $url"
        wget -c -P "$OUTPUT_DIR" -a "$LOG_FILE" "$url"
    done
}

# CLI dispatcher for the batch-download tool.
main() {
    case "$1" in
        list)     download_from_list "$2" ;;
        pattern)  download_by_pattern "$2" "$3" "$4" "$5" ;;
        parallel) parallel_download "$2" "${@:3}" ;;
        retry)    retry_failed_downloads "$2" ;;
        *)
            echo "Usage: $0 {list|pattern|parallel|retry}"
            exit 1
            ;;
    esac
}

main "$@"

定时下载#

#!/bin/bash
# 定时下载脚本

# Configuration: task definition file and per-run log directory.
DOWNLOAD_CONFIG="download_schedule.conf"
LOG_DIR="download_logs"

# Quote the expansion so a directory name with spaces still works.
mkdir -p "$LOG_DIR"

# Run one scheduled download and log it to a timestamped file.
# Globals: LOG_DIR (read)
# Arguments: $1 task name, $2 URL, $3 output dir, $4 extra wget options
# Returns: 0 when wget succeeds, 1 otherwise.
execute_download_task() {
    local task_name=$1
    local url=$2
    local output_dir=$3
    local options=$4
    
    local log_file="$LOG_DIR/${task_name}_$(date +%Y%m%d_%H%M%S).log"
    
    echo "Executing task: $task_name"
    echo "URL: $url"
    echo "Output: $output_dir"
    
    mkdir -p "$output_dir"
    
    # $options is intentionally unquoted: it may carry several wget flags
    # that must be word-split (e.g. "-c -t 3").
    # Branch on wget directly instead of inspecting $? afterwards — the
    # old "[ $? -eq 0 ]" form is fragile if any command slips in between.
    if wget $options -P "$output_dir" -o "$log_file" "$url"; then
        echo "✓ Task $task_name completed successfully"
        return 0
    else
        echo "✗ Task $task_name failed"
        return 1
    fi
}

# Read task definitions from $DOWNLOAD_CONFIG and run each one.
# Line format: task_name|url|output_dir|options|schedule
# ('#'-prefixed and empty names are skipped).
# NOTE(review): the trailing "schedule" field is read but never used.
load_and_execute_tasks() {
    if [ ! -f "$DOWNLOAD_CONFIG" ]; then
        echo "Config file not found: $DOWNLOAD_CONFIG"
        return 1
    fi
    
    while IFS='|' read -r task_name url output_dir options schedule; do
        if [ -n "$task_name" ] && [[ ! $task_name =~ ^# ]]; then
            execute_download_task "$task_name" "$url" "$output_dir" "$options"
        fi
    done < "$DOWNLOAD_CONFIG"
}

# Periodically report the number of running wget processes.
# Runs forever; intended to be started in its own terminal/session.
monitor_downloads() {
    echo "Monitoring download tasks..."
    
    while true; do
        # pgrep counts matching processes directly and, unlike the old
        # "ps aux | grep wget | grep -v grep | wc -l" pipeline, can
        # never match itself or this script's own command line.
        local running_downloads
        running_downloads=$(pgrep -c -x wget || true)
        
        if [ "$running_downloads" -gt 0 ]; then
            echo "Active downloads: $running_downloads"
        fi
        
        sleep 60
    done
}

# Write a summary of the download logs into a dated report file.
# Globals: LOG_DIR (read)
generate_download_report() {
    local report_file="download_report_$(date +%Y%m%d).txt"
    
    # Group the whole report body and redirect once; the target path is
    # quoted (the old per-line unquoted ">> $report_file" redirections
    # would break on any whitespace in the name).
    {
        echo "Download Report - $(date +%Y-%m-%d)"
        echo "============================="
        echo ""
        echo "Download Statistics:"
        echo "Total downloads: $(ls "$LOG_DIR"/*.log 2>/dev/null | wc -l)"
        echo "Successful: $(grep -l "saved" "$LOG_DIR"/*.log 2>/dev/null | wc -l)"
        echo "Failed: $(grep -l "failed" "$LOG_DIR"/*.log 2>/dev/null | wc -l)"
        echo ""
        echo "Recent Downloads:"
        ls -lt "$LOG_DIR"/*.log 2>/dev/null | head -10
    } > "$report_file"
    
    echo "Report saved to: $report_file"
}

# CLI dispatcher for the scheduled-download tool.
main() {
    case "$1" in
        execute) load_and_execute_tasks ;;
        monitor) monitor_downloads ;;
        report)  generate_download_report ;;
        *)
            echo "Usage: $0 {execute|monitor|report}"
            exit 1
            ;;
    esac
}

main "$@"

无敌#

企业级下载管理系统#

#!/bin/bash
# 企业级下载管理系统

# System paths: configuration, logs, downloads, and runtime state.
# These live under /etc, /var/log and /var/run, so the script must be
# run with sufficient privileges.
CONFIG_DIR="/etc/download_manager"
LOG_DIR="/var/log/download_manager"
DOWNLOAD_DIR="/var/downloads"
QUEUE_FILE="/var/run/download_manager/queue"
STATUS_FILE="/var/run/download_manager/status"

# Quote every expansion so paths survive any future whitespace.
mkdir -p "$CONFIG_DIR" "$LOG_DIR" "$DOWNLOAD_DIR" /var/run/download_manager

# Ensure the queue file exists before any worker tries to read it.
init_queue() {
    [ -f "$QUEUE_FILE" ] || touch "$QUEUE_FILE"
}

# Append a new task record to the queue.
# Record format: id|timestamp|url|output_dir|priority|options|status
# Globals: QUEUE_FILE (written)
# Arguments: $1 URL, $2 output dir, $3 priority (numeric), $4 wget options
# Outputs: prints the generated task id on stdout
# Returns: 0 on success
add_download_task() {
    local url=$1
    local output_dir=$2
    local priority=$3
    local options=$4
    
    local task_id timestamp
    task_id=$(date +%s%N)
    timestamp=$(date '+%Y-%m-%d %H:%M:%S')
    
    echo "$task_id|$timestamp|$url|$output_dir|$priority|$options|pending" >> "$QUEUE_FILE"
    
    echo "Task added: $task_id"
    # The nanosecond id is far larger than the 0-255 range an exit status
    # can hold — the previous "return $task_id" truncated it mod 256 and
    # almost always produced a bogus non-zero status. Callers must read
    # the id from stdout instead.
    return 0
}

# Print the highest-priority (lowest numeric field 5) pending task line.
# Globals: QUEUE_FILE (read)
# Returns: 0 when a task was printed, 1 when the queue is empty/missing.
get_next_task() {
    [ -f "$QUEUE_FILE" ] || return 1
    
    local next
    next=$(sort -t'|' -k5 -n "$QUEUE_FILE" | grep "|pending$" | head -1)
    
    [ -n "$next" ] && echo "$next" && return 0
    return 1
}

# Rewrite the status (the final field) of one task line in the queue.
# Globals: QUEUE_FILE (modified in place)
# Arguments: $1 - task id, $2 - new status
update_task_status() {
    local task_id=$1
    local status=$2
    
    # Match the task by id and replace ONLY the trailing status field.
    # The previous sed matched lines already ending in the NEW status
    # (so it never hit a line still holding the old one) and its
    # replacement blanked the url/output_dir/priority/options fields.
    sed -i "s/^\($task_id|.*|\)[^|]*$/\1$status/" "$QUEUE_FILE"
}

# Run a single queued task (a full queue record passed as $1).
# Record fields: id|timestamp|url|output_dir|priority|options|status
# Globals: LOG_DIR (read), QUEUE_FILE (via update_task_status)
execute_task() {
    local task=$1
    
    # Split the pipe-separated record into the fields we need.
    local task_id=$(echo "$task" | cut -d'|' -f1)
    local url=$(echo "$task" | cut -d'|' -f3)
    local output_dir=$(echo "$task" | cut -d'|' -f4)
    local options=$(echo "$task" | cut -d'|' -f6)
    
    local log_file="$LOG_DIR/task_${task_id}.log"
    
    echo "Executing task: $task_id"
    echo "URL: $url"
    echo "Output: $output_dir"
    
    mkdir -p "$output_dir"
    
    update_task_status "$task_id" "running"
    
    # $options is deliberately unquoted so multiple flags word-split.
    wget $options -P "$output_dir" -o "$log_file" "$url"
    
    if [ $? -eq 0 ]; then
        update_task_status "$task_id" "completed"
        echo "✓ Task $task_id completed successfully"
    else
        update_task_status "$task_id" "failed"
        echo "✗ Task $task_id failed"
    fi
}

# Worker loop: repeatedly claim the next pending task and execute it.
# Arguments: $1 - worker id (for log messages only)
# NOTE(review): execute_task also sets "running", so the update here is
# redundant; and nothing locks the queue file, so two concurrent workers
# can claim the same task — confirm whether single-worker use is the
# intended deployment.
download_worker() {
    local worker_id=$1
    
    echo "Worker $worker_id started"
    
    while true; do
        local task=$(get_next_task)
        
        if [ -n "$task" ]; then
            local task_id=$(echo "$task" | cut -d'|' -f1)
            update_task_status "$task_id" "running"
            execute_task "$task"
        else
            sleep 5   # queue empty — poll again shortly
        fi
    done
}

# Launch N background worker loops and block until they all exit.
# Arguments: $1 - number of workers
start_workers() {
    local worker_count=$1
    local n
    
    for ((n = 1; n <= worker_count; n++)); do
        download_worker "$n" &
    done
    
    wait
}

# Show the whole queue, or only the line for one task id.
# Globals: QUEUE_FILE (read)
# Arguments: $1 - optional task id
view_task_status() {
    local task_id=$1
    
    if [ -z "$task_id" ]; then
        echo "=== All Tasks ==="
        cat "$QUEUE_FILE"
        return
    fi
    
    echo "=== Task $task_id ==="
    grep "^$task_id|" "$QUEUE_FILE"
}

# Mark a pending/running task as cancelled in the queue.
# Globals: QUEUE_FILE (read; modified via update_task_status)
# Arguments: $1 - task id
# NOTE(review): cancelling a "running" task only flips the queue entry;
# it does not kill a wget process already working on it.
cancel_task() {
    local task_id=$1
    
    local task=$(grep "^$task_id|" "$QUEUE_FILE")
    local status=$(echo "$task" | cut -d'|' -f7)
    
    if [ "$status" = "pending" ] || [ "$status" = "running" ]; then
        update_task_status "$task_id" "cancelled"
        echo "Task $task_id cancelled"
    else
        echo "Cannot cancel task with status: $status"
    fi
}

# Print per-status task counts and the overall success rate.
# Globals: QUEUE_FILE (read)
generate_statistics() {
    echo "=== Download Statistics ==="
    echo ""
    
    local total pending running completed failed cancelled
    total=$(wc -l < "$QUEUE_FILE")
    # grep -c counts matches directly (no "| wc -l" pipeline needed).
    pending=$(grep -c "|pending$" "$QUEUE_FILE")
    running=$(grep -c "|running$" "$QUEUE_FILE")
    completed=$(grep -c "|completed$" "$QUEUE_FILE")
    failed=$(grep -c "|failed$" "$QUEUE_FILE")
    cancelled=$(grep -c "|cancelled$" "$QUEUE_FILE")
    
    echo "Total tasks: $total"
    echo "Pending: $pending"
    echo "Running: $running"
    echo "Completed: $completed"
    echo "Failed: $failed"
    echo "Cancelled: $cancelled"
    echo ""
    
    # Guard against dividing by zero when nothing has finished yet
    # (the old bc expression aborted in that case); awk replaces bc so
    # the script needs one external tool fewer.
    if [ $((completed + failed)) -gt 0 ]; then
        echo "Success rate: $(awk -v c="$completed" -v f="$failed" 'BEGIN { printf "%.2f", c * 100 / (c + f) }')%"
    else
        echo "Success rate: N/A"
    fi
}

# CLI entry point: make sure the queue exists, then dispatch.
main() {
    init_queue
    
    case "$1" in
        add)    add_download_task "$2" "$3" "$4" "$5" ;;
        start)  start_workers "${2:-3}" ;;
        status) view_task_status "$2" ;;
        cancel) cancel_task "$2" ;;
        stats)  generate_statistics ;;
        *)
            echo "Usage: $0 {add|start|status|cancel|stats}"
            exit 1
            ;;
    esac
}

main "$@"

智能下载调度器#

#!/bin/bash
# 智能下载调度器

CONFIG_FILE="scheduler.conf"
LOG_FILE="scheduler.log"
STATE_FILE="scheduler.state"

# Source scheduler settings (MAX_LOAD, CHECK_INTERVAL, ...) into the
# current shell; exit if the file is missing.
# Globals: CONFIG_FILE (read)
load_config() {
    if [ -f "$CONFIG_FILE" ]; then
        # Quote the path so it survives whitespace in $CONFIG_FILE
        # (the old unquoted "source $CONFIG_FILE" would word-split).
        source "$CONFIG_FILE"
    else
        echo "Config file not found: $CONFIG_FILE"
        exit 1
    fi
}

# Probe basic connectivity with a single short-timeout ping to a
# public resolver.
# Returns: 0 when the probe host answers, 1 otherwise.
check_network_status() {
    local test_host="8.8.8.8"
    # The function's status is ping's status — no explicit if needed.
    ping -c 1 -W 2 "$test_host" > /dev/null 2>&1
}

# Succeed only while the 1-minute load average is below MAX_LOAD.
# Globals: MAX_LOAD (optional, default 2.0)
# Returns: 0 when load < MAX_LOAD, 1 otherwise.
# NOTE(review): parsing `uptime` output is locale/format dependent, and
# the float comparison requires `bc` — confirm both hold on target hosts.
check_system_load() {
    local load=$(uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | cut -d',' -f1)
    local max_load=${MAX_LOAD:-2.0}
    
    if (( $(echo "$load < $max_load" | bc -l) )); then
        return 0
    else
        return 1
    fi
}

# 检查磁盘空间
check_disk_space() {
    local download_dir=$1
    local min_space=${MIN_DISK_SPACE:-1073741824} # 1GB
    
    local available_space=$(df "$download_dir" | awk 'NR==2 {print $4 * 1024}')
    
    if [ $available_space -gt $min_space ]; then
        return 0
    else
        return 1
    fi
}

# Allow downloads only inside the configured hour window.
# Globals: START_HOUR (default 0), END_HOUR (default 6)
# Returns: 0 inside the half-open window [START_HOUR, END_HOUR), 1 outside.
check_time_window() {
    local now start end
    now=$(date +%H)
    start=${START_HOUR:-0}
    end=${END_HOUR:-6}
    
    # Force base-10 so zero-padded hours like "08" are not read as octal
    # by the arithmetic context.
    if (( 10#$now >= 10#$start && 10#$now < 10#$end )); then
        return 0
    else
        return 1
    fi
}

# Gate one download behind network/load/disk/time-window checks, then
# run it with wget; any failed precondition postpones the task.
# Globals: LOG_FILE (read), plus thresholds read by the check_* helpers
# Arguments: $1 URL, $2 output directory, $3 task name
# Returns: 0 on successful download, 1 when postponed or failed.
schedule_download() {
    local url=$1
    local output_dir=$2
    local task_name=$3
    
    echo "Scheduling download: $task_name"
    
    # Network reachability
    if ! check_network_status; then
        echo "Network not available, postponing download"
        return 1
    fi
    
    # System load below threshold
    if ! check_system_load; then
        echo "System load too high, postponing download"
        return 1
    fi
    
    # Enough free disk space at the destination
    if ! check_disk_space "$output_dir"; then
        echo "Insufficient disk space, postponing download"
        return 1
    fi
    
    # Inside the allowed download window
    if ! check_time_window; then
        echo "Outside allowed time window, postponing download"
        return 1
    fi
    
    # All gates passed — download, appending wget output to $LOG_FILE.
    echo "Starting download: $task_name"
    wget -P "$output_dir" -a "$LOG_FILE" "$url"
    
    if [ $? -eq 0 ]; then
        echo "✓ Download completed: $task_name"
        return 0
    else
        echo "✗ Download failed: $task_name"
        return 1
    fi
}

# Endless scheduling loop: re-scan the task list every CHECK_INTERVAL
# seconds (default 300) and attempt each task.
# NOTE(review): $CONFIG_FILE is sourced as shell variables by
# load_config AND parsed here as pipe-separated task lines — one file
# cannot sensibly be both formats; the task list likely belongs in a
# separate file. Confirm the intended layout.
main_scheduler() {
    load_config
    
    echo "Starting download scheduler..."
    echo "Log file: $LOG_FILE"
    
    while true; do
        # Each line: task_name|url|output_dir|options ('#' lines skipped)
        while IFS='|' read -r task_name url output_dir options; do
            if [ -n "$task_name" ] && [[ ! $task_name =~ ^# ]]; then
                schedule_download "$url" "$output_dir" "$task_name"
            fi
        done < "$CONFIG_FILE"
        
        # Sleep until the next scan.
        sleep ${CHECK_INTERVAL:-300}
    done
}

# One pass over the task list (same pipe-separated format that
# main_scheduler reads): task_name|url|output_dir|options,
# '#'-prefixed lines skipped.
single_run() {
    load_config
    
    while IFS='|' read -r task_name url output_dir options; do
        if [ -n "$task_name" ] && [[ ! $task_name =~ ^# ]]; then
            schedule_download "$url" "$output_dir" "$task_name"
        fi
    done < "$CONFIG_FILE"
}

# CLI dispatcher: "start" loops forever, "run" does a single pass.
main() {
    case "$1" in
        start) main_scheduler ;;
        run)   single_run ;;
        *)
            echo "Usage: $0 {start|run}"
            exit 1
            ;;
    esac
}

main "$@"

下载内容验证系统#

#!/bin/bash
# 下载内容验证系统

DOWNLOAD_DIR="downloads"
VERIFY_LOG="verify.log"

# Print the hex digest of a file.
# Arguments: $1 - file path, $2 - algorithm (md5|sha1|sha256, default md5)
# Returns: 1 on an unknown algorithm name.
calculate_hash() {
    local file=$1
    local algorithm=${2:-md5}
    local tool
    
    # Map the algorithm name to its coreutils checksum tool.
    case "$algorithm" in
        md5)    tool=md5sum ;;
        sha1)   tool=sha1sum ;;
        sha256) tool=sha256sum ;;
        *)
            echo "Unsupported algorithm: $algorithm"
            return 1
            ;;
    esac
    
    "$tool" "$file" | awk '{print $1}'
}

# Compare a file's actual digest with the expected one.
# Arguments: $1 file, $2 expected digest, $3 algorithm (default md5)
# Returns: 0 on match, 1 on mismatch.
verify_hash() {
    local file=$1
    local expected_hash=$2
    local algorithm=${3:-md5}
    
    local actual_hash
    actual_hash=$(calculate_hash "$file" "$algorithm")
    
    # Guard-clause form: report the mismatch and bail out early.
    if [ "$actual_hash" != "$expected_hash" ]; then
        echo "✗ Hash verification failed for $file"
        echo "  Expected: $expected_hash"
        echo "  Actual: $actual_hash"
        return 1
    fi
    
    echo "✓ Hash verification passed for $file"
    return 0
}

# Verify every "<hash> <filename>" line of a hash file against files in
# a download directory (same line format md5sum/sha*sum emit).
# Arguments: $1 hash file, $2 download dir, $3 algorithm (default md5)
# Returns: the number of failures (0 means everything verified).
verify_from_hash_file() {
    local hash_file=$1
    local download_dir=$2
    local algorithm=${3:-md5}
    
    if [ ! -f "$hash_file" ]; then
        echo "Hash file not found: $hash_file"
        return 1
    fi
    
    echo "Verifying files from $hash_file..."
    
    local verified=0
    local failed=0
    
    # Each line: expected digest, whitespace, filename.
    while read -r expected_hash filename; do
        local file="$download_dir/$filename"
        
        if [ -f "$file" ]; then
            if verify_hash "$file" "$expected_hash" "$algorithm"; then
                verified=$((verified + 1))
            else
                failed=$((failed + 1))
            fi
        else
            echo "✗ File not found: $file"
            failed=$((failed + 1))
        fi
    done < "$hash_file"
    
    echo ""
    echo "Verification Summary:"
    echo "Verified: $verified"
    echo "Failed: $failed"
    
    # NOTE(review): a shell return status wraps above 255 — fine while
    # failure counts stay small.
    return $failed
}

# Download a file and immediately verify its digest.
# Arguments: $1 URL, $2 output file, $3 expected digest,
#            $4 algorithm (default md5)
# Returns: verify_hash's status, or 1 when the download itself fails.
download_and_verify() {
    local url=$1
    local output_file=$2
    local expected_hash=$3
    local algorithm=${4:-md5}
    
    echo "Downloading: $url"
    # Branch on wget directly instead of the fragile "[ $? -eq 0 ]"
    # pattern; the function's status is then verify_hash's status.
    if wget -O "$output_file" "$url"; then
        echo "Download completed, verifying..."
        verify_hash "$output_file" "$expected_hash" "$algorithm"
    else
        echo "✗ Download failed"
        return 1
    fi
}

# Download and verify a batch of files described by a manifest.
# Manifest line format: url|filename|expected_hash|algorithm
# ('#'-prefixed and empty lines are skipped).
# Arguments: $1 manifest file, $2 download directory
# Returns: the number of failed downloads/verifications.
batch_download_and_verify() {
    local manifest_file=$1
    local download_dir=$2
    
    if [ ! -f "$manifest_file" ]; then
        echo "Manifest file not found: $manifest_file"
        return 1
    fi
    
    mkdir -p "$download_dir"
    
    echo "Batch downloading and verifying..."
    
    local success=0
    local failed=0
    
    while IFS='|' read -r url filename expected_hash algorithm; do
        if [ -n "$url" ] && [[ ! $url =~ ^# ]]; then
            local output_file="$download_dir/$filename"
            
            if download_and_verify "$url" "$output_file" "$expected_hash" "$algorithm"; then
                success=$((success + 1))
            else
                failed=$((failed + 1))
            fi
        fi
    done < "$manifest_file"
    
    echo ""
    echo "Batch Download Summary:"
    echo "Success: $success"
    echo "Failed: $failed"
    
    return $failed
}

# Write a listing plus an md5 manifest of everything in a directory.
# Arguments: $1 - directory to report on
generate_verification_report() {
    local download_dir=$1
    local report_file="verification_report_$(date +%Y%m%d_%H%M%S).txt"
    
    # Group the report body and redirect once; the target is quoted
    # (the old per-line unquoted ">> $report_file" redirections would
    # break on whitespace). read -r keeps backslashes in filenames.
    {
        echo "Verification Report - $(date)"
        echo "========================="
        echo ""
        echo "Files in $download_dir:"
        ls -lh "$download_dir"
        echo ""
        echo "File Hashes:"
        find "$download_dir" -type f | while read -r file; do
            echo "$(calculate_hash "$file" "md5")  $(basename "$file")"
        done
    } > "$report_file"
    
    echo "Report saved to: $report_file"
}

# CLI dispatcher for the verification tool.
main() {
    case "$1" in
        verify)      verify_hash "$2" "$3" "$4" ;;
        verify-file) verify_from_hash_file "$2" "$3" "$4" ;;
        download)    download_and_verify "$2" "$3" "$4" "$5" ;;
        batch)       batch_download_and_verify "$2" "$3" ;;
        report)      generate_verification_report "$2" ;;
        *)
            echo "Usage: $0 {verify|verify-file|download|batch|report}"
            exit 1
            ;;
    esac
}

main "$@"

最佳实践#

  1. 使用断点续传:对于大文件下载,始终使用 -c 选项
  2. 限制下载速度:避免对服务器造成过大压力
  3. 设置合理的等待时间:在批量下载时设置适当的等待时间
  4. 使用后台下载:对于长时间下载任务使用 -b 选项
  5. 验证下载内容:下载后验证文件完整性
  6. 记录日志:记录下载过程便于故障排查
  7. 使用配置文件:对于复杂的下载任务使用配置文件管理
  8. 遵守 robots.txt:递归下载时遵守网站的 robots.txt 规则

注意事项#

  • wget 默认并不会递归下载;只有使用 -r/--mirror 等选项时才会递归,此时注意用 -l 设置合适的递归深度
  • 在生产环境中使用递归下载时要格外小心
  • 注意版权和法律法规,不要下载非法内容
  • 大文件下载时注意磁盘空间
  • 使用代理时注意代理服务器的安全性
  • 下载敏感数据时注意保护认证信息
  • 不同版本的 wget 选项可能有所不同
  • 在自动化脚本中添加适当的错误处理