du 命令详解#

du(Disk Usage)是 Linux 系统中用于显示文件和目录磁盘使用情况的命令。它可以递归地显示目录及其子目录的磁盘使用量,帮助用户找出占用大量磁盘空间的文件和目录,是系统管理员进行磁盘空间管理的重要工具。

入门#

基本用法#

# 显示当前目录的磁盘使用情况
du

# 以人类可读格式显示
du -h

# 显示特定目录
du /home

# 显示总大小
du -s

常用选项#

选项说明
-h以人类可读格式显示(KB、MB、GB)
-s只显示每个参数的总计(不递归列出子目录)
-a显示所有文件
-c在输出末尾额外显示所有参数的总计(grand total)
-d限制显示深度
--max-depth限制显示深度

基本示例#

# 显示当前目录的磁盘使用情况
du

# 输出示例:
# 1024    ./dir1
# 2048    ./dir2
# 3072    .

# 以人类可读格式显示
du -h

# 输出示例:
# 1.0K    ./dir1
# 2.0K    ./dir2
# 3.0K    .

# 显示特定目录
du -h /home

# 输出示例:
# 1.0G    /home/user1
# 2.0G    /home/user2
# 3.0G    /home

中级#

目录大小分析#

# 显示目录总大小
du -sh /home

# 显示所有子目录大小
du -h --max-depth=1 /home

# 显示特定深度
du -h --max-depth=2 /home

# 显示所有文件和目录
du -ah /home

# 按大小排序
du -h --max-depth=1 /home | sort -hr

文件大小分析#

# 查找大文件
du -ah /home | sort -hr | head -10

# 查找特定大小的文件
du -ah /home | grep 'G'

# 查找大于 100MB 的文件(用 find 的 -size 筛选比对 du 输出做文本匹配更可靠)
find /home -type f -size +100M -exec du -h {} +

# 查找特定类型的文件
du -ah /home | grep '\.log$'

# 查找特定目录中的文件
du -ah /home/user1 | sort -hr | head -10

高级筛选#

# 排除特定目录
du -h --exclude='*.log' /home

# 排除多个目录
du -h --exclude='*.log' --exclude='*.tmp' /home

# 只统计特定类型的文件(du 没有 --include 选项,需结合 find)
find /home -name '*.txt' -exec du -ch {} +

# 显示 inode(文件数)使用情况(du 没有 -i 选项;GNU du 使用 --inodes)
du --inodes /home

# 以 1MB 为块大小显示
du -B 1M /home

高级#

高级选项#

# 显示文件的表观大小(逻辑大小),而非实际占用的磁盘块
du --apparent-size /home

# 多次统计硬链接(默认同一文件的多个硬链接只计一次)
du -l /home

# 跟随符号链接,统计其指向目标的大小
du -L /home

# 显示所有文件(-a 会列出文件本身,而不仅是目录;隐藏文件默认就会统计)
du -ah /home

# 显示总计
du -ch /home

# 限制显示深度
du -h --max-depth=3 /home

磁盘使用分析脚本#

#!/bin/bash
# Disk usage analysis script.
# Usage: script {analyze|large|type|report} <directory> [threshold|extension]

DIRECTORY=$1
SIZE_THRESHOLD=${2:-100M}

# Summarize a directory: total size, 10 largest subdirectories, 10 largest files.
analyze_directory() {
    local directory=$1

    echo "=== Directory Analysis: $directory ==="
    echo ""

    # Total size; quote the path so names containing spaces work.
    local total_size
    total_size=$(du -sh "$directory" | awk '{print $1}')
    echo "Total size: $total_size"
    echo ""

    # After sort -hr the first line is the directory's own total -> skip it.
    echo "Top 10 largest subdirectories:"
    du -h --max-depth=1 "$directory" | sort -hr | head -11 | tail -10
    echo ""

    echo "Top 10 largest files:"
    find "$directory" -type f -exec du -h {} + | sort -hr | head -10
}

# List files larger than a find(1) size threshold (e.g. 100M): "path size".
find_large_files() {
    local directory=$1
    local threshold=$2

    echo "=== Finding files larger than $threshold in $directory ==="

    # -exec ... + batches files into fewer ls invocations than \;
    find "$directory" -type f -size "+$threshold" -exec ls -lh {} + | awk '{print $9, $5}'
}

# List the 20 largest files with the given extension.
find_files_by_type() {
    local directory=$1
    local extension=$2

    echo "=== Finding $extension files in $directory ==="

    find "$directory" -type f -name "*$extension" -exec du -h {} + | sort -hr | head -20
}

# Write a full report (sizes, counts, largest files) to a timestamped file.
generate_directory_report() {
    local directory=$1
    local report_file="directory_report_$(date +%Y%m%d_%H%M%S).txt"

    # Group all output and redirect once instead of appending line by line.
    {
        echo "Directory Report - $(date)"
        echo "==================="
        echo "Directory: $directory"
        echo ""
        echo "Total size:"
        du -sh "$directory"
        echo ""
        echo "Subdirectory sizes:"
        du -h --max-depth=1 "$directory" | sort -hr
        echo ""
        echo "File statistics:"
        echo "Total files: $(find "$directory" -type f | wc -l)"
        echo "Total directories: $(find "$directory" -type d | wc -l)"
        echo ""
        echo "Largest files:"
        find "$directory" -type f -exec du -h {} + | sort -hr | head -20
    } > "$report_file"

    echo "Report saved to: $report_file"
}

# Dispatch on the first argument.
main() {
    case "$1" in
        analyze)
            analyze_directory "$2"
            ;;
        large)
            find_large_files "$2" "$3"
            ;;
        type)
            find_files_by_type "$2" "$3"
            ;;
        report)
            generate_directory_report "$2"
            ;;
        *)
            echo "Usage: $0 {analyze|large|type|report}"
            exit 1
            ;;
    esac
}

main "$@"

磁盘清理工具#

#!/bin/bash
# Disk cleanup tool.
# Usage: script {duplicates|empty|old|logs|temp} <directory> [days]

DIRECTORY=$1

# Print one representative line per group of files with identical MD5 sums.
find_duplicate_files() {
    local directory=$1

    echo "=== Finding duplicate files in $directory ==="

    # md5sum output is "<32-hex-hash>  <path>"; compare only the hash prefix.
    find "$directory" -type f -exec md5sum {} + | sort | uniq -w 32 -d
}

# List all empty directories under the given directory.
find_empty_directories() {
    local directory=$1

    echo "=== Finding empty directories in $directory ==="

    find "$directory" -type d -empty
}

# List files not modified within the last N days.
find_old_files() {
    local directory=$1
    local days=$2

    echo "=== Finding files older than $days days in $directory ==="

    # Batch into few ls invocations instead of one per file.
    find "$directory" -type f -mtime "+$days" -exec ls -lh {} +
}

# Delete *.log files older than N days (default 30) and report space change.
clean_log_files() {
    local directory=$1
    local days=${2:-30}

    echo "=== Cleaning log files older than $days days in $directory ==="

    local size_before size_after
    size_before=$(du -sh "$directory" | awk '{print $1}')

    find "$directory" -type f -name "*.log" -mtime "+$days" -delete

    size_after=$(du -sh "$directory" | awk '{print $1}')

    echo "Size before: $size_before"
    echo "Size after: $size_after"
}

# Delete *.tmp / *.temp files older than N days (default 7).
clean_temp_files() {
    local directory=$1
    local days=${2:-7}

    echo "=== Cleaning temporary files older than $days days in $directory ==="

    local size_before size_after
    size_before=$(du -sh "$directory" | awk '{print $1}')

    find "$directory" -type f \( -name "*.tmp" -o -name "*.temp" \) -mtime "+$days" -delete

    size_after=$(du -sh "$directory" | awk '{print $1}')

    echo "Size before: $size_before"
    echo "Size after: $size_after"
}

# Dispatch on the first argument.
main() {
    case "$1" in
        duplicates)
            find_duplicate_files "$2"
            ;;
        empty)
            find_empty_directories "$2"
            ;;
        old)
            find_old_files "$2" "$3"
            ;;
        logs)
            clean_log_files "$2" "$3"
            ;;
        temp)
            clean_temp_files "$2" "$3"
            ;;
        *)
            echo "Usage: $0 {duplicates|empty|old|logs|temp}"
            exit 1
            ;;
    esac
}

main "$@"

大师#

企业级磁盘分析系统#

#!/bin/bash
# Enterprise disk analysis system.
# Usage: script {structure|files|growth|report} <directory> [days]

CONFIG_FILE="/etc/disk_analyzer/config.conf"
LOG_DIR="/var/log/disk_analyzer"
ANALYSIS_DIR="/var/lib/disk_analyzer/analyses"

mkdir -p "$LOG_DIR" "$ANALYSIS_DIR"

# Load optional configuration; a missing file is not fatal.
[ -f "$CONFIG_FILE" ] && source "$CONFIG_FILE"

# Capture the directory tree, per-subdirectory sizes and file-type mix.
analyze_directory_structure() {
    local directory=$1
    local output_dir="$ANALYSIS_DIR/structure_$(date +%Y%m%d_%H%M%S)"

    mkdir -p "$output_dir"

    {
        echo "=== Directory Structure Analysis ==="
        echo "Directory: $directory"
        echo "Analysis Time: $(date)"
        echo ""
        echo "Directory Tree:"
        # Prefer tree(1); fall back to a flat directory list if unavailable.
        tree -L 3 -h "$directory" 2>/dev/null || find "$directory" -type d | head -100
        echo ""
        echo "Directory Size Distribution:"
        du -h --max-depth=1 "$directory" | sort -hr
        echo ""
        echo "File Type Distribution:"
        # Bucket files by the text after the last dot (their extension).
        find "$directory" -type f | sed 's/.*\.//' | sort | uniq -c | sort -rn | head -20
    } > "$output_dir/structure.txt"

    echo "Analysis saved to: $output_dir/structure.txt"
}

# Report file sizes, large files, type counts and modification recency.
analyze_file_distribution() {
    local directory=$1
    local output_dir="$ANALYSIS_DIR/files_$(date +%Y%m%d_%H%M%S)"

    mkdir -p "$output_dir"

    {
        echo "=== File Distribution Analysis ==="
        echo "Directory: $directory"
        echo "Analysis Time: $(date)"
        echo ""
        echo "File Size Distribution:"
        find "$directory" -type f -exec du -h {} + | sort -hr | head -100
        echo ""
        echo "Large Files (>100MB):"
        find "$directory" -type f -size +100M -exec ls -lh {} + | awk '{print $9, $5}'
        echo ""
        echo "File Type Statistics:"
        find "$directory" -type f | sed 's/.*\.//' | sort | uniq -c | sort -rn
        echo ""
        echo "File Modification Time Distribution:"
        echo "Files modified in last 7 days: $(find "$directory" -type f -mtime -7 | wc -l)"
        echo "Files modified in last 30 days: $(find "$directory" -type f -mtime -30 | wc -l)"
        echo "Files modified in last 90 days: $(find "$directory" -type f -mtime -90 | wc -l)"
    } > "$output_dir/files.txt"

    echo "Analysis saved to: $output_dir/files.txt"
}

# Record the current size into a history file and show the recent trend.
analyze_disk_growth() {
    local directory=$1
    local days=${2:-30}
    # Encode the path into the file name by replacing '/' with '_'.
    local output_file="$ANALYSIS_DIR/growth_${directory//\//_}_$(date +%Y%m%d).txt"

    local current_size
    current_size=$(du -sh "$directory" | awk '{print $1}')

    {
        echo "=== Disk Growth Analysis ==="
        echo "Directory: $directory"
        echo "Analysis Period: $days days"
        echo "Analysis Time: $(date)"
        echo ""
        echo "Current size: $current_size"
        echo ""
    } > "$output_file"

    # Append today's measurement to the per-directory history file;
    # after the append the file is guaranteed to exist.
    local history_file="$ANALYSIS_DIR/history_${directory//\//_}.txt"
    echo "$(date '+%Y-%m-%d %H:%M:%S'),$current_size" >> "$history_file"

    echo "Historical Data:" >> "$output_file"
    tail -10 "$history_file" >> "$output_file"

    echo "Analysis saved to: $output_file"
}

# Produce a combined report: structure, largest files, types, statistics.
generate_analysis_report() {
    local directory=$1
    local report_file="$ANALYSIS_DIR/analysis_report_$(date +%Y%m%d).txt"

    {
        echo "Disk Analysis Report - $(date +%Y-%m-%d)"
        echo "======================="
        echo "Directory: $directory"
        echo ""
        echo "=== Directory Structure ==="
        du -h --max-depth=2 "$directory" | sort -hr
        echo ""
        echo "=== File Distribution ==="
        find "$directory" -type f -exec du -h {} + | sort -hr | head -50
        echo ""
        echo "=== File Types ==="
        find "$directory" -type f | sed 's/.*\.//' | sort | uniq -c | sort -rn | head -20
        echo ""
        echo "=== Statistics ==="
        echo "Total files: $(find "$directory" -type f | wc -l)"
        echo "Total directories: $(find "$directory" -type d | wc -l)"
        echo "Total size: $(du -sh "$directory" | awk '{print $1}')"
    } > "$report_file"

    echo "Report saved to: $report_file"
}

# Dispatch on the first argument.
main() {
    case "$1" in
        structure)
            analyze_directory_structure "$2"
            ;;
        files)
            analyze_file_distribution "$2"
            ;;
        growth)
            analyze_disk_growth "$2" "$3"
            ;;
        report)
            generate_analysis_report "$2"
            ;;
        *)
            echo "Usage: $0 {structure|files|growth|report}"
            exit 1
            ;;
    esac
}

main "$@"

智能磁盘清理系统#

#!/bin/bash
# Smart disk cleanup system.
# Usage: script {smart|logs|temp|cache|backups|large|duplicates} <dir> [arg]

CONFIG_FILE="/etc/disk_cleaner/config.conf"
LOG_FILE="/var/log/disk_cleaner/cleaner.log"

mkdir -p "$(dirname "$LOG_FILE")"

# Load optional retention configuration; a missing file is not fatal.
[ -f "$CONFIG_FILE" ] && source "$CONFIG_FILE"

# Helper: human-readable total size of a directory.
dir_size() {
    du -sh "$1" | awk '{print $1}'
}

# Run every cleaner when filesystem usage exceeds threshold% (default 80).
smart_clean() {
    local directory=$1
    local threshold=${2:-80}

    # df -P guarantees one line per filesystem, so NR==2 stays reliable
    # even when device names are long enough to wrap df's default output.
    local usage
    usage=$(df -P "$directory" | awk 'NR==2 {print $5}' | cut -d'%' -f1)

    if [ "$usage" -gt "$threshold" ]; then
        echo "Disk usage high (${usage}%), starting smart clean..."
        log_message "Smart clean triggered for $directory: ${usage}%"

        clean_logs "$directory"
        clean_temp "$directory"
        clean_cache "$directory"
        clean_backups "$directory"

        local new_usage
        new_usage=$(df -P "$directory" | awk 'NR==2 {print $5}' | cut -d'%' -f1)
        echo "Disk usage after clean: ${new_usage}%"
        log_message "Disk usage after clean: ${new_usage}%"
    else
        echo "Disk usage normal (${usage}%), no clean needed"
    fi
}

# Delete plain and rotated *.log files older than LOG_RETENTION_DAYS (30).
clean_logs() {
    local directory=$1
    local days=${LOG_RETENTION_DAYS:-30}

    echo "Cleaning log files older than $days days..."

    local size_before size_after
    size_before=$(dir_size "$directory")

    find "$directory" -type f -name "*.log" -mtime "+$days" -delete 2>/dev/null
    find "$directory" -type f -name "*.log.*" -mtime "+$days" -delete 2>/dev/null

    size_after=$(dir_size "$directory")

    echo "  Logs cleaned: $size_before -> $size_after"
    log_message "Logs cleaned in $directory: $size_before -> $size_after"
}

# Delete *.tmp / *.temp / *.swp files older than TEMP_RETENTION_DAYS (7).
clean_temp() {
    local directory=$1
    local days=${TEMP_RETENTION_DAYS:-7}

    echo "Cleaning temporary files older than $days days..."

    local size_before size_after
    size_before=$(dir_size "$directory")

    find "$directory" -type f \( -name "*.tmp" -o -name "*.temp" -o -name "*.swp" \) -mtime "+$days" -delete 2>/dev/null

    size_after=$(dir_size "$directory")

    echo "  Temp files cleaned: $size_before -> $size_after"
    log_message "Temp files cleaned in $directory: $size_before -> $size_after"
}

# Delete *.cache files and stale cache/ dirs (CACHE_RETENTION_DAYS, 7).
clean_cache() {
    local directory=$1
    local days=${CACHE_RETENTION_DAYS:-7}

    echo "Cleaning cache files older than $days days..."

    local size_before size_after
    size_before=$(dir_size "$directory")

    find "$directory" -type f -name "*.cache" -mtime "+$days" -delete 2>/dev/null
    find "$directory" -type d -name "cache" -mtime "+$days" -exec rm -rf {} + 2>/dev/null

    size_after=$(dir_size "$directory")

    echo "  Cache files cleaned: $size_before -> $size_after"
    log_message "Cache files cleaned in $directory: $size_before -> $size_after"
}

# Delete *.bak / *.backup / *~ files older than BACKUP_RETENTION_DAYS (30).
clean_backups() {
    local directory=$1
    local days=${BACKUP_RETENTION_DAYS:-30}

    echo "Cleaning backup files older than $days days..."

    local size_before size_after
    size_before=$(dir_size "$directory")

    find "$directory" -type f \( -name "*.bak" -o -name "*.backup" -o -name "*~" \) -mtime "+$days" -delete 2>/dev/null

    size_after=$(dir_size "$directory")

    echo "  Backup files cleaned: $size_before -> $size_after"
    log_message "Backup files cleaned in $directory: $size_before -> $size_after"
}

# Interactively delete files larger than a find(1) size (default 100M).
clean_large_files() {
    local directory=$1
    local size=${2:-100M}

    echo "Finding and cleaning files larger than $size..."

    local large_files
    large_files=$(find "$directory" -type f -size "+$size")

    if [ -n "$large_files" ]; then
        echo "Found large files:"
        while IFS= read -r file; do
            ls -lh "$file" | awk '{print $9, $5}'
        done <<< "$large_files"

        read -p "Delete these files? (y/n) " -n 1 -r
        echo

        if [[ $REPLY =~ ^[Yy]$ ]]; then
            local size_before size_after
            size_before=$(dir_size "$directory")

            # read -r plus quoting keeps paths with spaces/backslashes intact.
            while IFS= read -r file; do
                rm -f -- "$file"
                echo "  Deleted: $file"
            done <<< "$large_files"

            size_after=$(dir_size "$directory")

            echo "  Large files cleaned: $size_before -> $size_after"
            log_message "Large files cleaned in $directory: $size_before -> $size_after"
        fi
    else
        echo "No files larger than $size found"
    fi
}

# Find duplicate files by MD5 and, on confirmation, delete one copy per group.
clean_duplicate_files() {
    local directory=$1

    echo "Finding duplicate files in $directory..."

    # uniq -w 32 -d prints one representative line per duplicate group,
    # so at most one copy per group is removed below.
    local duplicates
    duplicates=$(find "$directory" -type f -exec md5sum {} + | sort | uniq -w 32 -d)

    if [ -n "$duplicates" ]; then
        echo "Found duplicate files:"
        echo "$duplicates"

        read -p "Delete duplicate files? (y/n) " -n 1 -r
        echo

        if [[ $REPLY =~ ^[Yy]$ ]]; then
            local size_before size_after
            size_before=$(dir_size "$directory")

            # md5sum output is "<hash>  <path>"; the path starts at column 35.
            # cut -c35- preserves paths containing spaces (awk '{print $2}' would not).
            while IFS= read -r file; do
                rm -f -- "$file"
                echo "  Deleted: $file"
            done <<< "$(echo "$duplicates" | cut -c35-)"

            size_after=$(dir_size "$directory")

            echo "  Duplicate files cleaned: $size_before -> $size_after"
            log_message "Duplicate files cleaned in $directory: $size_before -> $size_after"
        fi
    else
        echo "No duplicate files found"
    fi
}

# Append a timestamped message to the log file.
log_message() {
    local message=$1
    local timestamp
    timestamp=$(date "+%Y-%m-%d %H:%M:%S")
    echo "[$timestamp] $message" >> "$LOG_FILE"
}

# Dispatch on the first argument.
main() {
    case "$1" in
        smart)
            smart_clean "$2" "$3"
            ;;
        logs)
            clean_logs "$2"
            ;;
        temp)
            clean_temp "$2"
            ;;
        cache)
            clean_cache "$2"
            ;;
        backups)
            clean_backups "$2"
            ;;
        large)
            clean_large_files "$2" "$3"
            ;;
        duplicates)
            clean_duplicate_files "$2"
            ;;
        *)
            echo "Usage: $0 {smart|logs|temp|cache|backups|large|duplicates}"
            exit 1
            ;;
    esac
}

main "$@"

无敌#

磁盘空间优化系统#

#!/bin/bash
# Disk space optimization system.
# Usage: script {analyze|recommend|optimize|report} <directory>

CONFIG_FILE="/etc/disk_optimizer/config.conf"
LOG_DIR="/var/log/disk_optimizer"
OPTIMIZATION_DIR="/var/lib/disk_optimizer"

mkdir -p "$LOG_DIR" "$OPTIMIZATION_DIR"

# Load optional configuration; a missing file is not fatal.
[ -f "$CONFIG_FILE" ] && source "$CONFIG_FILE"

# Analyze how a directory is used: size/type/mtime distribution and depth.
analyze_usage_pattern() {
    local directory=$1
    local output_file="$OPTIMIZATION_DIR/usage_pattern_${directory//\//_}_$(date +%Y%m%d).txt"

    {
        echo "=== Usage Pattern Analysis ==="
        echo "Directory: $directory"
        echo "Analysis Time: $(date)"
        echo ""
        echo "File Size Distribution:"
        find "$directory" -type f -exec du -h {} + | awk '{print $1}' | sort | uniq -c | sort -rn
        echo ""
        echo "File Type Distribution:"
        find "$directory" -type f | sed 's/.*\.//' | sort | uniq -c | sort -rn
        echo ""
        echo "File Modification Time Distribution:"
        echo "Files modified in last 7 days: $(find "$directory" -type f -mtime -7 | wc -l)"
        echo "Files modified in last 30 days: $(find "$directory" -type f -mtime -30 | wc -l)"
        echo "Files modified in last 90 days: $(find "$directory" -type f -mtime -90 | wc -l)"
        echo "Files older than 90 days: $(find "$directory" -type f -mtime +90 | wc -l)"
        echo ""
        echo "Directory Depth Analysis:"
        # Depth = number of '/' characters in each path; 20 deepest first.
        find "$directory" -type d | while IFS= read -r dir; do
            depth=$(printf '%s' "$dir" | tr -cd '/' | wc -c)
            echo "$depth: $dir"
        done | sort -rn | head -20
    } > "$output_file"

    echo "Analysis saved to: $output_file"
}

# Scan for common space wasters and write actionable recommendations.
generate_optimization_recommendations() {
    local directory=$1
    local output_file="$OPTIMIZATION_DIR/optimization_recommendations_${directory//\//_}_$(date +%Y%m%d).txt"

    # Gather all counts up front so the report section is pure output.
    local large_files old_files duplicate_files temp_files log_files backup_files
    large_files=$(find "$directory" -type f -size +100M | wc -l)
    old_files=$(find "$directory" -type f -mtime +365 | wc -l)
    duplicate_files=$(find "$directory" -type f -exec md5sum {} + | sort | uniq -w 32 -d | wc -l)
    temp_files=$(find "$directory" -type f \( -name "*.tmp" -o -name "*.temp" \) | wc -l)
    log_files=$(find "$directory" -type f -name "*.log" | wc -l)
    backup_files=$(find "$directory" -type f \( -name "*.bak" -o -name "*.backup" \) | wc -l)

    {
        echo "=== Optimization Recommendations ==="
        echo "Directory: $directory"
        echo "Analysis Time: $(date)"
        echo ""

        if [ "$large_files" -gt 0 ]; then
            echo "⚠️  Found $large_files large files (>100MB)"
            echo "   Consider archiving or compressing these files"
            echo ""
        fi

        if [ "$old_files" -gt 0 ]; then
            echo "⚠️  Found $old_files old files (>1 year)"
            echo "   Consider archiving or deleting these files"
            echo ""
        fi

        if [ "$duplicate_files" -gt 0 ]; then
            echo "⚠️  Found $duplicate_files duplicate files"
            echo "   Consider removing duplicate files"
            echo ""
        fi

        if [ "$temp_files" -gt 0 ]; then
            echo "⚠️  Found $temp_files temporary files"
            echo "   Consider cleaning temporary files regularly"
            echo ""
        fi

        if [ "$log_files" -gt 0 ]; then
            echo "⚠️  Found $log_files log files"
            echo "   Consider implementing log rotation"
            echo ""
        fi

        if [ "$backup_files" -gt 0 ]; then
            echo "⚠️  Found $backup_files backup files"
            echo "   Consider implementing backup retention policy"
            echo ""
        fi

        echo "=== Optimization Suggestions ==="
        echo "1. Implement regular cleanup schedules"
        echo "2. Use compression for large files"
        echo "3. Implement data archiving policies"
        echo "4. Use deduplication for duplicate files"
        echo "5. Implement log rotation"
        echo "6. Use disk quotas to prevent excessive usage"
    } > "$output_file"

    echo "Recommendations saved to: $output_file"
}

# Compress, prune and deduplicate. WARNING: deletes data irreversibly.
perform_optimization() {
    local directory=$1

    echo "=== Performing Optimization for $directory ==="

    # Measure the starting size directly; the previous implementation read
    # it from a file that was never written, so it always printed "N/A".
    local size_before
    size_before=$(du -sh "$directory" | awk '{print $1}')

    echo "Compressing large files..."
    find "$directory" -type f -size +100M ! -name "*.gz" ! -name "*.zip" -exec gzip {} \;

    echo "Cleaning temporary files..."
    find "$directory" -type f \( -name "*.tmp" -o -name "*.temp" \) -mtime +7 -delete

    echo "Cleaning old files..."
    find "$directory" -type f -mtime +365 -delete

    echo "Cleaning duplicate files..."
    # md5sum output is "<hash>  <path>"; path starts at column 35, which
    # survives spaces in names (xargs on awk '{print $2}' would not).
    find "$directory" -type f -exec md5sum {} + | sort | uniq -w 32 -d | cut -c35- |
        while IFS= read -r dup; do
            rm -f -- "$dup"
        done

    local size_after
    size_after=$(du -sh "$directory" | awk '{print $1}')

    echo "Optimization completed"
    echo "Size before: $size_before"
    echo "Size after: $size_after"
}

# Combine usage stats with the analysis helpers into one report.
# NOTE: the analyze/recommend helpers write their detailed output to their
# own files; only their "saved to" pointer lines land in this report.
generate_optimization_report() {
    local directory=$1
    local report_file="$OPTIMIZATION_DIR/optimization_report_$(date +%Y%m%d).txt"

    {
        echo "Disk Optimization Report - $(date +%Y-%m-%d)"
        echo "========================="
        echo "Directory: $directory"
        echo ""
        echo "=== Current Usage ==="
        du -sh "$directory"
        echo ""
        echo "=== Usage Pattern ==="
        analyze_usage_pattern "$directory"
        echo ""
        echo "=== Optimization Recommendations ==="
        generate_optimization_recommendations "$directory"
    } > "$report_file"

    echo "Report saved to: $report_file"
}

# Dispatch on the first argument.
main() {
    case "$1" in
        analyze)
            analyze_usage_pattern "$2"
            ;;
        recommend)
            generate_optimization_recommendations "$2"
            ;;
        optimize)
            perform_optimization "$2"
            ;;
        report)
            generate_optimization_report "$2"
            ;;
        *)
            echo "Usage: $0 {analyze|recommend|optimize|report}"
            exit 1
            ;;
    esac
}

main "$@"

磁盘空间预测系统#

#!/bin/bash
# Disk space prediction system.
# Usage: script {collect|predict|trend|report} [directory] [days]

# Directory holding one CSV of measurements per tracked directory.
METRICS_DB="/var/lib/disk_predictor/metrics.db"
PREDICTION_DIR="/var/lib/disk_predictor/predictions"

# BUG FIX: METRICS_DB itself is used as a directory ($METRICS_DB/<name>.csv),
# so it must be created — "$(dirname "$METRICS_DB")" alone left every
# CSV write pointing into a non-existent directory.
mkdir -p "$METRICS_DB" "$PREDICTION_DIR"

# Append a timestamped measurement: size (KB), file count, dir count.
collect_disk_data() {
    local directory=$1

    local timestamp total_size file_count dir_count
    timestamp=$(date +%s)
    total_size=$(du -s "$directory" | awk '{print $1}')
    file_count=$(find "$directory" -type f | wc -l)
    dir_count=$(find "$directory" -type d | wc -l)

    # Encode the path into the file name by replacing '/' with '_'.
    local data_file="$METRICS_DB/${directory//\//_}.csv"

    echo "$timestamp,$total_size,$file_count,$dir_count" >> "$data_file"
}

# Internal: emit the prediction body (shared by stdout and the saved file).
_print_prediction() {
    local current_size=$1
    local growth_rate=$2
    local days=$3

    echo "Current size: $current_size KB"
    echo "Average growth rate: $growth_rate KB per data point"
    echo ""
    echo "Future predictions:"
    local i predicted_size predicted_size_mb future_date
    for ((i = 1; i <= days; i++)); do
        predicted_size=$(echo "scale=2; $current_size + ($growth_rate * $i)" | bc)
        predicted_size_mb=$(echo "scale=2; $predicted_size / 1024" | bc)
        future_date=$(date -d "+$i days" '+%Y-%m-%d')

        echo "  $future_date: ${predicted_size_mb} MB"
    done
}

# Predict future usage from recorded history (default horizon: 30 days).
predict_disk_usage() {
    local directory=$1
    local days=${2:-30}

    local data_file="$METRICS_DB/${directory//\//_}.csv"

    if [ ! -f "$data_file" ]; then
        echo "No data available for $directory"
        return 1
    fi

    echo "=== Disk Usage Prediction for $directory ==="
    echo "Prediction period: $days days"
    echo ""

    # Naive linear model: average growth per recorded data point.
    # NOTE: this is per *sample*, not per day — the day-by-day projection
    # below implicitly assumes roughly one sample per day.
    local current_size start_size data_points growth_rate
    current_size=$(tail -1 "$data_file" | cut -d, -f2)
    start_size=$(head -1 "$data_file" | cut -d, -f2)
    data_points=$(wc -l < "$data_file")

    growth_rate=$(echo "scale=2; ($current_size - $start_size) / $data_points" | bc)

    _print_prediction "$current_size" "$growth_rate" "$days"

    # Persist the same prediction to a dated file.
    local prediction_file="$PREDICTION_DIR/${directory//\//_}_$(date +%Y%m%d).txt"
    {
        echo "Disk Usage Prediction - $(date)"
        echo "Directory: $directory"
        echo "Prediction period: $days days"
        echo ""
        _print_prediction "$current_size" "$growth_rate" "$days"
    } > "$prediction_file"

    echo ""
    echo "Prediction saved to: $prediction_file"
}

# Show measurements within the last N days (default 7).
analyze_disk_trend() {
    local directory=$1
    local days=${2:-7}

    local data_file="$METRICS_DB/${directory//\//_}.csv"

    if [ ! -f "$data_file" ]; then
        echo "No data available for $directory"
        return 1
    fi

    echo "=== Disk Usage Trend for $directory ==="
    echo "Last $days days"
    echo ""

    local end_time start_time
    end_time=$(date +%s)
    start_time=$((end_time - days * 86400))

    # Keep the last 100 samples, filter to the requested window, format each.
    tail -100 "$data_file" |
        awk -F, -v start="$start_time" -v end="$end_time" '$1 >= start && $1 <= end' |
        while IFS=, read -r timestamp size _rest; do
            size_mb=$(echo "scale=2; $size / 1024" | bc)
            formatted_time=$(date -d "@$timestamp" '+%Y-%m-%d %H:%M:%S')

            echo "$formatted_time - Size: ${size_mb} MB"
        done
}

# Predict usage for every subdirectory of the current directory.
generate_prediction_report() {
    local report_file="$PREDICTION_DIR/prediction_report_$(date +%Y%m%d).txt"

    {
        echo "Disk Usage Prediction Report - $(date +%Y-%m-%d)"
        echo "============================="
        echo ""
        echo "=== Current Disk Usage ==="
        du -sh -- * 2>/dev/null | sort -hr
        echo ""
        echo "=== Predictions ==="
        # Glob instead of parsing ls output; guard handles the no-match case.
        local directory
        for directory in */; do
            [ -d "$directory" ] || continue
            predict_disk_usage "${directory%/}" 30
            echo ""
        done
    } > "$report_file"

    echo "Prediction report saved to: $report_file"
}

# Dispatch on the first argument.
main() {
    case "$1" in
        collect)
            collect_disk_data "$2"
            ;;
        predict)
            predict_disk_usage "$2" "$3"
            ;;
        trend)
            analyze_disk_trend "$2" "$3"
            ;;
        report)
            generate_prediction_report
            ;;
        *)
            echo "Usage: $0 {collect|predict|trend|report}"
            exit 1
            ;;
    esac
}

main "$@"

最佳实践#

  1. 使用人类可读格式:使用 -h 选项以人类可读格式显示
  2. 限制显示深度:使用 --max-depth 选项限制显示深度
  3. 按大小排序:结合 sort -hr 按大小排序
  4. 定期分析磁盘使用:定期使用 du 分析磁盘使用情况
  5. 查找大文件:使用 find 和 du 组合查找大文件
  6. 清理不必要的文件:定期清理日志、临时文件等
  7. 监控磁盘增长:监控磁盘使用趋势,预测未来需求
  8. 使用排除选项:使用 --exclude 排除不需要的文件

注意事项#

  • du 显示的是磁盘使用量,不是文件大小
  • 硬链接的文件只计算一次
  • 符号链接默认不计算,使用 -L 选项计算
  • 不同文件系统的 du 结果可能不同
  • du 需要遍历整个目录树,可能需要较长时间
  • 在大型目录上运行 du 可能会消耗较多资源
  • 注意权限问题,某些文件可能无法访问
  • 在生产环境中删除文件时要格外小心
  • 对于关键业务,建议使用专业的磁盘管理工具