top 命令详解#
top 是 Linux 系统中用于实时显示系统进程和资源使用情况的命令行工具。它提供了动态的、实时的系统监控视图,可以查看 CPU、内存、进程等系统资源的使用情况,是系统管理员和开发人员进行性能分析和故障排查的重要工具。
入门#
基本用法#
# 启动 top
top
# 指定更新间隔(秒)
top -d 5
# 指定刷新次数(刷新 10 次后退出;-n 不是进程数)
top -n 10
# 指定用户
top -u username
# 指定 PID
top -p 1234
常用选项#
| 选项 | 说明 |
|---|---|
| -d | 指定更新间隔(秒) |
| -n | 指定刷新次数(达到次数后退出) |
| -u | 指定用户 |
| -p | 指定 PID |
| -b | 批处理模式 |
| -H | 显示线程模式 |
| -c | 显示完整命令行 |
| -s | 安全模式 |
基本示例#
# 启动 top
top
# 输出示例:
# top - 10:00:00 up 1 day, 2:30, 2 users, load average: 0.15, 0.10, 0.05
# Tasks: 123 total, 1 running, 122 sleeping, 0 stopped, 0 zombie
# %Cpu(s): 5.0 us, 2.0 sy, 0.0 ni, 92.0 id, 1.0 wa, 0.0 hi, 0.0 si, 0.0 st
# MiB Mem: 8192.0 total, 4096.0 free, 2048.0 used, 2048.0 buff/cache
# MiB Swap: 2048.0 total, 2048.0 free, 0.0 used. 6144.0 avail Mem
#
# PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
# 1234 root 20 0 123456 56789 1234 S 2.0 0.7 0:01.23 nginx
# 5678 mysql 20 0 234567 123456 5678 S 1.5 1.5 0:05.67 mysqld
# 9012 user 20 0 34567 23456 7890 R 0.5 0.3 0:00.89 python
中级#
交互式命令#
# 在 top 运行时使用以下快捷键:
# 排序
P - 按 CPU 使用率排序
M - 按内存使用排序
N - 按 PID 排序
T - 按运行时间排序
# 显示/隐藏
1 - 显示所有 CPU 核心
H - 显示线程
# 进程操作与退出
k - 杀死进程
r - 修改进程优先级
q - 退出 top
# 切换
z - 切换彩色/单色显示
l - 切换负载平均值显示
t - 切换任务和 CPU 状态显示
m - 切换内存信息显示
进程筛选#
# 显示特定用户的进程
top -u username
# 显示特定用户的进程(交互式)
top
u username
# 显示特定 PID
top -p 1234
# 显示多个 PID
top -p 1234,5678,9012
# 显示特定进程名的进程
top -p $(pgrep nginx | tr '\n' ',' | sed 's/,$//')
输出控制#
# 批处理模式(输出到文件)
top -b -n 1 > top_output.txt
# 指定更新次数
top -n 10
# 指定更新间隔
top -d 2
# 组合使用
top -b -d 5 -n 3 > top_batch.txt
# 显示完整命令行
top -c
# 不显示空闲进程
top -i
高级#
高级选项#
# 显示线程模式
top -H
# 显示特定用户的线程
top -H -u username
# 显示完整命令行
top -c
# 安全模式(禁用某些交互命令)
top -s
# 批处理模式下输出完整命令行并保存到文件
top -c -b -n 1 > output.txt
# 显示累积模式
top -S
# 按指定字段排序(-o 指定的是排序字段,而不是显示字段)
top -o %CPU
top -o %MEM
top -o TIME+
自定义显示#
# 自定义显示字段:top 没有用于选择显示列的命令行选项,
# 需要在 top 运行时按 f 进入字段管理界面进行选择
# -o 只接受单个字段名,用于指定排序字段
top -o %CPU
# 不支持同时指定多个排序字段;如需按其他字段排序,请改用对应的字段名
top -o %MEM
# 限制刷新次数(-n 指定的是刷新次数,而不是进程数)
top -n 20
# 如需限制输出的进程行数,可在批处理模式下配合 head 使用(7 行头部信息 + 20 个进程)
top -b -n 1 | head -n 27
# 保存配置
# 在 top 中按 W 保存当前配置
性能监控#
#!/bin/bash
# top performance monitoring script
# Total monitoring window; divided by INTERVAL to get the number of snapshots.
DURATION=60
# Pause between two snapshots, passed to `sleep` (seconds).
INTERVAL=5
# Report file name; the timestamp is fixed once, when this line is evaluated.
OUTPUT_FILE="top_monitor_$(date +%Y%m%d_%H%M%S).txt"
# Capture periodic `top` snapshots into $OUTPUT_FILE.
# Globals:   DURATION (read), INTERVAL (read), OUTPUT_FILE (read)
# Outputs:   report written to $OUTPUT_FILE; completion line on stdout
# Returns:   1 when INTERVAL is not a positive integer, 0 otherwise
monitor_performance() {
  local iterations i
  # A zero or non-numeric interval would divide by zero below.
  if ! [[ "$INTERVAL" =~ ^[1-9][0-9]*$ ]]; then
    echo "Invalid INTERVAL: $INTERVAL" >&2
    return 1
  fi
  iterations=$((DURATION / INTERVAL))
  {
    echo "System Performance Monitor - $(date)"
    echo "================================="
    echo ""
  } > "$OUTPUT_FILE"
  for ((i = 1; i <= iterations; i++)); do
    {
      echo "=== Snapshot $i - $(date '+%Y-%m-%d %H:%M:%S') ==="
      # Keep only the first 20 lines of the batch-mode snapshot.
      top -b -n 1 | head -20
      echo ""
    } >> "$OUTPUT_FILE"
    sleep "$INTERVAL"
  done
  echo "Monitoring completed. Results saved to: $OUTPUT_FILE"
}
# Summarize a snapshot report such as the one written by monitor_performance.
# Arguments: $1 - path to the report file
# Outputs:   average CPU busy percentage (100 - idle), average used memory
#            in MiB, and the last load-average field of every snapshot
# Returns:   1 when the report cannot be read
analyze_performance() {
  local input_file=$1
  if [[ ! -r "$input_file" ]]; then
    echo "Cannot read input file: $input_file" >&2
    return 1
  fi
  echo "=== Performance Analysis ==="
  echo ""
  # CPU usage: take the idle figure by label, because top prints it fused to
  # the previous field when it reaches 100.0 ("ni,100.0 id").
  echo "CPU Usage:"
  awk '
    /Cpu\(s\)/ && match($0, /[0-9.]+[ %]*id/) {
      sum += 100 - (substr($0, RSTART, RLENGTH) + 0)
      count++
    }
    END {
      if (count) print "Average:", sum / count, "%"
      else print "Average: n/a"
    }' "$input_file"
  echo ""
  # Memory usage: pick the number in front of "used", not a fixed column
  # (the column position depends on whether top prints "Mem :" or "Mem:").
  echo "Memory Usage:"
  awk '
    /MiB Mem/ && match($0, /[0-9.]+ +used/) {
      sum += substr($0, RSTART, RLENGTH) + 0
      count++
    }
    END {
      if (count) print "Average:", sum / count, "MiB"
      else print "Average: n/a"
    }' "$input_file"
  echo ""
  # Load average: last field of each summary line.
  echo "Load Average:"
  awk '/load average/ { print $NF }' "$input_file"
}
# Entry point: dispatch on the first argument.
#   monitor        - record snapshots
#   analyze <file> - summarize a recorded report
# Any other value prints the usage line and exits with status 1.
main() {
  local action=$1
  if [[ "$action" == "monitor" ]]; then
    monitor_performance
  elif [[ "$action" == "analyze" ]]; then
    analyze_performance "$2"
  else
    echo "Usage: $0 {monitor|analyze}"
    exit 1
  fi
}
main "$@"
大师#
进程资源分析#
#!/bin/bash
# Process resource analysis script
# Captures the script's first positional argument at load time (main still
# receives the full argument list through "$@").
PROCESS_NAME=$1
# Analysis duration setting (presumably seconds - TODO confirm how it is consumed).
DURATION=60
# Record the `top` rows matching a process name every 5 seconds.
# Globals:   DURATION (read, used as the default duration)
# Arguments: $1 - process name (grep pattern matched against top output)
#            $2 - total duration in seconds; defaults to $DURATION, then 60
# Outputs:   process_<name>_<timestamp>.txt in the current directory;
#            completion line on stdout
# Returns:   1 on a missing name or a non-numeric duration
analyze_process() {
  local process_name=$1
  # main passes "$3" even when it is absent, so fall back to the default.
  local duration=${2:-${DURATION:-60}}
  local output_file iterations i
  if [[ -z "$process_name" ]]; then
    echo "Process name is required" >&2
    return 1
  fi
  if ! [[ "$duration" =~ ^[0-9]+$ ]]; then
    echo "Invalid duration: $duration" >&2
    return 1
  fi
  output_file="process_${process_name}_$(date +%Y%m%d_%H%M%S).txt"
  # 10# forces base 10 so a value such as 090 is not read as octal.
  iterations=$((10#$duration / 5))
  {
    echo "Process Analysis: $process_name"
    echo "Analysis Time: $(date)"
    echo ""
  } > "$output_file"
  for ((i = 1; i <= iterations; i++)); do
    {
      echo "=== Snapshot $i - $(date '+%Y-%m-%d %H:%M:%S') ==="
      # "--" keeps a name starting with "-" from being parsed as an option.
      top -b -n 1 | grep -- "$process_name"
      echo ""
    } >> "$output_file"
    sleep 5
  done
  echo "Analysis completed: $output_file"
}
# List processes whose CPU or memory percentage exceeds a threshold.
# Arguments: $1 - numeric threshold (percent)
#            $2 - metric: "cpu" (column 9 of top) or "mem" (column 10)
# Outputs:   "PID COMMAND VALUE%" per matching process
# Returns:   1 on an unknown metric or a non-numeric threshold
find_high_usage_processes() {
  local threshold=$1
  local metric=$2
  local column
  case "$metric" in
    cpu) column=9 ;;
    mem) column=10 ;;
    *)
      echo "Invalid metric: $metric"
      return 1
      ;;
  esac
  # An empty or non-numeric threshold would make awk compare as strings and
  # report every process.
  if ! [[ "$threshold" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "Invalid threshold: $threshold" >&2
    return 1
  fi
  # NR>7 skips the summary area and the column header of batch-mode top.
  top -b -n 1 | awk -v threshold="$threshold" -v col="$column" \
    'NR > 7 && ($col + 0) > (threshold + 0) { print $1, $12, $col "%" }'
}
# Helper: print lines 8-17 of a batch-mode top run (the first ten process
# rows after the 7-line header); extra arguments are forwarded to top.
_report_top_rows() {
  top -b -n 1 "$@" | head -17 | tail -10
}

# Write a daily report with the top CPU, memory and run-time consumers.
# Outputs: process_report_<YYYYMMDD>.txt in the current directory;
#          confirmation line on stdout
generate_process_report() {
  local report_file
  report_file="process_report_$(date +%Y%m%d).txt"
  {
    printf 'Process Report - %s\n' "$(date +%Y-%m-%d)"
    printf '%s\n\n' "===================="
    printf '%s\n' "Top 10 CPU consuming processes:"
    _report_top_rows
    printf '\n%s\n' "Top 10 Memory consuming processes:"
    _report_top_rows -o %MEM
    printf '\n%s\n' "Top 10 Longest running processes:"
    _report_top_rows -o TIME+
  } > "$report_file"
  printf 'Report saved to: %s\n' "$report_file"
}
# Entry point: dispatch on the first argument.
#   analyze <name> <duration> - record matching top rows
#   find <threshold> <metric> - list processes above a threshold
#   report                    - write the daily process report
# Any other value prints the usage line and exits with status 1.
main() {
  local action=$1
  if [[ "$action" == "analyze" ]]; then
    analyze_process "$2" "$3"
  elif [[ "$action" == "find" ]]; then
    find_high_usage_processes "$2" "$3"
  elif [[ "$action" == "report" ]]; then
    generate_process_report
  else
    echo "Usage: $0 {analyze|find|report}"
    exit 1
  fi
}
main "$@"
系统健康监控#
#!/bin/bash
# System health monitoring script
# Alert thresholds compared against the values extracted by the check_* functions.
ALERT_CPU=80
ALERT_MEM=80
ALERT_LOAD=2.0
# File that receives ALERT lines and monitor timestamps (relative path).
LOG_FILE="system_health_monitor.log"
# Recipient passed to `mail` when a check raises an alert.
ALERT_EMAIL="admin@example.com"
# Check overall CPU usage against ALERT_CPU.
# Usage is computed as 100 - idle from top's "%Cpu(s)" summary line, so it
# covers user, system, iowait and the other non-idle states.
# Globals:   ALERT_CPU, LOG_FILE, ALERT_EMAIL (read)
# Outputs:   "CPU Usage: N%" on stdout; on alert, a log line and a mail
# Returns:   0 below the threshold, 1 on alert, 2 when top output cannot be parsed
check_cpu_usage() {
  local cpu_usage
  # Match the idle figure by its label: top prints "ni,100.0 id" without a
  # separating space when idle reaches 100.
  cpu_usage=$(top -b -n 1 | awk '
    /Cpu\(s\)/ && match($0, /[0-9.]+[ %]*id/) {
      printf "%.1f", 100 - (substr($0, RSTART, RLENGTH) + 0)
      exit
    }')
  if [[ -z "$cpu_usage" ]]; then
    echo "CPU Usage: unavailable" >&2
    return 2
  fi
  echo "CPU Usage: ${cpu_usage}%"
  # awk does the floating-point comparison, so bc is not required.
  if awk -v v="$cpu_usage" -v t="$ALERT_CPU" 'BEGIN { exit !((v + 0) > (t + 0)) }'; then
    echo "ALERT: High CPU usage: ${cpu_usage}%" >> "$LOG_FILE"
    echo "High CPU usage: ${cpu_usage}%" | mail -s "System Alert" "$ALERT_EMAIL"
    return 1
  fi
  return 0
}
# Check memory usage (used / total, in percent) against ALERT_MEM.
# Globals:   ALERT_MEM, LOG_FILE, ALERT_EMAIL (read)
# Outputs:   "Memory Usage: N%" on stdout; on alert, a log line and a mail
# Returns:   0 below the threshold, 1 on alert, 2 when top output cannot be parsed
check_memory_usage() {
  local mem_usage
  # top reports absolute amounts, not a percentage; locate "total" and "used"
  # by label because their column position depends on the top version.
  mem_usage=$(top -b -n 1 | awk '
    /^[KMGTPE]iB Mem/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^total/) total = $(i - 1) + 0
        else if ($i ~ /^used/) used = $(i - 1) + 0
      }
      if (total > 0) printf "%.1f", used * 100 / total
      exit
    }')
  if [[ -z "$mem_usage" ]]; then
    echo "Memory Usage: unavailable" >&2
    return 2
  fi
  echo "Memory Usage: ${mem_usage}%"
  if awk -v v="$mem_usage" -v t="$ALERT_MEM" 'BEGIN { exit !((v + 0) > (t + 0)) }'; then
    echo "ALERT: High memory usage: ${mem_usage}%" >> "$LOG_FILE"
    echo "High memory usage: ${mem_usage}%" | mail -s "System Alert" "$ALERT_EMAIL"
    return 1
  fi
  return 0
}
# Check the 1-minute load average against ALERT_LOAD.
# Globals:   ALERT_LOAD, LOG_FILE, ALERT_EMAIL (read)
# Outputs:   "Load Average: N" on stdout; on alert, a log line and a mail
# Returns:   0 below the threshold, 1 on alert, 2 when uptime output cannot be parsed
check_system_load() {
  local load_avg
  # First value after "load average:", with the trailing comma removed.
  load_avg=$(uptime | awk -F'load average:' '
    NF > 1 {
      split($2, parts, ",")
      gsub(/[[:space:]]/, "", parts[1])
      print parts[1]
      exit
    }')
  if [[ -z "$load_avg" ]]; then
    echo "Load Average: unavailable" >&2
    return 2
  fi
  echo "Load Average: $load_avg"
  # awk does the floating-point comparison, so bc is not required.
  if awk -v v="$load_avg" -v t="$ALERT_LOAD" 'BEGIN { exit !((v + 0) > (t + 0)) }'; then
    echo "ALERT: High system load: $load_avg" >> "$LOG_FILE"
    echo "High system load: $load_avg" | mail -s "System Alert" "$ALERT_EMAIL"
    return 1
  fi
  return 0
}
# Check the zombie count reported in top's "Tasks:" summary line.
# Globals:   LOG_FILE, ALERT_EMAIL (read)
# Outputs:   "Zombie Processes: N" on stdout; on alert, a log line and a mail
# Returns:   0 when there are none, 1 on alert, 2 when top output cannot be parsed
check_zombie_processes() {
  local zombie_count
  # Take the number in front of the "zombie" label; a fixed column is fragile
  # (field 8 of the Tasks line is the *stopped* count).
  zombie_count=$(top -b -n 1 | awk '
    /^Tasks:/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^zombie/) { print $(i - 1); exit }
      }
    }')
  if ! [[ "$zombie_count" =~ ^[0-9]+$ ]]; then
    echo "Zombie Processes: unavailable" >&2
    return 2
  fi
  echo "Zombie Processes: $zombie_count"
  if (( 10#$zombie_count > 0 )); then
    echo "ALERT: $zombie_count zombie processes detected" >> "$LOG_FILE"
    echo "$zombie_count zombie processes detected" | mail -s "System Alert" "$ALERT_EMAIL"
    return 1
  fi
  return 0
}
# Endless monitoring loop: every 300 seconds write a timestamp header to
# $LOG_FILE, run the four health checks, then write a blank separator line.
main_monitor() {
  local probe stamp
  printf '%s\n' "System Health Monitor started" >> $LOG_FILE
  until false; do
    stamp=$(date "+%Y-%m-%d %H:%M:%S")
    printf '=== %s ===\n' "$stamp" >> $LOG_FILE
    for probe in check_cpu_usage check_memory_usage check_system_load check_zombie_processes; do
      "$probe"
    done
    printf '\n' >> $LOG_FILE
    sleep 300
  done
}
# Run every health check once, preceded by a timestamped banner on stdout.
# The return status is that of the last check executed.
single_check() {
  local probe
  printf '=== System Health Check - %s ===\n' "$(date "+%Y-%m-%d %H:%M:%S")"
  for probe in check_cpu_usage check_memory_usage check_system_load check_zombie_processes; do
    "$probe"
  done
}
# Entry point: "monitor" starts the endless loop, "check" runs one pass.
# Any other value prints the usage line and exits with status 1.
main() {
  local action=$1
  if [[ "$action" == "monitor" ]]; then
    main_monitor
  elif [[ "$action" == "check" ]]; then
    single_check
  else
    echo "Usage: $0 {monitor|check}"
    exit 1
  fi
}
main "$@"
批量进程管理#
#!/bin/bash
# 批量进程管理脚本
# Print the lines of one batch-mode top snapshot that match a grep pattern.
# Arguments: $1 - pattern handed to grep
find_processes() {
  local needle=$1
  printf 'Searching for processes matching: %s\n' "$needle"
  top -b -n 1 | grep "$needle"
}
# Print the top rows whose %CPU (column 9) exceeds a threshold.
# Arguments: $1 - numeric threshold (percent)
# Returns:   1 when the threshold is missing or not numeric
find_high_cpu_processes() {
  local threshold=$1
  # Without this guard an empty threshold makes awk compare as strings, and
  # every process is reported.
  if ! [[ "$threshold" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "Invalid threshold: $threshold" >&2
    return 1
  fi
  echo "Finding processes with CPU usage > ${threshold}%"
  # NR>7 skips the summary area and the column header.
  top -b -n 1 | awk -v threshold="$threshold" 'NR > 7 && ($9 + 0) > (threshold + 0)'
}
# Print the top rows whose %MEM (column 10) exceeds a threshold.
# Arguments: $1 - numeric threshold (percent)
# Returns:   1 when the threshold is missing or not numeric
find_high_memory_processes() {
  local threshold=$1
  # Without this guard an empty threshold makes awk compare as strings, and
  # every process is reported.
  if ! [[ "$threshold" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "Invalid threshold: $threshold" >&2
    return 1
  fi
  echo "Finding processes with memory usage > ${threshold}%"
  # NR>7 skips the summary area and the column header.
  top -b -n 1 | awk -v threshold="$threshold" 'NR > 7 && ($10 + 0) > (threshold + 0)'
}
# Print the top rows whose state column (field 8, "S") is Z.
# A plain grep for "Z" would also match any user or command name that
# contains a capital Z.
find_zombie_processes() {
  echo "Finding zombie processes"
  # NR>7 skips the summary area and the column header.
  top -b -n 1 | awk 'NR > 7 && $8 == "Z"'
}
# Print the top rows owned by a given user (USER column, field 2).
# Arguments: $1 - user name
# Returns:   1 when no user name is given
find_user_processes() {
  local username=$1
  if [[ -z "$username" ]]; then
    echo "User name is required" >&2
    return 1
  fi
  echo "Finding processes for user: $username"
  # Compare the USER column only, so a command that merely contains the name
  # is not reported. top's default 8-column USER field shows longer names as
  # the first 7 characters followed by "+", so accept that form as well.
  top -b -n 1 | awk -v user="$username" '
    NR > 7 && ($2 == user || (length(user) > 8 && $2 == substr(user, 1, 7) "+"))'
}
# Print task counts plus CPU and memory usage taken from ONE top snapshot,
# so all figures describe the same moment.
# CPU usage is 100 - idle; memory usage is used / total in percent.
generate_statistics() {
  echo "=== Process Statistics ==="
  echo ""
  top -b -n 1 | awk '
    # Only the first occurrence of each summary line is used, so a process
    # row that happens to contain the same text cannot override it.
    !seen_tasks && /^Tasks:/ {
      total = $2; running = $4; sleeping = $6; zombie = $10
      seen_tasks = 1
    }
    !seen_cpu && /Cpu\(s\)/ && match($0, /[0-9.]+[ %]*id/) {
      cpu = sprintf("%.1f", 100 - (substr($0, RSTART, RLENGTH) + 0))
      seen_cpu = 1
    }
    !seen_mem && /^[KMGTPE]iB Mem/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^total/) mem_total = $(i - 1) + 0
        else if ($i ~ /^used/) mem_used = $(i - 1) + 0
      }
      if (mem_total > 0) mem = sprintf("%.1f", mem_used * 100 / mem_total)
      seen_mem = 1
    }
    END {
      print "Total processes: " total
      print "Running processes: " running
      print "Sleeping processes: " sleeping
      print "Zombie processes: " zombie
      print "CPU usage: " cpu "%"
      print "Memory usage: " mem "%"
    }'
}
# Entry point: dispatch on the first argument; sub-commands that need an
# operand receive the second argument.
# Any unknown value prints the usage line and exits with status 1.
main() {
  local action=$1
  if [[ "$action" == "find" ]]; then
    find_processes "$2"
  elif [[ "$action" == "high-cpu" ]]; then
    find_high_cpu_processes "$2"
  elif [[ "$action" == "high-mem" ]]; then
    find_high_memory_processes "$2"
  elif [[ "$action" == "zombie" ]]; then
    find_zombie_processes
  elif [[ "$action" == "user" ]]; then
    find_user_processes "$2"
  elif [[ "$action" == "stats" ]]; then
    generate_statistics
  else
    echo "Usage: $0 {find|high-cpu|high-mem|zombie|user|stats}"
    exit 1
  fi
}
main "$@"
无敌#
企业级性能监控系统#
#!/bin/bash
# Enterprise performance monitoring system
CONFIG_FILE="/etc/performance_monitor/config.conf"
LOG_DIR="/var/log/performance_monitor"
METRICS_DB="/var/lib/performance_monitor/metrics.db"
# Make sure the report directory and the metrics file's directory exist.
mkdir -p -- "$LOG_DIR" "$(dirname -- "$METRICS_DB")"
# Load configuration. The script keeps running without it, as before, but
# says clearly which file is missing.
# NOTE(review): the config presumably defines COLLECT_INTERVAL, which the
# monitor loop reads - confirm against the deployed file.
if [[ -r "$CONFIG_FILE" ]]; then
  # shellcheck source=/dev/null
  source "$CONFIG_FILE"
else
  echo "Warning: configuration file not readable: $CONFIG_FILE" >&2
fi
# Append one sample of system-wide metrics to $METRICS_DB as CSV rows of the
# form "<epoch>,<metric>,<value>" (cpu, memory, load, processes, running).
# All top-derived values come from a single snapshot. cpu is 100 - idle and
# memory is used / total, both in percent.
# Globals: METRICS_DB (appended to)
collect_system_metrics() {
  local timestamp load_avg
  timestamp=$(date +%s)
  # 1-minute load average: first value after "load average:".
  load_avg=$(uptime | awk -F'load average:' '
    NF > 1 {
      split($2, parts, ",")
      gsub(/[[:space:]]/, "", parts[1])
      print parts[1]
      exit
    }')
  top -b -n 1 | awk -v ts="$timestamp" -v load="$load_avg" '
    !seen_tasks && /^Tasks:/ {
      total = $2; running = $4
      seen_tasks = 1
    }
    # Idle is matched by label because top prints "ni,100.0 id" at 100%.
    !seen_cpu && /Cpu\(s\)/ && match($0, /[0-9.]+[ %]*id/) {
      cpu = sprintf("%.1f", 100 - (substr($0, RSTART, RLENGTH) + 0))
      seen_cpu = 1
    }
    !seen_mem && /^[KMGTPE]iB Mem/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^total/) mem_total = $(i - 1) + 0
        else if ($i ~ /^used/) mem_used = $(i - 1) + 0
      }
      if (mem_total > 0) mem = sprintf("%.1f", mem_used * 100 / mem_total)
      seen_mem = 1
    }
    END {
      print ts ",cpu," cpu
      print ts ",memory," mem
      print ts ",load," load
      print ts ",processes," total
      print ts ",running," running
    }' >> "$METRICS_DB"
}
# Append the first ten process rows of one top snapshot to $METRICS_DB as
# "<epoch>,pid_<PID>,<%CPU>,<%MEM>,<COMMAND>".
# Globals: METRICS_DB (appended to)
collect_process_metrics() {
  local timestamp
  timestamp=$(date +%s)
  # One awk pass replaces the per-line echo/awk pipeline. Rows are selected by
  # line number (8-17, after the 7-line header) so that header or blank lines
  # are never recorded when fewer than ten processes are listed, and process
  # names are never subject to shell globbing.
  top -b -n 1 | awk -v ts="$timestamp" '
    NR > 7 && NR <= 17 && NF {
      print ts ",pid_" $1 "," $9 "," $10 "," $12
    }' >> "$METRICS_DB"
}
# Print the recorded values of one metric over the last N seconds.
# Globals:   METRICS_DB (read)
# Arguments: $1 - metric name, matched exactly against the second CSV field
#            $2 - look-back window in seconds
# Outputs:   banner line, then "<YYYY-MM-DD HH:MM:SS>: <value>" per sample
# Returns:   1 on a non-numeric window or an unreadable metrics file
analyze_metric_trend() {
  local metric=$1
  local duration=$2
  local end_time start_time ts value
  if ! [[ "$duration" =~ ^[0-9]+$ ]]; then
    echo "Invalid duration: $duration" >&2
    return 1
  fi
  end_time=$(date +%s)
  start_time=$((end_time - 10#$duration))
  echo "=== $metric Trend Analysis (last $duration seconds) ==="
  if [[ ! -r "$METRICS_DB" ]]; then
    echo "Metrics file not readable: $METRICS_DB" >&2
    return 1
  fi
  # Match the metric on its own field: a substring search for ",cpu," would
  # also pick up per-process rows whose later fields contain the same text.
  while IFS=, read -r ts _ value _; do
    echo "$(date -d "@$ts" '+%Y-%m-%d %H:%M:%S'): $value"
  done < <(awk -F, -v m="$metric" -v start="$start_time" -v end="$end_time" \
    '$2 == m && $1 >= start && $1 <= end' "$METRICS_DB")
}
# Write the daily report: one 24-hour (86400 s) trend section each for the
# cpu, memory and load metrics, into
# $LOG_DIR/performance_report_<YYYYMMDD>.txt.
generate_performance_report() {
  local report_file section title metric
  report_file="$LOG_DIR/performance_report_$(date +%Y%m%d).txt"
  {
    printf 'Performance Report - %s\n' "$(date +%Y-%m-%d)"
    printf '%s\n\n' "========================="
    # Each entry is "<section title>|<metric name>".
    for section in "CPU Usage Trend:|cpu" "Memory Usage Trend:|memory" "Load Average Trend:|load"; do
      title=${section%%|*}
      metric=${section##*|}
      printf '%s\n' "$title"
      analyze_metric_trend "$metric" 86400
      printf '\n'
    done
  } > "$report_file"
  printf 'Report saved to: %s\n' "$report_file"
}
# Endless collection loop: sample system and process metrics every
# COLLECT_INTERVAL seconds (60 when unset) and generate the report once per
# calendar day.
# Globals: COLLECT_INTERVAL (read)
main_monitor() {
  # Without a default, an unset interval makes `sleep` fail immediately and
  # the loop spins at full speed.
  local interval=${COLLECT_INTERVAL:-60}
  local last_report_day today
  last_report_day=$(date +%Y%m%d)
  while true; do
    collect_system_metrics
    collect_process_metrics
    # Generate the report on the first pass after the date changes. Testing
    # for exactly 00:00 misses the minute when the interval is longer than
    # 60 s and fires several times when it is shorter.
    today=$(date +%Y%m%d)
    if [[ "$today" != "$last_report_day" ]]; then
      generate_performance_report
      last_report_day=$today
    fi
    sleep "$interval"
  done
}
# Entry point: dispatch on the first argument.
#   monitor                    - start the collection loop
#   analyze <metric> <seconds> - print a metric trend
#   report                     - write the daily report
# Any other value prints the usage line and exits with status 1.
main() {
  local action=$1
  if [[ "$action" == "monitor" ]]; then
    main_monitor
  elif [[ "$action" == "analyze" ]]; then
    analyze_metric_trend "$2" "$3"
  elif [[ "$action" == "report" ]]; then
    generate_performance_report
  else
    echo "Usage: $0 {monitor|analyze|report}"
    exit 1
  fi
}
main "$@"
智能告警系统#
#!/bin/bash
# Smart alerting system
CONFIG_FILE="/etc/smart_alert/config.conf"
LOG_FILE="/var/log/smart_alert/alerts.log"
ALERT_HISTORY="/var/lib/smart_alert/history.db"
# Make sure the directories of the log and history files exist.
mkdir -p -- "$(dirname -- "$LOG_FILE")" "$(dirname -- "$ALERT_HISTORY")"
# Load configuration. The script keeps running without it, as before, but
# says clearly which file is missing.
# NOTE(review): the config presumably defines the *_ALERT_THRESHOLD values,
# CHECK_INTERVAL and the ENABLE_* / ALERT_EMAIL / SLACK_WEBHOOK settings read
# by the functions below - confirm against the deployed file.
if [[ -r "$CONFIG_FILE" ]]; then
  # shellcheck source=/dev/null
  source "$CONFIG_FILE"
else
  echo "Warning: configuration file not readable: $CONFIG_FILE" >&2
fi
# Raise an alert when overall CPU usage (100 - idle) exceeds
# CPU_ALERT_THRESHOLD.
# Globals:   CPU_ALERT_THRESHOLD, ALERT_HISTORY (read)
# Outputs:   on alert, a history row "<epoch>,<id>,cpu,<N>%,<text>", a log
#            entry via log_alert and a notification via send_alert
# Returns:   0 below the threshold, 1 on alert, 2 when the usage or the
#            threshold is unavailable
check_cpu_alert() {
  local cpu_usage timestamp alert_id
  local threshold=$CPU_ALERT_THRESHOLD
  # Idle is matched by label because top prints "ni,100.0 id" at 100%.
  cpu_usage=$(top -b -n 1 | awk '
    /Cpu\(s\)/ && match($0, /[0-9.]+[ %]*id/) {
      printf "%.1f", 100 - (substr($0, RSTART, RLENGTH) + 0)
      exit
    }')
  if [[ -z "$cpu_usage" || -z "$threshold" ]]; then
    echo "check_cpu_alert: CPU usage or threshold unavailable" >&2
    return 2
  fi
  # awk does the floating-point comparison, so bc is not required.
  if awk -v v="$cpu_usage" -v t="$threshold" 'BEGIN { exit !((v + 0) > (t + 0)) }'; then
    timestamp=$(date +%s)
    alert_id=$(date +%s%N)
    echo "$timestamp,$alert_id,cpu,${cpu_usage}%,High CPU usage detected" >> "$ALERT_HISTORY"
    log_alert "CPU ALERT: ${cpu_usage}% usage detected"
    send_alert "High CPU usage: ${cpu_usage}%"
    return 1
  fi
  return 0
}
# Raise an alert when memory usage (used / total, in percent) exceeds
# MEMORY_ALERT_THRESHOLD.
# Globals:   MEMORY_ALERT_THRESHOLD, ALERT_HISTORY (read)
# Outputs:   on alert, a history row "<epoch>,<id>,memory,<N>%,<text>", a log
#            entry via log_alert and a notification via send_alert
# Returns:   0 below the threshold, 1 on alert, 2 when the usage or the
#            threshold is unavailable
check_memory_alert() {
  local mem_usage timestamp alert_id
  local threshold=$MEMORY_ALERT_THRESHOLD
  # top reports absolute amounts, not a percentage; locate "total" and "used"
  # by label because their column position depends on the top version.
  mem_usage=$(top -b -n 1 | awk '
    /^[KMGTPE]iB Mem/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^total/) total = $(i - 1) + 0
        else if ($i ~ /^used/) used = $(i - 1) + 0
      }
      if (total > 0) printf "%.1f", used * 100 / total
      exit
    }')
  if [[ -z "$mem_usage" || -z "$threshold" ]]; then
    echo "check_memory_alert: memory usage or threshold unavailable" >&2
    return 2
  fi
  if awk -v v="$mem_usage" -v t="$threshold" 'BEGIN { exit !((v + 0) > (t + 0)) }'; then
    timestamp=$(date +%s)
    alert_id=$(date +%s%N)
    echo "$timestamp,$alert_id,memory,${mem_usage}%,High memory usage detected" >> "$ALERT_HISTORY"
    log_alert "MEMORY ALERT: ${mem_usage}% usage detected"
    send_alert "High memory usage: ${mem_usage}%"
    return 1
  fi
  return 0
}
# Raise an alert when the 1-minute load average exceeds LOAD_ALERT_THRESHOLD.
# Globals:   LOAD_ALERT_THRESHOLD, ALERT_HISTORY (read)
# Outputs:   on alert, a history row "<epoch>,<id>,load,<N>,<text>", a log
#            entry via log_alert and a notification via send_alert
# Returns:   0 below the threshold, 1 on alert, 2 when the load or the
#            threshold is unavailable
check_load_alert() {
  local load_avg timestamp alert_id
  local threshold=$LOAD_ALERT_THRESHOLD
  # First value after "load average:", with the trailing comma removed.
  load_avg=$(uptime | awk -F'load average:' '
    NF > 1 {
      split($2, parts, ",")
      gsub(/[[:space:]]/, "", parts[1])
      print parts[1]
      exit
    }')
  if [[ -z "$load_avg" || -z "$threshold" ]]; then
    echo "check_load_alert: load average or threshold unavailable" >&2
    return 2
  fi
  # awk does the floating-point comparison, so bc is not required.
  if awk -v v="$load_avg" -v t="$threshold" 'BEGIN { exit !((v + 0) > (t + 0)) }'; then
    timestamp=$(date +%s)
    alert_id=$(date +%s%N)
    echo "$timestamp,$alert_id,load,${load_avg},High system load detected" >> "$ALERT_HISTORY"
    log_alert "LOAD ALERT: ${load_avg} load detected"
    send_alert "High system load: $load_avg"
    return 1
  fi
  return 0
}
# Raise an alert when the zombie count from top's "Tasks:" line exceeds
# ZOMBIE_ALERT_THRESHOLD.
# Globals:   ZOMBIE_ALERT_THRESHOLD, ALERT_HISTORY (read)
# Outputs:   on alert, a history row "<epoch>,<id>,zombie,<N>,<text>", a log
#            entry via log_alert and a notification via send_alert
# Returns:   0 at or below the threshold, 1 on alert, 2 when the count or
#            the threshold is not an integer
check_zombie_alert() {
  local zombie_count timestamp alert_id
  local threshold=$ZOMBIE_ALERT_THRESHOLD
  # Take the number in front of the "zombie" label, not a fixed column.
  zombie_count=$(top -b -n 1 | awk '
    /^Tasks:/ {
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^zombie/) { print $(i - 1); exit }
      }
    }')
  # An unquoted `[ $a -gt $b ]` fails with a syntax error on empty values.
  if ! [[ "$zombie_count" =~ ^[0-9]+$ && "$threshold" =~ ^[0-9]+$ ]]; then
    echo "check_zombie_alert: zombie count or threshold unavailable" >&2
    return 2
  fi
  if (( 10#$zombie_count > 10#$threshold )); then
    timestamp=$(date +%s)
    alert_id=$(date +%s%N)
    echo "$timestamp,$alert_id,zombie,${zombie_count},Zombie processes detected" >> "$ALERT_HISTORY"
    log_alert "ZOMBIE ALERT: $zombie_count zombie processes detected"
    send_alert "$zombie_count zombie processes detected"
    return 1
  fi
  return 0
}
# Report when a metric raised more than a given number of alerts recently.
# Globals:   ALERT_HISTORY (read)
# Arguments: $1 - metric name, matched exactly against the third CSV field
#            $2 - look-back window in seconds
#            $3 - maximum number of alerts tolerated in that window
# Outputs:   a log entry via log_alert when the limit is exceeded
# Returns:   1 when the limit is exceeded, 0 otherwise
check_alert_frequency() {
  local metric=$1
  local time_window=$2
  local max_alerts=$3
  local current_time start_time
  local alert_count=0
  current_time=$(date +%s)
  start_time=$((current_time - time_window))
  # A missing history file simply means no alert has been recorded yet.
  if [[ -r "$ALERT_HISTORY" ]]; then
    # Exact field comparison: the metric is data, not a regular expression.
    alert_count=$(awk -F, -v m="$metric" -v start="$start_time" -v end="$current_time" \
      '$3 == m && $1 >= start && $1 <= end { n++ } END { print n + 0 }' "$ALERT_HISTORY")
  fi
  if [[ "$alert_count" -gt "$max_alerts" ]]; then
    log_alert "ALERT FREQUENCY: Too many $metric alerts in last $time_window seconds: $alert_count"
    return 1
  fi
  return 0
}
# Append "[<YYYY-MM-DD HH:MM:SS>] <message>" to $LOG_FILE.
# Arguments: $1 - message text
log_alert() {
  local entry=$1
  printf '[%s] %s\n' "$(date "+%Y-%m-%d %H:%M:%S")" "$entry" >> $LOG_FILE
}
# Deliver an alert through the enabled channels.
# Globals:   ENABLE_EMAIL_ALERTS, ALERT_EMAIL, ENABLE_SLACK_ALERTS,
#            SLACK_WEBHOOK (read)
# Arguments: $1 - message text
send_alert() {
  local message=$1
  local backslash='\' quote='"' newline=$'\n'
  local json_text
  if [ "$ENABLE_EMAIL_ALERTS" = "true" ]; then
    echo "$message" | mail -s "System Alert" "$ALERT_EMAIL"
  fi
  if [ "$ENABLE_SLACK_ALERTS" = "true" ]; then
    # Escape backslashes, double quotes and newlines so the message cannot
    # break out of (or corrupt) the JSON string it is embedded in.
    json_text=${message//"$backslash"/"$backslash$backslash"}
    json_text=${json_text//"$quote"/"$backslash$quote"}
    json_text=${json_text//"$newline"/"${backslash}n"}
    curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"$json_text\"}" "$SLACK_WEBHOOK"
  fi
}
# Endless alerting loop: run every check, then the per-metric alert-frequency
# checks, every CHECK_INTERVAL seconds (60 when unset).
# Globals: CHECK_INTERVAL (read)
main_monitor() {
  # Without a default, an unset interval makes `sleep` fail immediately and
  # the loop spins at full speed, flooding the alert channels.
  local interval=${CHECK_INTERVAL:-60}
  log_alert "Smart Alert System started"
  while true; do
    # Threshold checks
    check_cpu_alert
    check_memory_alert
    check_load_alert
    check_zombie_alert
    # Alert-frequency checks: more than 10 alerts within the last hour
    check_alert_frequency "cpu" 3600 10
    check_alert_frequency "memory" 3600 10
    check_alert_frequency "load" 3600 10
    sleep "$interval"
  done
}
# Write a daily alert report with per-metric counts and the 20 most recent
# history rows.
# Globals:   LOG_DIR (read, optional), LOG_FILE, ALERT_HISTORY (read)
# Outputs:   <dir>/alert_report_<YYYYMMDD>.txt, where <dir> is $LOG_DIR or,
#            when that is unset, the directory of $LOG_FILE; a log entry via
#            log_alert
generate_alert_report() {
  # LOG_DIR is not defined by this script itself; without the fallback the
  # report path would start at the filesystem root.
  local report_dir=${LOG_DIR:-$(dirname -- "$LOG_FILE")}
  local report_file
  local history=$ALERT_HISTORY
  report_file="$report_dir/alert_report_$(date +%Y%m%d).txt"
  # No history file yet simply means no alerts: report zeros instead of errors.
  [[ -r "$history" ]] || history=/dev/null
  {
    echo "Alert Report - $(date +%Y-%m-%d)"
    echo "==============="
    echo ""
    # Alert counts
    echo "Alert Statistics:"
    echo "Total alerts: $(wc -l < "$history")"
    echo "CPU alerts: $(grep -c ",cpu," "$history")"
    echo "Memory alerts: $(grep -c ",memory," "$history")"
    echo "Load alerts: $(grep -c ",load," "$history")"
    echo "Zombie alerts: $(grep -c ",zombie," "$history")"
    echo ""
    # Most recent alerts
    echo "Recent Alerts:"
    tail -20 "$history"
  } > "$report_file"
  log_alert "Alert report generated: $report_file"
}
# Entry point: "monitor" starts the alerting loop, "report" writes the alert
# report. Any other value prints the usage line and exits with status 1.
main() {
  local action=$1
  if [[ "$action" == "monitor" ]]; then
    main_monitor
  elif [[ "$action" == "report" ]]; then
    generate_alert_report
  else
    echo "Usage: $0 {monitor|report}"
    exit 1
  fi
}
main "$@"
最佳实践#
- 合理设置更新间隔:根据监控需求设置合适的更新间隔
- 使用批处理模式:在脚本中使用 -b 选项进行批处理
- 关注关键指标:重点监控 CPU、内存、负载等关键指标
- 定期检查僵尸进程:及时发现和处理僵尸进程
- 设置告警阈值:根据业务需求设置合理的告警阈值
- 记录监控数据:定期记录监控数据,便于分析趋势
- 分析性能趋势:定期分析性能数据,识别潜在问题
- 自动化监控:编写脚本实现自动化监控和告警
注意事项#
- top 会消耗一定的系统资源,频繁使用可能影响性能
- 在高负载系统中,top 的更新可能不够及时
- 不同版本的 top 输出格式可能有所不同
- 在生产环境中监控时要谨慎,避免影响服务
- 注意僵尸进程的处理,避免资源泄漏
- 对于关键业务,建议使用专业的性能监控工具
- 注意系统负载的持续监控,及时发现性能瓶颈
- 在自动化脚本中使用 top 时,注意错误处理