#!/bin/bash
#
# N-body experiment driver.
# Collects performance data for the serial and parallel programs.
# Multi-node environment: hpc-ecs-1, hpc-ecs-2, hpc-ecs-3 (2 threads each).

set -e          # exit immediately when a command fails
set -u          # treat expansion of an unset variable as an error
set -o pipefail # a pipeline fails if any of its stages fails

# Output locations (relative to the directory the script is run from).
readonly OUTPUT_CSV="nbody_results.csv"
readonly LOG_FILE="nbody_experiment.log"

# Host configuration.
readonly HOST1="hpc-ecs-1"
readonly HOST2="hpc-ecs-2"
readonly HOST3="hpc-ecs-3"

# Logging helpers.

#######################################
# Print an error message and append it to the log file.
# Globals:   LOG_FILE (read) - path of the log file that is appended to
# Arguments: message words, merged into a single string via "$*"
# Outputs:   "[ERROR] <message>" on stdout, and the same line appended
#            to LOG_FILE
#######################################
log_error() {
  printf '%s\n' "[ERROR] $*" | tee -a "$LOG_FILE"
}

#######################################
# Print an informational message and append it to the log file.
# Globals:   LOG_FILE (read) - path of the log file that is appended to
# Arguments: message words, merged into a single string via "$*"
# Outputs:   "[INFO] <message>" on stdout, and the same line appended
#            to LOG_FILE
#######################################
log_info() {
  printf '%s\n' "[INFO] $*" | tee -a "$LOG_FILE"
}

# Create the CSV file (truncating any previous contents) and write the
# header row: experiment, problem size, processes per node, machine
# configuration, run time in seconds.
echo "实验,数据规模,每机进程数,机器配置,运行时间(s)" > "$OUTPUT_CSV"

# Banner shown at the start of a run.
echo "=========================================="
echo "N体问题性能测试实验"
echo "=========================================="
echo "主机配置: $HOST1, $HOST2, $HOST3"
echo ""

# Build the serial and the parallel binary with xmake. The first target
# that fails to build is logged and the script stops with status 1.
echo "编译程序..."
log_info "开始编译程序..."
for target in nbody_ser nbody_par; do
  if ! xmake build "$target"; then
    log_error "编译 $target 失败"
    exit 1
  fi
done
log_info "编译完成"
echo ""

# Fixed problem size used by experiments one and two.
FIXED_N=6000

# Experiment one: serial program on a single machine at the fixed
# problem size. Any failure here stops the script with status 1.
echo "=========================================="
echo "实验一:串行程序 - 数据规模6000"
echo "=========================================="
log_info "运行串行程序..."

# Capture the exit status through `||`: with `set -e` active, a bare
# `var=$(cmd)` that fails would terminate the script before the failure
# could be logged below.
ser_exit_code=0
ser_output=$(./build/linux/arm64-v8a/release/nbody_ser "$FIXED_N" 2>&1) \
  || ser_exit_code=$?
if (( ser_exit_code != 0 )); then
  log_error "串行程序执行失败,退出码: $ser_exit_code"
  echo "$ser_output" | tee -a "$LOG_FILE"
  exit 1
fi

# Take the second whitespace-separated field of the line(s) containing
# the timing marker. A single awk is used instead of `grep | awk`: with
# pipefail, a grep that matches nothing would fail the assignment and
# `set -e` would exit before the emptiness check.
time_output=$(awk '/模拟用时/ {print $2}' <<<"$ser_output")
if [[ -z "$time_output" ]]; then
  log_error "无法从输出中提取运行时间"
  echo "$ser_output" | tee -a "$LOG_FILE"
  exit 1
fi

echo "实验一,$FIXED_N,1,单机,$time_output" >> "$OUTPUT_CSV"
echo " 时间: $time_output s"
log_info "实验一完成"
echo ""

# Experiment two: parallel program at the fixed problem size, sweeping
# the number of processes per node on one, two and three hosts.
echo "=========================================="
echo "实验二:并行程序 - 数据规模6000,不同每机进程数"
echo "=========================================="

# Machine-configuration labels and the hosts they add, in order: entry i
# of exp2_labels names the configuration that uses hosts 0..i.
exp2_labels=("单机" "双机" "三机")
exp2_hosts=("$HOST1" "$HOST2" "$HOST3")

for ppn in 1 2 3 4; do
  host_spec=""
  for i in 0 1 2; do
    # Grow the --host list by one "host:slots" entry per configuration.
    host_spec+="${host_spec:+,}${exp2_hosts[i]}:${ppn}"
    machines=${exp2_labels[i]}

    echo "每机进程数: $ppn, $machines"
    log_info "实验二: $machines, ppn=$ppn"

    # A failed run is logged and the sweep continues. The status is
    # captured through `||` because, with `set -e` active, a bare
    # `var=$(cmd)` that fails would abort the whole script.
    par_exit_code=0
    par_output=$(mpirun --host "$host_spec" --oversubscribe \
      ./build/linux/arm64-v8a/release/nbody_par "$FIXED_N" 2>&1) \
      || par_exit_code=$?
    if (( par_exit_code != 0 )); then
      log_error "并行程序执行失败($machines ppn=$ppn),退出码: $par_exit_code"
      echo "$par_output" | tee -a "$LOG_FILE"
    else
      # Single awk instead of `grep | awk`: with pipefail a non-matching
      # grep would fail the assignment and `set -e` would exit before
      # the emptiness check.
      time_output=$(awk '/模拟用时/ {print $2}' <<<"$par_output")
      if [[ -z "$time_output" ]]; then
        log_error "无法从输出中提取运行时间($machines ppn=$ppn)"
        echo "$par_output" | tee -a "$LOG_FILE"
      else
        echo "实验二,$FIXED_N,$ppn,$machines,$time_output" >> "$OUTPUT_CSV"
        echo " 时间: $time_output s"
      fi
    fi
    echo ""
  done
done

# Experiment three: parallel program with one process per node, sweeping
# the problem size on one, two and three hosts.
echo "=========================================="
echo "实验三:并行程序 - 每机1进程,不同数据规模"
echo "=========================================="

# Machine-configuration labels and the hosts they add, in order: entry i
# of exp3_labels names the configuration that uses hosts 0..i.
exp3_labels=("单机" "双机" "三机")
exp3_hosts=("$HOST1" "$HOST2" "$HOST3")

for N in 150 300 600 1200 2400 4800 9600; do
  echo "数据规模: $N"
  log_info "实验三: 数据规模=$N"

  host_spec=""
  for i in 0 1 2; do
    # Grow the --host list by one "host:1" entry per configuration.
    host_spec+="${host_spec:+,}${exp3_hosts[i]}:1"
    machines=${exp3_labels[i]}

    echo " ${machines}..."

    # A failed run is logged and the sweep continues. The status is
    # captured through `||` because, with `set -e` active, a bare
    # `var=$(cmd)` that fails would abort the whole script.
    par_exit_code=0
    par_output=$(mpirun --host "$host_spec" \
      ./build/linux/arm64-v8a/release/nbody_par "$N" 2>&1) \
      || par_exit_code=$?
    if (( par_exit_code != 0 )); then
      log_error "并行程序执行失败($machines N=$N),退出码: $par_exit_code"
      echo "$par_output" | tee -a "$LOG_FILE"
    else
      # Single awk instead of `grep | awk`: with pipefail a non-matching
      # grep would fail the assignment and `set -e` would exit before
      # the emptiness check.
      time_output=$(awk '/模拟用时/ {print $2}' <<<"$par_output")
      if [[ -z "$time_output" ]]; then
        log_error "无法从输出中提取运行时间($machines N=$N)"
        echo "$par_output" | tee -a "$LOG_FILE"
      else
        # The third field is the processes-per-node column of the CSV
        # header; it is always 1 in this experiment.
        echo "实验三,$N,1,$machines,$time_output" >> "$OUTPUT_CSV"
        echo " 时间: $time_output s"
      fi
    fi
  done
  echo ""
done

# Final summary: where the results and the log were written, followed by
# a dump of the collected CSV rows.
echo "=========================================="
echo "实验完成"
echo "=========================================="
echo ""
log_info "所有实验完成"
echo "结果已保存到: $OUTPUT_CSV"
echo "日志已保存到: $LOG_FILE"
echo ""
echo "数据预览:"
cat "$OUTPUT_CSV"
echo ""
echo "如有错误,请查看日志文件: $LOG_FILE"