#!/bin/bash
#
# MPI/OpenMP matrix-multiplication benchmark driver.
#
# Builds the project with xmake, times the serial kernel for every matrix
# size, then runs four experiment series with mpirun and appends one CSV row
# per successful run:
#   Exp1      - OpenMP threads fixed at 1, MPI process count varies
#   Exp2      - every MPI process count x OpenMP thread count combination
#   Exp3      - fixed MPI:OpenMP combinations, using gemm_parallel
#   Exp3-opt  - the same combinations, using gemm_optimized
#
# Outputs (written in the current directory):
#   experiment_results.csv  - parallel timings, speedup and efficiency
#   serial_results.csv      - serial baseline timings
#
# Environment:
#   OMP_NUM_THREADS  initial thread count (defaults to 1); overwritten per run.

set -u

# Default thread count until an experiment picks its own value.
export OMP_NUM_THREADS=${OMP_NUM_THREADS:-1}

# Output files.
OUTPUT_FILE="experiment_results.csv"
SERIAL_OUTPUT="serial_results.csv"

# Hostfile handed to mpirun for every parallel run.
MPI_HOSTFILE=~/mpi_hosts

# Map the machine architecture reported by `uname -m` to the xmake build
# directory name: aarch64 -> arm64-v8a, anything else -> x86_64.
ARCH=$(uname -m)
if [[ "$ARCH" == "aarch64" ]]; then
  BUILD_ARCH="arm64-v8a"
else
  BUILD_ARCH="x86_64"
fi

# Build directory.
BUILD_DIR="./build/linux/$BUILD_ARCH/release"

# Matrix sizes (edit as needed); each size N is run as an NxNxN product.
MATRIX_SIZES=(512 1024 2048 4096)

# MPI process counts used by Exp1 and Exp2.
MPI_PROCESSES=(1 2 3 6 9 12)

# OpenMP thread counts used by Exp2.
OPENMP_THREADS=(1 2 4 8)

# MPI:OpenMP combinations used by Exp3 and Exp3-opt (each product is 16).
TOTAL_PROCS_TARGET=16
COMBOS=("1:16" "2:8" "4:4" "8:2" "16:1")

#######################################
# Print the FIELD-th whitespace-separated field of the first stdin line that
# contains LABEL. Later matching lines are ignored so the result is always a
# single value.
# Arguments: $1 - literal label to search for
#            $2 - 1-based field index of the timing value on that line
# Outputs:   the extracted field on stdout (nothing if no line matches)
#######################################
extract_time() {
  awk -v label="$1" -v field="$2" \
    'index($0, label) && !seen { print $field; seen = 1 }'
}

#######################################
# Succeed only when $1 is a plain decimal number strictly greater than zero.
# Guards every division against empty, garbled or zero timings.
# Arguments: $1 - candidate value
# Returns:   0 if positive number, non-zero otherwise
#######################################
is_positive_number() {
  local -r number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  [[ "$1" =~ $number_re ]] || return 1
  awk -v v="$1" 'BEGIN { exit !(v > 0) }'
}

#######################################
# Print $1 / $2 with four decimal places. The caller must ensure $2 is
# non-zero.
# Arguments: $1 - numerator, $2 - denominator
#######################################
ratio() {
  awk -v a="$1" -v b="$2" 'BEGIN { printf "%.4f", a / b }'
}

#######################################
# Look up the serial baseline time recorded for a matrix size.
# Arguments: $1 - matrix size (first CSV column of SERIAL_OUTPUT)
# Outputs:   the Time_ms column of the first matching row (may be empty)
#######################################
serial_time_for() {
  awk -F, -v size="$1" '$1 == size { print $4; exit }' "$SERIAL_OUTPUT"
}

#######################################
# Run one parallel configuration and append its result row to OUTPUT_FILE.
# Globals:   BUILD_DIR, MPI_HOSTFILE, OUTPUT_FILE (read);
#            OMP_NUM_THREADS (exported)
# Arguments: $1 - experiment tag written to the CSV (e.g. Exp1)
#            $2 - binary name inside BUILD_DIR
#            $3 - label that marks the timing line in the program output
#            $4 - field index of the timing value on that line
#            $5 - matrix size (used for M, N and K)
#            $6 - MPI process count
#            $7 - OpenMP thread count
#            $8 - serial baseline time for this size (may be empty)
# Returns:   0 if a row was recorded, 1 if no valid timing was obtained
#######################################
record_parallel_run() {
  local experiment=$1 binary=$2 label=$3 field=$4
  local size=$5 np=$6 nthreads=$7 serial_time=$8
  local total_procs=$((np * nthreads))
  local elapsed speedup efficiency

  export OMP_NUM_THREADS=$nthreads

  # -x forwards OMP_NUM_THREADS to ranks launched on the hostfile's remote
  # nodes; without it only locally started ranks inherit the exported value.
  elapsed=$(mpirun --hostfile "$MPI_HOSTFILE" --oversubscribe \
      -x OMP_NUM_THREADS -np "$np" \
      "$BUILD_DIR/$binary" "$size" "$size" "$size" \
    | extract_time "$label" "$field")

  if ! is_positive_number "$elapsed"; then
    echo " 警告: ${experiment} 未得到有效计时 (尺寸=${size}, MPI进程数=${np}, OpenMP线程数=${nthreads}), 跳过该数据点" >&2
    return 1
  fi

  if is_positive_number "$serial_time"; then
    speedup=$(ratio "$serial_time" "$elapsed")
    efficiency=$(ratio "$speedup" "$total_procs")
  else
    # No usable baseline: keep the measured time, leave the derived columns
    # empty instead of dividing by garbage.
    speedup=""
    efficiency=""
  fi

  echo " 时间: ${elapsed} ms, 加速比: $speedup, 效率: $efficiency"
  echo "$experiment,$size,$size,$size,$np,$nthreads,$elapsed,$speedup,$efficiency" >> "$OUTPUT_FILE"
}

#######################################
# Run every MPI:OpenMP combination in COMBOS for every matrix size.
# Arguments: $1 - experiment tag, $2 - binary name,
#            $3 - timing-line label, $4 - timing field index
#######################################
run_fixed_total_experiment() {
  local experiment=$1 binary=$2 label=$3 field=$4
  local size serial_time combo np nthreads

  for size in "${MATRIX_SIZES[@]}"; do
    # Fetch the serial baseline for this size.
    serial_time=$(serial_time_for "$size")
    echo "矩阵尺寸: ${size}x${size}x${size}"

    for combo in "${COMBOS[@]}"; do
      np=${combo%%:*}
      nthreads=${combo##*:}
      echo " MPI: $np, OpenMP: $nthreads (总处理器: $((np * nthreads)))"
      record_parallel_run "$experiment" "$binary" "$label" "$field" \
        "$size" "$np" "$nthreads" "$serial_time"
    done
    echo ""
  done
}

main() {
  local size np nthreads serial_time elapsed

  # Create the output files and write their header rows.
  echo "Experiment,M,N,K,MPI_Processes,OpenMP_Threads,Time_ms,Speedup,Efficiency" > "$OUTPUT_FILE"
  echo "M,N,K,Time_ms" > "$SERIAL_OUTPUT"

  echo "=========================================="
  echo "MPI-OpenMP矩阵乘法性能测试"
  echo "=========================================="

  # Build the binaries.
  echo "编译程序..."
  if ! xmake; then
    echo "编译失败!" >&2
    exit 1
  fi
  echo "编译完成!"
  echo ""

  # Experiment 0: serial baseline.
  echo "=========================================="
  echo "实验0: 串行基准测试"
  echo "=========================================="

  for size in "${MATRIX_SIZES[@]}"; do
    echo "测试矩阵尺寸: ${size}x${size}x${size}"
    elapsed=$("$BUILD_DIR/gemm_serial" "$size" "$size" "$size" 0 \
      | extract_time "matmul:" 2)
    if ! is_positive_number "$elapsed"; then
      echo " 警告: 串行基准未得到有效计时 (尺寸=${size}), 该尺寸的加速比将留空" >&2
    fi
    echo " 时间: ${elapsed} ms"
    echo "$size,$size,$size,$elapsed" >> "$SERIAL_OUTPUT"
  done
  echo ""

  # Experiment 1: OpenMP threads fixed at 1, vary the MPI process count.
  echo "=========================================="
  echo "实验一: OpenMP线程数=1,改变MPI进程数"
  echo "=========================================="

  for size in "${MATRIX_SIZES[@]}"; do
    serial_time=$(serial_time_for "$size")
    echo "矩阵尺寸: ${size}x${size}x${size}"
    echo "串行时间: ${serial_time} ms"

    for np in "${MPI_PROCESSES[@]}"; do
      echo " MPI进程数: $np"
      record_parallel_run "Exp1" "gemm_parallel" "mpi matmul:" 3 \
        "$size" "$np" 1 "$serial_time"
    done
    echo ""
  done

  # Experiment 2: vary both the MPI process count and the OpenMP thread count.
  echo "=========================================="
  echo "实验二: 改变MPI进程数和OpenMP线程数"
  echo "=========================================="

  for size in "${MATRIX_SIZES[@]}"; do
    serial_time=$(serial_time_for "$size")
    echo "矩阵尺寸: ${size}x${size}x${size}"

    for nthreads in "${OPENMP_THREADS[@]}"; do
      echo " OpenMP线程数: $nthreads"

      for np in "${MPI_PROCESSES[@]}"; do
        echo " MPI进程数: $np (总处理器数: $((np * nthreads)))"
        record_parallel_run "Exp2" "gemm_parallel" "mpi matmul:" 3 \
          "$size" "$np" "$nthreads" "$serial_time"
      done
    done
    echo ""
  done

  # Experiment 3: fixed total processor count, vary the MPI/OpenMP split.
  echo "=========================================="
  echo "实验三: 固定总处理器数,改变MPI/OpenMP组合"
  echo "=========================================="

  echo "目标总处理器数: $TOTAL_PROCS_TARGET"

  run_fixed_total_experiment "Exp3" "gemm_parallel" "mpi matmul:" 3

  # Experiment 3 (optimized implementation): same combinations, run with
  # gemm_optimized and tagged Exp3-opt in the results.
  echo "=========================================="
  echo "实验三(优化): 固定总处理器数,使用 gemm_optimized 的 MPI/OpenMP 组合测试"
  echo "=========================================="

  run_fixed_total_experiment "Exp3-opt" "gemm_optimized" \
    "optimized mpi matmul:" 4

  echo "=========================================="
  echo "测试完成!"
  echo "结果已保存到: $OUTPUT_FILE"
  echo "串行基准已保存到: $SERIAL_OUTPUT"
  echo "=========================================="
  echo ""
  echo "数据处理说明:"
  echo "1. 使用Excel、Python或R读取CSV文件"
  echo "2. 绘制图表:"
  echo " - 实验一: X轴=MPI进程数, Y轴=加速比/效率, 不同矩阵尺寸用不同颜色"
  echo " - 实验二: X轴=总处理器数, Y轴=加速比/效率, 不同OpenMP线程数用不同颜色"
  echo " - 实验三: X轴=MPI进程数, Y轴=效率, 不同矩阵尺寸用不同颜色"
  echo "3. 分析加速比和效率的变化趋势"
  echo "4. 讨论MPI/OpenMP组合对性能的影响"
}

main "$@"