hpc-lab-code/work/analyze_results.py
2026-01-21 18:02:30 +08:00

281 lines
9.8 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
MPI-OpenMP矩阵乘法实验数据分析脚本
用于读取实验数据并生成性能分析图表
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Apply the seaborn theme FIRST: seaborn's style parameters include
# 'font.family' and 'font.sans-serif', so calling set_style() after the font
# assignment below would overwrite the CJK-capable font list.
sns.set_style("whitegrid")
# Prefer SimHei so the Chinese axis labels/titles used by the plots can render;
# DejaVu Sans is the fallback when SimHei is not installed.
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
# Draw negative numbers with the ASCII hyphen-minus instead of U+2212
# (presumably because the CJK font may lack that glyph -- confirm if changed).
plt.rcParams['axes.unicode_minus'] = False
def load_data(filename='experiment_results.csv'):
    """Read the experiment results CSV into a DataFrame.

    Args:
        filename: Path (or any object accepted by ``pandas.read_csv``) of the
            CSV to load. Defaults to ``'experiment_results.csv'``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(filename)
def load_serial_data(filename='serial_results.csv'):
    """Read the serial-baseline results CSV into a DataFrame.

    Args:
        filename: Path (or any object accepted by ``pandas.read_csv``) of the
            CSV to load. Defaults to ``'serial_results.csv'``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(filename)
def plot_experiment1(df):
    """Plot Experiment 1: scaling with the number of MPI processes.

    Filters ``df`` to rows whose ``Experiment`` column equals ``'Exp1'`` and
    draws two side-by-side panels with one curve per distinct ``M`` value:
    ``Speedup`` versus ``MPI_Processes`` on the left and ``Efficiency``
    (multiplied by 100) on the right. The figure is saved as
    ``exp1_mpi_scaling.png`` in the current working directory.

    Args:
        df: DataFrame providing the ``Experiment``, ``M``, ``MPI_Processes``,
            ``Speedup`` and ``Efficiency`` columns.

    Returns:
        None. Prints a warning and returns early when no ``'Exp1'`` rows exist.
    """
    exp1 = df[df['Experiment'] == 'Exp1'].copy()
    if exp1.empty:
        print("警告:没有找到实验一的数据")
        return

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # One curve per matrix size on each panel, ordered by process count so the
    # line segments connect neighbouring measurements.
    for size in exp1['M'].unique():
        data = exp1[exp1['M'] == size].sort_values('MPI_Processes')
        ax1.plot(data['MPI_Processes'], data['Speedup'],
                 marker='o', label=f'{size}×{size}', linewidth=2)
        ax2.plot(data['MPI_Processes'], data['Efficiency'] * 100,
                 marker='s', label=f'{size}×{size}', linewidth=2)

    # The labelled reference lines must be drawn BEFORE legend() is called:
    # legend() only collects artists that already exist on the axes, so adding
    # them afterwards silently leaves them out of the legend.
    # The ideal-speedup diagonal spans the measured range rather than a fixed 16.
    max_procs = exp1['MPI_Processes'].max()
    ax1.plot([1, max_procs], [1, max_procs], 'k--', alpha=0.3,
             label='理想线性加速比')
    ax2.axhline(y=100, color='k', linestyle='--', alpha=0.3,
                label='理想效率100%')

    # Left panel: speedup.
    ax1.set_xlabel('MPI进程数', fontsize=12)
    ax1.set_ylabel('加速比', fontsize=12)
    ax1.set_title('实验一MPI进程数扩展性OpenMP=1', fontsize=14)
    ax1.legend(fontsize=10)
    ax1.grid(True, alpha=0.3)

    # Right panel: parallel efficiency in percent.
    ax2.set_xlabel('MPI进程数', fontsize=12)
    ax2.set_ylabel('效率 (%)', fontsize=12)
    ax2.set_title('实验一:并行效率', fontsize=14)
    ax2.legend(fontsize=10)
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('exp1_mpi_scaling.png', dpi=300, bbox_inches='tight')
    print("已保存: exp1_mpi_scaling.png")
    plt.close(fig)
def plot_experiment2(df):
    """Plot Experiment 2: hybrid MPI-OpenMP scaling.

    Filters ``df`` to rows whose ``Experiment`` column equals ``'Exp2'``,
    derives ``Total_Processors = MPI_Processes * OpenMP_Threads`` and, for each
    distinct ``OpenMP_Threads`` value, averages ``Speedup`` and ``Efficiency``
    over rows sharing the same total processor count. Mean speedup is drawn on
    the left panel and mean efficiency (multiplied by 100) on the right. The
    figure is saved as ``exp2_hybrid_scaling.png``.

    Args:
        df: DataFrame providing the ``Experiment``, ``MPI_Processes``,
            ``OpenMP_Threads``, ``Speedup`` and ``Efficiency`` columns.

    Returns:
        None. Prints a warning and returns early when no ``'Exp2'`` rows exist.
    """
    exp2 = df[df['Experiment'] == 'Exp2'].copy()
    if exp2.empty:
        print("警告:没有找到实验二的数据")
        return

    exp2['Total_Processors'] = exp2['MPI_Processes'] * exp2['OpenMP_Threads']
    fig, (speedup_ax, efficiency_ax) = plt.subplots(1, 2, figsize=(15, 6))

    mean_of = {'Speedup': 'mean', 'Efficiency': 'mean'}
    for nthreads in exp2['OpenMP_Threads'].unique():
        subset = exp2[exp2['OpenMP_Threads'] == nthreads]
        # Average rows that share the same total processor count.
        averaged = subset.groupby('Total_Processors').agg(mean_of).reset_index()
        series_label = f'OpenMP={nthreads}'
        speedup_ax.plot(averaged['Total_Processors'], averaged['Speedup'],
                        marker='o', label=series_label, linewidth=2)
        efficiency_ax.plot(averaged['Total_Processors'],
                           averaged['Efficiency'] * 100,
                           marker='s', label=series_label, linewidth=2)

    # Both panels share the same x label, legend and grid settings.
    panels = (
        (speedup_ax, '加速比', '实验二:混合并行扩展性'),
        (efficiency_ax, '效率 (%)', '实验二:并行效率'),
    )
    for axis, ylabel, title in panels:
        axis.set_xlabel('总处理器数', fontsize=12)
        axis.set_ylabel(ylabel, fontsize=12)
        axis.set_title(title, fontsize=14)
        axis.legend(fontsize=10)
        axis.grid(True, alpha=0.3)

    # Unlabelled 100% reference line on the efficiency panel.
    efficiency_ax.axhline(y=100, color='k', linestyle='--', alpha=0.3)

    plt.tight_layout()
    plt.savefig('exp2_hybrid_scaling.png', dpi=300, bbox_inches='tight')
    print("已保存: exp2_hybrid_scaling.png")
    plt.close()
def plot_experiment3(df):
    """Plot Experiment 3: efficiency of different MPI/OpenMP combinations.

    Filters ``df`` to rows whose ``Experiment`` column equals ``'Exp3'`` and
    draws one efficiency curve (``Efficiency`` multiplied by 100) per distinct
    ``M`` value against ``MPI_Processes``. A secondary top x-axis shows the
    matching OpenMP thread count. The figure is saved as
    ``exp3_mpi_openmp_combo.png``.

    Args:
        df: DataFrame providing the ``Experiment``, ``M``, ``MPI_Processes``
            and ``Efficiency`` columns.

    Returns:
        None. Prints a warning and returns early when no ``'Exp3'`` rows exist.
    """
    exp3 = df[df['Experiment'] == 'Exp3']
    if exp3.empty:
        print("警告:没有找到实验三的数据")
        return

    fig, ax = plt.subplots(figsize=(12, 6))

    # Efficiency curves (line plot), one per matrix size.
    for size in exp3['M'].unique():
        # Sort by process count so the line connects neighbouring
        # configurations even when the CSV rows are not in order.
        data = exp3[exp3['M'] == size].sort_values('MPI_Processes')
        ax.plot(data['MPI_Processes'], data['Efficiency'] * 100,
                marker='o', label=f'{size}×{size}', linewidth=2, markersize=8)

    ax.set_xlabel('MPI进程数', fontsize=12)
    ax.set_ylabel('效率 (%)', fontsize=12)
    ax.set_title('实验三不同MPI/OpenMP组合的效率总处理器数=16', fontsize=14)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.axhline(y=100, color='k', linestyle='--', alpha=0.3)

    # Secondary x-axis labelled with the OpenMP thread count. The tick mapping
    # is fixed to MPI_Processes * OpenMP_Threads == 16, matching the title.
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())
    ax2.set_xlabel('OpenMP线程数', fontsize=12)
    ax2.set_xticks([1, 2, 4, 8, 16])
    ax2.set_xticklabels([16, 8, 4, 2, 1])

    plt.tight_layout()
    plt.savefig('exp3_mpi_openmp_combo.png', dpi=300, bbox_inches='tight')
    print("已保存: exp3_mpi_openmp_combo.png")
    plt.close(fig)
def plot_efficiency_heatmap(df):
    """Draw a heatmap of parallel efficiency for one matrix size of Exp2.

    Filters ``df`` to rows whose ``Experiment`` column equals ``'Exp2'``, picks
    a single ``M`` value (the middle entry of the sorted distinct sizes when
    there are more than two, otherwise the smallest) and pivots the mean
    ``Efficiency`` (multiplied by 100) into an ``MPI_Processes`` x
    ``OpenMP_Threads`` grid. The heatmap is saved as ``efficiency_heatmap.png``.

    Args:
        df: DataFrame providing the ``Experiment``, ``M``, ``MPI_Processes``,
            ``OpenMP_Threads`` and ``Efficiency`` columns.

    Returns:
        None. Prints a warning and returns early when there are no ``'Exp2'``
        rows or no rows for the selected matrix size.
    """
    exp2 = df[df['Experiment'] == 'Exp2']
    if exp2.empty:
        print("警告:没有找到实验二的数据")
        return

    # Choose a mid-sized matrix. exp2 is non-empty at this point, so `sizes`
    # always contains at least one entry and no default size is needed.
    sizes = sorted(exp2['M'].unique())
    target_size = sizes[len(sizes) // 2] if len(sizes) > 2 else sizes[0]

    data = exp2[exp2['M'] == target_size]
    if data.empty:
        print("警告:没有足够的数据绘制热图")
        return

    # Rows: MPI process count; columns: OpenMP thread count; values: mean
    # efficiency converted to percent.
    pivot_data = data.pivot_table(
        values='Efficiency',
        index='MPI_Processes',
        columns='OpenMP_Threads',
        aggfunc='mean',
    ) * 100

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='YlOrRd',
                cbar_kws={'label': '效率 (%)'}, ax=ax)
    # The title previously opened a full-width parenthesis without closing it.
    ax.set_title(f'并行效率热图(矩阵尺寸: {target_size}×{target_size})',
                 fontsize=14)
    ax.set_xlabel('OpenMP线程数', fontsize=12)
    ax.set_ylabel('MPI进程数', fontsize=12)

    plt.tight_layout()
    plt.savefig('efficiency_heatmap.png', dpi=300, bbox_inches='tight')
    print("已保存: efficiency_heatmap.png")
    plt.close(fig)
def print_summary(df):
    """Print a text summary of the three experiments to stdout.

    For each experiment present in ``df`` (rows selected by the ``Experiment``
    column: ``'Exp1'``, ``'Exp2'``, ``'Exp3'``) the best figures are reported:

    * Exp1 -- per matrix size ``M``: maximum ``Speedup``, the
      ``MPI_Processes`` value at which it occurs, and maximum ``Efficiency``.
    * Exp2 -- per ``OpenMP_Threads`` value: maximum ``Speedup``, the total
      processor count (``MPI_Processes * OpenMP_Threads``) at which it occurs,
      and maximum ``Efficiency``.
    * Exp3 -- per matrix size ``M``: maximum ``Efficiency`` and the
      ``MPI_Processes`` / ``OpenMP_Threads`` pair that achieved it.

    Experiments with no rows are skipped. ``df`` itself is not modified.

    Args:
        df: DataFrame providing the ``Experiment``, ``M``, ``MPI_Processes``,
            ``OpenMP_Threads``, ``Speedup`` and ``Efficiency`` columns.

    Returns:
        None.
    """
    banner = "=" * 80
    rule = "-" * 80

    print("\n" + banner)
    print("实验结果摘要")
    print(banner)

    # Experiment 1 summary.
    exp1 = df[df['Experiment'] == 'Exp1']
    if not exp1.empty:
        print("\n实验一MPI进程数扩展性OpenMP=1")
        print(rule)
        for size in sorted(exp1['M'].unique()):
            data = exp1[exp1['M'] == size]
            max_speedup = data['Speedup'].max()
            max_eff = data['Efficiency'].max()
            best_np = data.loc[data['Speedup'].idxmax(), 'MPI_Processes']
            print(f"矩阵 {size}×{size}: 最大加速比={max_speedup:.2f} (NP={best_np}), "
                  f"最高效率={max_eff*100:.1f}%")

    # Experiment 2 summary.
    # Take an explicit copy before adding the derived column: assigning to a
    # filtered slice raises SettingWithCopyWarning on pandas versions without
    # Copy-on-Write, and the plotting functions already copy in the same way.
    exp2 = df[df['Experiment'] == 'Exp2'].copy()
    if not exp2.empty:
        exp2['Total_Processors'] = exp2['MPI_Processes'] * exp2['OpenMP_Threads']
        print("\n实验二:混合并行扩展性")
        print(rule)
        for nthreads in sorted(exp2['OpenMP_Threads'].unique()):
            data = exp2[exp2['OpenMP_Threads'] == nthreads]
            max_speedup = data['Speedup'].max()
            max_eff = data['Efficiency'].max()
            best_total = data.loc[data['Speedup'].idxmax(), 'Total_Processors']
            print(f"OpenMP={nthreads}: 最大加速比={max_speedup:.2f} "
                  f"(总处理器={best_total}), 最高效率={max_eff*100:.1f}%")

    # Experiment 3 summary.
    exp3 = df[df['Experiment'] == 'Exp3']
    if not exp3.empty:
        print("\n实验三MPI/OpenMP组合优化总处理器=16")
        print(rule)
        for size in sorted(exp3['M'].unique()):
            data = exp3[exp3['M'] == size]
            max_eff = data['Efficiency'].max()
            # Full row of the most efficient configuration for this size.
            best_config = data.loc[data['Efficiency'].idxmax()]
            print(f"矩阵 {size}×{size}: 最高效率={max_eff*100:.1f}% "
                  f"(MPI={best_config['MPI_Processes']}, "
                  f"OpenMP={best_config['OpenMP_Threads']})")

    print("\n" + banner)
def main():
    """Command-line entry point: load the results CSV, summarise and plot.

    The CSV path is taken from the first command-line argument when one is
    given, otherwise ``'experiment_results.csv'`` is used. If the file does not
    exist an error message is printed and the function returns without
    producing any output files.

    Returns:
        None.
    """
    import sys

    cli_args = sys.argv[1:]
    filename = cli_args[0] if cli_args else 'experiment_results.csv'
    print(f"加载数据文件: {filename}")

    try:
        df = load_data(filename)
    except FileNotFoundError:
        print(f"错误:找不到文件 {filename}")
        print("请先运行 ./run_experiments.sh 生成实验数据")
        return
    print(f"数据加载成功,共 {len(df)} 条记录")

    # Text summary first, then the figures.
    print_summary(df)

    print("\n生成性能分析图表...")
    renderers = (
        plot_experiment1,
        plot_experiment2,
        plot_experiment3,
        plot_efficiency_heatmap,
    )
    for render in renderers:
        render(df)

    # Closing message plus a pointer to each generated image.
    closing_lines = (
        "\n所有图表已生成完成!",
        "\n建议:",
        "1. 查看 exp1_mpi_scaling.png 了解MPI扩展性",
        "2. 查看 exp2_hybrid_scaling.png 了解混合并行性能",
        "3. 查看 exp3_mpi_openmp_combo.png 了解MPI/OpenMP组合优化",
        "4. 查看 efficiency_heatmap.png 了解不同配置的效率分布",
    )
    for line in closing_lines:
        print(line)
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()