#!/usr/bin/env python3
"""
Analysis script for the MPI+OpenMP hybrid parallel matrix-multiplication
performance experiments.

Contains the complete analysis and visualisation for the three experiments.
"""

import matplotlib.pyplot as plt
import numpy as np
import matplotlib
from matplotlib import rcParams
import pandas as pd

# Font setup
matplotlib.rcParams['font.sans-serif'] = ['DejaVu Sans']
matplotlib.rcParams['axes.unicode_minus'] = False

# Parameter grids shared by the experiments.
MATRIX_SIZES = [512, 1024, 2048, 4096]
MPI_PROCESSES = [1, 2, 3, 6, 9, 12]
OMP_THREADS = [1, 2, 4, 8]
# (MPI processes, OpenMP threads) pairs whose product is 16.
TOTAL16_COMBINATIONS = [(1, 16), (2, 8), (4, 4), (8, 2), (16, 1)]


def load_data():
    """Load the experiment data from CSV files in the working directory.

    Returns:
        A ``(df, serial_df)`` tuple read from ``experiment_results.csv`` and
        ``serial_results.csv`` respectively.
    """
    df = pd.read_csv('experiment_results.csv')
    serial_df = pd.read_csv('serial_results.csv')
    return df, serial_df


def _find_config(frame, size, n_mpi, n_omp):
    """Return the first row of *frame* for (M, MPI, OpenMP), or None if absent."""
    hit = frame[(frame['M'] == size)
                & (frame['MPI_Processes'] == n_mpi)
                & (frame['OpenMP_Threads'] == n_omp)]
    return None if hit.empty else hit.iloc[0]


def _best_efficiency(frame, size, combinations):
    """Return ((n_mpi, n_omp), efficiency) of the most efficient combination.

    Only efficiencies strictly greater than 0 are considered; when no
    combination qualifies the result is ``(None, 0)``.
    """
    best_config, best_eff = None, 0
    for n_mpi, n_omp in combinations:
        row = _find_config(frame, size, n_mpi, n_omp)
        if row is not None and row['Efficiency'] > best_eff:
            best_config, best_eff = (n_mpi, n_omp), row['Efficiency']
    return best_config, best_eff


def experiment1_analysis(df, serial_df):
    """Experiment 1: OpenMP threads fixed at 1, varying the MPI process count.

    Prints one table per matrix size and saves a 2x2 figure (time, speedup,
    efficiency, efficiency heatmap) to ``experiment1_analysis.png``.

    Args:
        df: Experiment results. The columns read here are ``Experiment``,
            ``OpenMP_Threads``, ``M``, ``MPI_Processes``, ``Time_ms``,
            ``Speedup`` and ``Efficiency``.
        serial_df: Serial baseline results. Kept for interface compatibility;
            it is not used by this function.

    Returns:
        A copy of the rows of *df* with ``Experiment == 'Exp1'`` and
        ``OpenMP_Threads == 1``.
    """
    print("=" * 100)
    print("实验一:OpenMP线程数=1,改变MPI进程数对性能的影响")
    print("=" * 100)

    # Select the experiment-1 rows (OpenMP threads = 1).
    exp1_data = df[(df['Experiment'] == 'Exp1') & (df['OpenMP_Threads'] == 1)].copy()

    matrix_sizes = MATRIX_SIZES
    mpi_processes = MPI_PROCESSES

    # Filter and sort once per size; every table and plot below reuses this.
    per_size = {
        size: exp1_data[exp1_data['M'] == size].sort_values('MPI_Processes')
        for size in matrix_sizes
    }

    # Print the data tables.
    for size in matrix_sizes:
        print(f"\n矩阵规模: {size}x{size}x{size}")
        print("-" * 90)
        print(f"{'MPI进程数':<12} {'时间(ms)':<15} {'加速比':<15} {'效率':<15}")
        print("-" * 90)
        for _, row in per_size[size].iterrows():
            print(f"{int(row['MPI_Processes']):<12} {row['Time_ms']:<15.3f} "
                  f"{row['Speedup']:<15.4f} {row['Efficiency']:<15.4f}")

    # Draw the charts.
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
    markers = ['o', 's', '^', 'd']

    # Figures 1-3 share the same layout: one line per matrix size.
    panels = [
        (axes[0, 0], 'Time_ms', 'Execution Time (ms)',
         'Experiment 1: Execution Time vs MPI Processes'),
        (axes[0, 1], 'Speedup', 'Speedup',
         'Experiment 1: Speedup vs MPI Processes'),
        (axes[1, 0], 'Efficiency', 'Parallel Efficiency',
         'Experiment 1: Parallel Efficiency vs MPI Processes'),
    ]
    for ax, column, _, _ in panels:
        for i, size in enumerate(matrix_sizes):
            size_data = per_size[size]
            ax.plot(size_data['MPI_Processes'], size_data[column],
                    marker=markers[i], linewidth=2,
                    label=f'{size}x{size}', color=colors[i])

    # Ideal speedup reference (speedup == process count), drawn once rather
    # than once per matrix size.
    procs_seen = sorted(exp1_data['MPI_Processes'].unique())
    if procs_seen:
        axes[0, 1].plot(procs_seen, procs_seen, '--', linewidth=1,
                        color='gray', alpha=0.5, label='Ideal')
    # Ideal efficiency reference line (100%).
    axes[1, 0].axhline(y=1.0, color='gray', linestyle='--', linewidth=1, alpha=0.5)

    for ax, _, ylabel, title in panels:
        ax.set_xlabel('Number of MPI Processes')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Figure 4: efficiency heatmap. Cells are filled by (size, process count)
    # value, so a missing measurement stays NaN instead of shifting the row
    # or raising an IndexError.
    ax4 = axes[1, 1]
    efficiency_matrix = np.full((len(matrix_sizes), len(mpi_processes)), np.nan)
    for i, size in enumerate(matrix_sizes):
        size_data = per_size[size]
        for j, procs in enumerate(mpi_processes):
            hit = size_data[size_data['MPI_Processes'] == procs]
            if not hit.empty:
                efficiency_matrix[i, j] = hit.iloc[0]['Efficiency']

    im = ax4.imshow(efficiency_matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax4.set_xticks(range(len(mpi_processes)))
    ax4.set_xticklabels(mpi_processes)
    ax4.set_yticks(range(len(matrix_sizes)))
    ax4.set_yticklabels([f'{s}x{s}' for s in matrix_sizes])
    ax4.set_xlabel('Number of MPI Processes')
    ax4.set_ylabel('Matrix Size')
    ax4.set_title('Parallel Efficiency Heatmap')

    # Value annotations (skipped for cells without data).
    for (i, j), value in np.ndenumerate(efficiency_matrix):
        if not np.isnan(value):
            ax4.text(j, i, f'{value:.2f}',
                     ha="center", va="center", color="black", fontsize=8)

    plt.colorbar(im, ax=ax4, label='Efficiency')

    plt.tight_layout()
    plt.savefig('experiment1_analysis.png', dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure once it is on disk
    print("\nFigure saved to: experiment1_analysis.png")

    return exp1_data


def experiment2_analysis(df):
    """Experiment 2: vary the MPI process count and the OpenMP thread count.

    Prints the per-configuration tables and the "total = 16" comparison, and
    saves a 2x2 figure to ``experiment2_analysis.png``.

    Args:
        df: Experiment results. The columns read here are ``Experiment``,
            ``M``, ``MPI_Processes``, ``OpenMP_Threads``, ``Time_ms``,
            ``Speedup`` and ``Efficiency``.

    Returns:
        A copy of the rows of *df* with ``Experiment == 'Exp2'``.
    """
    print("\n" + "=" * 100)
    print("实验二:MPI进程数和OpenMP线程数同时改变对性能的影响")
    print("=" * 100)

    # Select the experiment-2 rows.
    exp2_data = df[df['Experiment'] == 'Exp2'].copy()

    matrix_sizes = MATRIX_SIZES
    mpi_processes = MPI_PROCESSES
    omp_threads = OMP_THREADS
    combinations = TOTAL16_COMBINATIONS

    # 2.1 Overall data tables.
    print("\n2.1 不同配置下的性能数据")
    for size in matrix_sizes:
        print(f"\n矩阵规模: {size}x{size}x{size}")
        print("-" * 100)
        print(f"{'MPI':<6} {'OMP':<6} {'总进程数':<10} {'时间(ms)':<15} {'加速比':<15} {'效率':<15}")
        print("-" * 100)

        for n_mpi in mpi_processes:
            for n_omp in omp_threads:
                r = _find_config(exp2_data, size, n_mpi, n_omp)
                if r is None:
                    continue
                total_procs = r['MPI_Processes'] * r['OpenMP_Threads']
                print(f"{int(r['MPI_Processes']):<6} {int(r['OpenMP_Threads']):<6} "
                      f"{int(total_procs):<10} {r['Time_ms']:<15.3f} "
                      f"{r['Speedup']:<15.4f} {r['Efficiency']:<15.4f}")

    # 2.2 Effect of the MPI/OpenMP split at a fixed total of 16.
    print("\n\n2.2 相同总进程数下,MPI进程数和OpenMP线程数分配对效率的影响")
    print("=" * 100)

    for size in matrix_sizes:
        print(f"\n矩阵规模: {size}x{size}x{size},总进程数=16的不同分配")
        print("-" * 90)
        print(f"{'MPI进程数':<12} {'OpenMP线程数':<15} {'时间(ms)':<15} {'加速比':<15} {'效率':<15}")
        print("-" * 90)

        for n_mpi, n_omp in combinations:
            r = _find_config(exp2_data, size, n_mpi, n_omp)
            if r is None:
                continue
            print(f"{int(r['MPI_Processes']):<12} {int(r['OpenMP_Threads']):<15} "
                  f"{r['Time_ms']:<15.3f} {r['Speedup']:<15.4f} {r['Efficiency']:<15.4f}")

        # Report the most efficient split, if any combination has data.
        best_config, best_efficiency = _best_efficiency(exp2_data, size, combinations)
        if best_config:
            print(f"\n最优配置: MPI={best_config[0]}, OpenMP={best_config[1]}, "
                  f"效率={best_efficiency:.4f}")

    # Draw the charts.
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # Figure 1: efficiency comparison for total processes = 16.
    ax1 = axes[0, 0]
    size = 1024  # use 1024 as the example size
    tick_labels = []
    efficiencies = []
    for n_mpi, n_omp in combinations:
        r = _find_config(exp2_data, size, n_mpi, n_omp)
        if r is not None:
            tick_labels.append(f'MPI={n_mpi}\nOMP={n_omp}')
            efficiencies.append(r['Efficiency'])

    bars = ax1.bar(range(len(tick_labels)), efficiencies, color='steelblue', alpha=0.7)
    ax1.set_xticks(range(len(tick_labels)))
    ax1.set_xticklabels(tick_labels)
    ax1.set_ylabel('Parallel Efficiency')
    ax1.set_title(f'Efficiency Comparison (Total Processes=16, {size}x{size})')
    ax1.axhline(y=1.0, color='red', linestyle='--', linewidth=1, alpha=0.5, label='Ideal')
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')

    # Value annotations.
    for bar, eff in zip(bars, efficiencies):
        ax1.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                 f'{eff:.3f}', ha='center', va='bottom', fontsize=9)

    # Figure 2: best-configuration efficiency for each matrix size.
    ax2 = axes[0, 1]
    best_efficiencies = []
    best_configs_labels = []
    for size in matrix_sizes:
        config, eff = _best_efficiency(exp2_data, size, combinations)
        best_efficiencies.append(eff)
        # Avoid annotating a bar with the literal text "None".
        best_configs_labels.append('n/a' if config is None else f'{config[0]}x{config[1]}')

    bars = ax2.bar(range(len(matrix_sizes)), best_efficiencies, color='coral', alpha=0.7)
    ax2.set_xticks(range(len(matrix_sizes)))
    ax2.set_xticklabels([f'{s}x{s}' for s in matrix_sizes])
    ax2.set_ylabel('Best Parallel Efficiency')
    ax2.set_title('Best Configuration Efficiency vs Matrix Size')
    ax2.axhline(y=1.0, color='red', linestyle='--', linewidth=1, alpha=0.5, label='Ideal')
    ax2.legend()
    ax2.grid(True, alpha=0.3, axis='y')

    # Configuration annotations.
    for bar, eff, label in zip(bars, best_efficiencies, best_configs_labels):
        ax2.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                 f'{eff:.3f}\n{label}', ha='center', va='bottom', fontsize=8)

    # Figure 3: mean efficiency per matrix size, one line per OpenMP thread count.
    ax3 = axes[1, 0]
    for n_omp in omp_threads:
        sizes_seen = []
        mean_effs = []
        for size in matrix_sizes:
            subset = exp2_data[(exp2_data['M'] == size)
                               & (exp2_data['OpenMP_Threads'] == n_omp)]
            if not subset.empty:
                sizes_seen.append(size)
                mean_effs.append(subset['Efficiency'].mean())
        if sizes_seen:
            ax3.plot(sizes_seen, mean_effs, marker='o', linewidth=2,
                     label=f'OpenMP={n_omp}')

    ax3.set_xlabel('Matrix Size')
    ax3.set_ylabel('Average Parallel Efficiency')
    ax3.set_title('MPI Process Impact on Efficiency (Fixed OpenMP Threads)')
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Figure 4: speedup comparison across configurations.
    ax4 = axes[1, 1]
    for size in [512, 2048]:
        size_data = exp2_data[exp2_data['M'] == size]
        for n_omp in omp_threads:
            nt_data = size_data[size_data['OpenMP_Threads'] == n_omp].sort_values('MPI_Processes')
            if not nt_data.empty:
                total_procs = nt_data['MPI_Processes'] * nt_data['OpenMP_Threads']
                ax4.plot(total_procs, nt_data['Speedup'], marker='o', linewidth=2,
                         label=f'{size}x{size}, OMP={n_omp}')

    # Ideal speedup reference up to the largest grid point (12 x 8 = 96).
    max_procs = max(mpi_processes) * max(omp_threads)
    ax4.plot(range(1, max_procs + 1), range(1, max_procs + 1), '--', linewidth=1,
             color='gray', alpha=0.5, label='Ideal')

    ax4.set_xlabel('Total Processes (MPI × OpenMP)')
    ax4.set_ylabel('Speedup')
    ax4.set_title('Speedup Comparison for Different Configurations')
    ax4.legend(fontsize=8)
    ax4.grid(True, alpha=0.3)
    ax4.set_xlim(0, max_procs)
    ax4.set_ylim(0, max_procs)

    plt.tight_layout()
    plt.savefig('experiment2_analysis.png', dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure once it is on disk
    print("\nFigure saved to: experiment2_analysis.png")

    return exp2_data


def experiment3_analysis(df):
    """Experiment 3: compare performance before and after optimisation.

    Prints a before/after table per matrix size and saves a 2x2 figure to
    ``experiment3_analysis.png``.

    Args:
        df: Experiment results. Rows tagged ``'Exp3'`` are treated as the
            original runs and rows tagged ``'Exp3-opt'`` as the optimised
            runs. The columns read are ``Experiment``, ``M``,
            ``MPI_Processes``, ``OpenMP_Threads``, ``Time_ms`` and
            ``Efficiency``.

    Returns:
        A ``(exp3_original, exp3_optimized)`` tuple of DataFrame copies.
    """
    print("\n" + "=" * 100)
    print("实验三:优化前后的性能对比分析")
    print("=" * 100)

    # Select the experiment-3 rows.
    exp3_original = df[df['Experiment'] == 'Exp3'].copy()
    exp3_optimized = df[df['Experiment'] == 'Exp3-opt'].copy()

    matrix_sizes = MATRIX_SIZES
    combinations = TOTAL16_COMBINATIONS

    # Look every (size, config) pair up once; the table and all four figures
    # below read from this mapping. A side is None when it has no data.
    pairs = {
        (size, n_mpi, n_omp): (_find_config(exp3_original, size, n_mpi, n_omp),
                               _find_config(exp3_optimized, size, n_mpi, n_omp))
        for size in matrix_sizes
        for n_mpi, n_omp in combinations
    }

    # Print the before/after comparison tables.
    for size in matrix_sizes:
        print(f"\n矩阵规模: {size}x{size}x{size}")
        print("-" * 110)
        print(f"{'配置':<15} {'优化前时间(ms)':<18} {'优化后时间(ms)':<18} "
              f"{'性能提升':<15} {'优化前效率':<15} {'优化后效率':<15}")
        print("-" * 110)

        for n_mpi, n_omp in combinations:
            orig, opt = pairs[(size, n_mpi, n_omp)]
            if orig is None or opt is None:
                continue
            # Pad the whole cell so the 'x' suffix stays attached to the
            # number and the columns line up with the header.
            config_cell = f"{n_mpi}×{n_omp}"
            gain_cell = f"{orig['Time_ms'] / opt['Time_ms']:.2f}x"
            print(f"{config_cell:<15} {orig['Time_ms']:<18.3f} {opt['Time_ms']:<18.3f} "
                  f"{gain_cell:<15} {orig['Efficiency']:<15.4f} {opt['Efficiency']:<15.4f}")

    # Draw the charts.
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # Figures 1 and 2 use one example size and only configs with both runs.
    size = 1024
    configs = []
    orig_times, opt_times = [], []
    orig_effs, opt_effs = [], []
    for n_mpi, n_omp in combinations:
        orig, opt = pairs[(size, n_mpi, n_omp)]
        if orig is None or opt is None:
            continue
        configs.append(f'{n_mpi}x{n_omp}')
        orig_times.append(orig['Time_ms'])
        opt_times.append(opt['Time_ms'])
        orig_effs.append(orig['Efficiency'])
        opt_effs.append(opt['Efficiency'])

    x = list(range(len(configs)))
    width = 0.35

    # Figure 1: execution time before and after optimisation.
    ax1 = axes[0, 0]
    ax1.bar([i - width / 2 for i in x], orig_times, width,
            label='Original', color='coral', alpha=0.7)
    ax1.bar([i + width / 2 for i in x], opt_times, width,
            label='Optimized', color='steelblue', alpha=0.7)
    ax1.set_xticks(x)
    ax1.set_xticklabels(configs)
    ax1.set_ylabel('Execution Time (ms)')
    ax1.set_title(f'Execution Time Comparison ({size}x{size})')
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')

    # Figure 2: efficiency before and after optimisation.
    ax2 = axes[0, 1]
    ax2.plot(x, orig_effs, marker='o', linewidth=2, label='Original', color='coral')
    ax2.plot(x, opt_effs, marker='s', linewidth=2, label='Optimized', color='steelblue')
    ax2.set_xticks(x)
    ax2.set_xticklabels(configs)
    ax2.set_ylabel('Parallel Efficiency')
    ax2.set_title(f'Efficiency Comparison ({size}x{size})')
    ax2.axhline(y=1.0, color='red', linestyle='--', linewidth=1, alpha=0.5, label='Ideal')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # Figure 3: improvement factor per matrix size, one line per config.
    # x values are collected alongside y values so that a size with missing
    # data cannot cause a length mismatch in plot().
    ax3 = axes[1, 0]
    for n_mpi, n_omp in combinations:
        sizes_seen = []
        gains = []
        for size in matrix_sizes:
            orig, opt = pairs[(size, n_mpi, n_omp)]
            if orig is not None and opt is not None:
                sizes_seen.append(size)
                gains.append(orig['Time_ms'] / opt['Time_ms'])
        if gains:
            ax3.plot(sizes_seen, gains, marker='o', linewidth=2,
                     label=f'{n_mpi}x{n_omp}')

    ax3.set_xlabel('Matrix Size')
    ax3.set_ylabel('Performance Improvement (x)')
    ax3.set_title('Optimization Effect for Different Matrix Sizes')
    ax3.axhline(y=1.0, color='gray', linestyle='--', linewidth=1, alpha=0.5)
    ax3.legend()
    ax3.grid(True, alpha=0.3)

    # Figure 4: best efficiency per matrix size, original vs optimised.
    ax4 = axes[1, 1]
    best_orig_effs = [_best_efficiency(exp3_original, s, combinations)[1]
                      for s in matrix_sizes]
    best_opt_effs = [_best_efficiency(exp3_optimized, s, combinations)[1]
                     for s in matrix_sizes]

    x = list(range(len(matrix_sizes)))
    ax4.bar([i - width / 2 for i in x], best_orig_effs, width,
            label='Original', color='coral', alpha=0.7)
    ax4.bar([i + width / 2 for i in x], best_opt_effs, width,
            label='Optimized', color='steelblue', alpha=0.7)
    ax4.set_xticks(x)
    ax4.set_xticklabels([f'{s}x{s}' for s in matrix_sizes])
    ax4.set_ylabel('Best Parallel Efficiency')
    ax4.set_title('Best Configuration Efficiency Comparison')
    ax4.axhline(y=1.0, color='red', linestyle='--', linewidth=1, alpha=0.5, label='Ideal')
    ax4.legend()
    ax4.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.savefig('experiment3_analysis.png', dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure once it is on disk
    print("\nFigure saved to: experiment3_analysis.png")

    return exp3_original, exp3_optimized


def analyze_bottlenecks(df):
    """Print a textual performance-bottleneck analysis.

    Sections 1 and 2 are computed from the ``'Exp1'`` and ``'Exp2'`` rows of
    *df*; sections 3-5 are fixed explanatory text.

    Args:
        df: Experiment results. The columns read are ``Experiment``, ``M``,
            ``MPI_Processes``, ``OpenMP_Threads`` and ``Efficiency``.
    """
    print("\n" + "=" * 100)
    print("性能瓶颈分析")
    print("=" * 100)

    exp1_data = df[df['Experiment'] == 'Exp1']
    exp2_data = df[df['Experiment'] == 'Exp2']

    print("\n1. MPI扩展性分析")
    print("-" * 90)

    # Efficiency change as the MPI process count grows. Each row is compared
    # with the preceding measured configuration (rows sorted by process
    # count), so every step is reported whatever the gaps between counts.
    for size in MATRIX_SIZES:
        size_data = exp1_data[exp1_data['M'] == size].sort_values('MPI_Processes')
        if size_data.empty:
            continue
        print(f"\n矩阵规模 {size}x{size}:")
        prev_eff = None
        for _, row in size_data.iterrows():
            procs = int(row['MPI_Processes'])
            eff = row['Efficiency']
            if prev_eff is None:
                print(f" {procs}进程: 效率={eff:.4f} (基准)")
            elif prev_eff != 0:
                eff_change = (eff - prev_eff) / prev_eff * 100
                print(f" {procs}进程: 效率={eff:.4f} (变化: {eff_change:+.1f}%)")
            else:
                # A relative change against zero is undefined.
                print(f" {procs}进程: 效率={eff:.4f}")
            prev_eff = eff

    print("\n\n2. OpenMP线程数扩展性分析")
    print("-" * 90)

    # Efficiency as the OpenMP thread count grows.
    for size in MATRIX_SIZES:
        print(f"\n矩阵规模 {size}x{size}:")
        size_data = exp2_data[exp2_data['M'] == size]
        for n_mpi in [1, 2, 3]:
            mpi_rows = size_data[size_data['MPI_Processes'] == n_mpi]
            if mpi_rows.empty:
                continue
            print(f" MPI进程数={n_mpi}:")
            for _, row in mpi_rows.sort_values('OpenMP_Threads').iterrows():
                n_omp = int(row['OpenMP_Threads'])
                eff = row['Efficiency']
                print(f" OpenMP线程数={n_omp}: 效率={eff:.4f}")

    print("\n\n3. 通信开销分析")
    print("-" * 90)
    print("MPI进程数增加时,通信开销增大,导致效率下降:")
    print(" - 进程间通信需要同步和等待")
    print(" - 数据分发和结果收集的开销")
    print(" - 负载不均衡导致的空闲等待")

    print("\n\n4. 内存带宽瓶颈")
    print("-" * 90)
    print("矩阵规模较小时,内存带宽成为瓶颈:")
    print(" - 计算时间短,通信时间占比高")
    print(" - 缓存利用率低")
    print(" - 内存访问模式不优化")

    print("\n\n5. 负载均衡问题")
    print("-" * 90)
    print("MPI进程数不能整除矩阵大小时:")
    print(" - 部分进程负载较重")
    print(" - 进程间等待时间增加")
    print(" - 整体效率下降")


def main():
    """Entry point: load the data, then run the three analyses and the summary."""
    print("开始分析MPI+OpenMP混合并行矩阵乘法实验数据...\n")

    # Load the data.
    df, serial_df = load_data()

    # Experiment 1 analysis.
    experiment1_analysis(df, serial_df)

    # Experiment 2 analysis.
    experiment2_analysis(df)

    # Experiment 3 analysis.
    experiment3_analysis(df)

    # Bottleneck analysis.
    analyze_bottlenecks(df)

    print("\n" + "=" * 100)
    print("分析完成!所有图表已保存。")
    print("=" * 100)


if __name__ == "__main__":
    main()