342 lines
12 KiB
Python
Executable File
342 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Lab4 CUDA 实验数据可视化脚本
|
|
用于生成实验报告所需的图表
|
|
"""
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Font configuration for Chinese text in figures (SimHei first, then
# DejaVu Sans), with the unicode-minus option switched off.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

# Directory that receives every generated figure; created at import time.
OUTPUT_DIR = Path("experiment_data") / "figures"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def parse_vectoradd_data(filename):
    """Parse vector-addition benchmark results.

    Only lines containing both ``N=`` and ``Time=`` are considered. Such a
    line is split on commas: the first field supplies the problem size (the
    integer after ``=``) and the second field the elapsed time (the first
    whitespace-delimited token after ``=``), for example
    ``N=1024, Time=0.5 ms``.

    Lines that carry both markers but cannot be parsed (missing comma,
    non-numeric value, ...) are skipped instead of aborting the whole parse,
    matching the tolerant behaviour of the other parsers in this file.

    Args:
        filename: Path of the results file to read.

    Returns:
        dict with two parallel lists in file order: ``'sizes'`` (int) and
        ``'times'`` (float).
    """
    data = {'sizes': [], 'times': []}
    with open(filename, 'r') as f:
        for line in f:
            if 'N=' not in line or 'Time=' not in line:
                continue
            fields = line.split(',')
            try:
                n = int(fields[0].split('=')[1].strip())
                elapsed = float(fields[1].split('=')[1].split()[0])
            except (ValueError, IndexError):
                # Malformed record: ignore it and keep the remaining data.
                continue
            # Append only after both values parsed so the lists stay aligned.
            data['sizes'].append(n)
            data['times'].append(elapsed)
    return data
|
|
|
|
|
|
def parse_matrixmul_cpu_data(filename):
    """Parse the CPU matrix-multiplication rows of a results file.

    A row is considered when the line contains an ``x`` and has at least
    five whitespace-separated tokens, read as
    ``<size>x... <threads> <time> <gflops> <speedup>``. Rows whose tokens do
    not convert to numbers are ignored.

    Args:
        filename: Path of the results file to read.

    Returns:
        A ``(data, sizes)`` tuple. ``data`` maps the thread counts 8, 64 and
        256 to lists of ``{'size', 'time', 'gflops', 'speedup'}`` dicts in
        file order; rows with any other thread count are not stored.
        ``sizes`` lists every distinct size seen on a parsable row, in order
        of first appearance (including rows whose thread count is not
        stored).
    """
    per_thread = {8: [], 64: [], 256: []}
    seen_sizes = []

    with open(filename, 'r') as f:
        for row in f:
            tokens = row.split()
            if 'x' not in row or len(tokens) < 5:
                continue
            try:
                size = int(tokens[0].split('x')[0])
                threads = int(tokens[1])
                elapsed, gflops, speedup = (float(tok) for tok in tokens[2:5])
            except (ValueError, IndexError):
                continue

            if size not in seen_sizes:
                seen_sizes.append(size)

            bucket = per_thread.get(threads)
            if bucket is not None:
                bucket.append({
                    'size': size,
                    'time': elapsed,
                    'gflops': gflops,
                    'speedup': speedup,
                })

    return per_thread, seen_sizes
|
|
|
|
|
|
def parse_cuda_kernel_data(filename, kernel_name):
    """Parse the result rows of one CUDA kernel section.

    The section starts after the first line containing ``kernel_name`` and
    ends at the first following line containing ``----`` (or at end of
    file). Inside the section, lines containing an ``x`` are read as
    whitespace-separated tokens: token 0 is ``<size>x...``, token 1 the
    time and token 3 the GFLOPS value. Unparsable lines, and further lines
    that mention ``kernel_name``, are skipped.

    Args:
        filename: Path of the results file to read.
        kernel_name: Text identifying the section header line.

    Returns:
        dict with parallel lists ``'sizes'`` (int), ``'times'`` (float) and
        ``'gflops'`` (float); all empty when the section is not found.
    """
    result = {'sizes': [], 'times': [], 'gflops': []}

    with open(filename, 'r') as f:
        # Phase 1: consume lines up to and including the section header.
        for line in f:
            if kernel_name in line:
                break

        # Phase 2: read rows from the same iterator until the terminator.
        for line in f:
            if kernel_name in line:
                continue
            if '----' in line:
                break
            if 'x' not in line:
                continue
            tokens = line.split()
            try:
                size = int(tokens[0].split('x')[0])
                elapsed = float(tokens[1])
                gflops = float(tokens[3])
            except (ValueError, IndexError):
                continue
            result['sizes'].append(size)
            result['times'].append(elapsed)
            result['gflops'].append(gflops)

    return result
|
|
|
|
|
|
def parse_blocksize_data(filename):
    """Parse the BLOCK_SIZE experiment results.

    A row is considered when the line contains an ``x`` and has at least
    four whitespace-separated tokens, read as
    ``<size>x... <block>[x...] <time> <gflops>``. Rows that fail numeric
    conversion are ignored, as are rows whose block size is not one of
    4, 8, 16 or 32. A later row for the same (block, size) pair replaces
    the earlier one.

    Args:
        filename: Path of the results file to read.

    Returns:
        dict mapping each block size (4, 8, 16, 32) to a dict
        ``{matrix_size: {'time': float, 'gflops': float}}``.
    """
    results = {block: {} for block in (4, 8, 16, 32)}

    with open(filename, 'r') as f:
        for line in f:
            tokens = line.split()
            if len(tokens) < 4 or 'x' not in line:
                continue
            try:
                size = int(tokens[0].split('x')[0])
                block = int(tokens[1].split('x')[0])
                elapsed = float(tokens[2])
                gflops = float(tokens[3])
            except (ValueError, IndexError):
                continue
            if block in results:
                results[block][size] = {'time': elapsed, 'gflops': gflops}

    return results
|
|
|
|
|
|
def plot_vectoradd_performance(data):
    """Plot vector-addition execution time against problem size.

    Draws ``data['times']`` over ``data['sizes']`` as a single marked line
    and saves the figure as ``vectoradd_performance.png`` in ``OUTPUT_DIR``
    at 300 dpi.

    Args:
        data: dict with parallel sequences under ``'sizes'`` and ``'times'``.
    """
    x_values = np.asarray(data['sizes'])
    y_values = np.asarray(data['times'])

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x_values, y_values, 'o-', linewidth=2, markersize=8, label='执行时间')

    ax.set_title('向量加法性能测试 - 数据规模 vs 执行时间', fontsize=14)
    ax.set_xlabel('数据规模 N', fontsize=12)
    ax.set_ylabel('执行时间 (ms)', fontsize=12)
    ax.grid(True, alpha=0.3)
    ax.legend(fontsize=11)

    fig.tight_layout()
    fig.savefig(OUTPUT_DIR / 'vectoradd_performance.png', dpi=300)
    print("✓ 生成图表: vectoradd_performance.png")
    plt.close(fig)
|
|
|
|
|
|
def _safe_ratio(numerators, denominators):
    """Return element-wise numerators / denominators, NaN where the denominator is 0."""
    num = np.asarray(numerators, dtype=float)
    den = np.asarray(denominators, dtype=float)
    ratio = np.full(num.shape, np.nan)
    np.divide(num, den, out=ratio, where=den != 0)
    return ratio


def _draw_grouped_bars(ax, sizes, series, width=0.15):
    """Draw one bar group per matrix size, one bar per (label, values) series."""
    x = np.arange(len(sizes))
    for idx, (label, values) in enumerate(series):
        ax.bar(x + idx * width, values, width, label=label)
    # Centre the tick under each group of bars.
    ax.set_xticks(x + (len(series) - 1) / 2 * width)
    ax.set_xticklabels([f'{s}x{s}' for s in sizes])


def plot_cpu_vs_gpu(cpu_data, cuda1_data, cuda2_data, sizes):
    """Draw the 2x2 CPU-vs-GPU comparison figure.

    Panels:
      * top-left: run time per matrix size (grouped bars);
      * top-right: GFLOPS per matrix size (grouped bars);
      * bottom-left: speed-up curves. CPU curves plot the ``'speedup'``
        values found in ``cpu_data`` as-is; the CUDA curves are computed
        here as 8-thread CPU time divided by kernel time;
      * bottom-right: Kernel1 time divided by Kernel2 time.

    Ratios whose denominator is 0 are plotted as NaN (the point/bar is
    left out) instead of raising ``ZeroDivisionError``. The figure is
    saved as ``cpu_vs_gpu_comparison.png`` in ``OUTPUT_DIR`` at 300 dpi and
    is always closed, even when plotting fails.

    Args:
        cpu_data: dict mapping the thread counts 8, 64 and 256 to lists of
            dicts with ``'time'``, ``'gflops'`` and ``'speedup'`` entries,
            one per matrix size.
        cuda1_data: dict with ``'times'`` and ``'gflops'`` lists for Kernel1.
        cuda2_data: dict with ``'times'`` and ``'gflops'`` lists for Kernel2.
        sizes: matrix sizes, one per data point of every series.

    Raises:
        ValueError: if any series does not contain exactly one value per
            entry of ``sizes``.
        KeyError: if ``cpu_data`` lacks one of the thread counts 8, 64, 256.
    """
    threads_list = [8, 64, 256]
    cuda1_times = cuda1_data['times']
    cuda2_times = cuda2_data['times']

    def cpu_series(key):
        return [
            (f'CPU {threads}线程', [item[key] for item in cpu_data[threads]])
            for threads in threads_list
        ]

    time_series = cpu_series('time') + [
        ('CUDA Kernel1', cuda1_times),
        ('CUDA Kernel2', cuda2_times),
    ]
    gflops_series = cpu_series('gflops') + [
        ('CUDA Kernel1', cuda1_data['gflops']),
        ('CUDA Kernel2', cuda2_data['gflops']),
    ]
    speedup_series = cpu_series('speedup')

    # Fail early with a readable message rather than a broadcasting error
    # from matplotlib/numpy further down.
    for label, values in time_series + gflops_series + speedup_series:
        if len(values) != len(sizes):
            raise ValueError(
                f"{label} 的数据点数量 ({len(values)}) 与矩阵规模数量 "
                f"({len(sizes)}) 不一致"
            )

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    try:
        # Panel 1: run time comparison
        ax = axes[0, 0]
        _draw_grouped_bars(ax, sizes, time_series)
        ax.set_xlabel('矩阵规模', fontsize=11)
        ax.set_ylabel('运行时间 (s)', fontsize=11)
        ax.set_title('运行时间对比', fontsize=13)
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.3, axis='y')

        # Panel 2: GFLOPS comparison
        ax = axes[0, 1]
        _draw_grouped_bars(ax, sizes, gflops_series)
        ax.set_xlabel('矩阵规模', fontsize=11)
        ax.set_ylabel('GFLOPS', fontsize=11)
        ax.set_title('计算性能对比 (GFLOPS)', fontsize=13)
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.3, axis='y')

        # Panel 3: speed-up. CUDA speed-ups use the 8-thread CPU run as the
        # baseline (as the panel title states).
        # NOTE(review): the CPU 'speedup' values come straight from the data
        # file; confirm they share this baseline.
        ax = axes[1, 0]
        baseline_times = time_series[0][1]  # 8-thread CPU times
        for label, values in speedup_series:
            ax.plot(sizes, values, 'o-', linewidth=2, markersize=8, label=label)
        ax.plot(sizes, _safe_ratio(baseline_times, cuda1_times), 's-',
                linewidth=2, markersize=8, label='CUDA Kernel1')
        ax.plot(sizes, _safe_ratio(baseline_times, cuda2_times), '^-',
                linewidth=2, markersize=8, label='CUDA Kernel2')
        ax.set_xlabel('矩阵规模', fontsize=11)
        ax.set_ylabel('加速比', fontsize=11)
        ax.set_title('加速比对比 (相对于8线程CPU)', fontsize=13)
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.3)

        # Panel 4: Kernel2 improvement over Kernel1
        ax = axes[1, 1]
        positions = np.arange(len(sizes))
        ax.bar(positions, _safe_ratio(cuda1_times, cuda2_times),
               color='steelblue', alpha=0.7)
        ax.set_xlabel('矩阵规模', fontsize=11)
        ax.set_ylabel('性能提升倍数', fontsize=11)
        ax.set_title('Kernel2 相对于 Kernel1 的性能提升', fontsize=13)
        ax.set_xticks(positions)
        ax.set_xticklabels([f'{s}x{s}' for s in sizes])
        ax.grid(True, alpha=0.3, axis='y')

        fig.tight_layout()
        fig.savefig(OUTPUT_DIR / 'cpu_vs_gpu_comparison.png', dpi=300)
        print("✓ 生成图表: cpu_vs_gpu_comparison.png")
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)
|
|
|
|
|
|
def plot_blocksize_analysis(data):
    """Draw run time and GFLOPS for each BLOCK_SIZE across matrix sizes.

    The left panel shows grouped bars of ``'time'``; the right panel shows
    one ``'gflops'`` line per block size. Block sizes without any
    measurement are left out, the matrix sizes are the union over all block
    sizes, and a missing (block size, matrix size) combination is plotted as
    NaN instead of raising ``KeyError``. The figure is saved as
    ``blocksize_analysis.png`` in ``OUTPUT_DIR`` at 300 dpi and is always
    closed, even when plotting fails.

    Args:
        data: dict mapping block size to
            ``{matrix_size: {'time': ..., 'gflops': ...}}``.

    Raises:
        ValueError: if no block size has any measurement.
    """
    block_sizes = [block for block in sorted(data) if data[block]]
    if not block_sizes:
        raise ValueError("没有可用于绘图的 BLOCK_SIZE 数据")

    matrix_sizes = sorted({size for block in block_sizes for size in data[block]})

    def metric(block_size, key):
        # NaN marks combinations that were not measured.
        measured = data[block_size]
        return [
            measured[size][key] if size in measured else np.nan
            for size in matrix_sizes
        ]

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    try:
        # Panel 1: run time
        ax = axes[0]
        x = np.arange(len(matrix_sizes))
        # Each group occupies 0.8 of a unit; with four block sizes this is
        # the bar width of 0.2 used previously.
        width = 0.8 / len(block_sizes)

        for i, block_size in enumerate(block_sizes):
            ax.bar(x + i * width, metric(block_size, 'time'), width,
                   label=f'BLOCK={block_size}')

        ax.set_xlabel('矩阵规模', fontsize=12)
        ax.set_ylabel('运行时间 (ms)', fontsize=12)
        ax.set_title('不同 BLOCK_SIZE 的运行时间对比', fontsize=13)
        # Centre the tick under each group of bars.
        ax.set_xticks(x + (len(block_sizes) - 1) / 2 * width)
        ax.set_xticklabels([f'{s}x{s}' for s in matrix_sizes])
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3, axis='y')

        # Panel 2: GFLOPS
        ax = axes[1]
        for block_size in block_sizes:
            ax.plot(matrix_sizes, metric(block_size, 'gflops'), 'o-',
                    linewidth=2, markersize=8, label=f'BLOCK={block_size}')

        ax.set_xlabel('矩阵规模', fontsize=12)
        ax.set_ylabel('GFLOPS', fontsize=12)
        ax.set_title('不同 BLOCK_SIZE 的计算性能对比', fontsize=13)
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)

        fig.tight_layout()
        fig.savefig(OUTPUT_DIR / 'blocksize_analysis.png', dpi=300)
        print("✓ 生成图表: blocksize_analysis.png")
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)
|
|
|
|
|
|
def main():
    """Generate every figure for which a results file is available.

    Looks in ``experiment_data`` for ``vectoradd_results.txt``,
    ``matrixmul_comparison.txt`` and ``blocksize_analysis.txt``. Each file
    that exists is parsed and plotted independently; a missing file is
    reported and skipped, a file that yields no data produces a warning,
    and any exception raised while handling one file is printed without
    stopping the remaining steps. Returns early if the data directory
    itself is missing.
    """
    print("=" * 60)
    print("Lab4 CUDA 实验数据可视化")
    print("=" * 60)
    print()

    data_dir = Path("experiment_data")

    # Nothing to do without the data directory.
    if not data_dir.exists():
        print("❌ 错误: experiment_data 目录不存在")
        print(" 请先运行 ./lab4.sh 收集实验数据")
        return

    # Step 1: vector-addition performance figure
    vectoradd_file = data_dir / "vectoradd_results.txt"
    if vectoradd_file.exists():
        print("1. 绘制向量加法性能图...")
        try:
            data = parse_vectoradd_data(vectoradd_file)
            if data['sizes']:
                plot_vectoradd_performance(data)
            else:
                print(" ⚠ 警告: 无法解析向量加法数据")
        except Exception as e:
            print(f" ❌ 错误: {e}")
    else:
        print("⚠ 跳过: vectoradd_results.txt 不存在")

    # Step 2: CPU vs GPU comparison figure
    matrixmul_file = data_dir / "matrixmul_comparison.txt"
    if matrixmul_file.exists():
        print("2. 绘制 CPU vs GPU 性能对比图...")
        try:
            cpu_data, sizes = parse_matrixmul_cpu_data(matrixmul_file)
            cuda1_data = parse_cuda_kernel_data(matrixmul_file, "Kernel1")
            cuda2_data = parse_cuda_kernel_data(matrixmul_file, "Kernel2")

            # cpu_data always carries its preset thread-count keys, so the
            # dict itself is always truthy; test for parsed rows instead.
            if any(cpu_data.values()) and cuda1_data['sizes'] and cuda2_data['sizes']:
                plot_cpu_vs_gpu(cpu_data, cuda1_data, cuda2_data, sizes)
            else:
                print(" ⚠ 警告: 无法解析矩阵乘法数据")
        except Exception as e:
            print(f" ❌ 错误: {e}")
    else:
        print("⚠ 跳过: matrixmul_comparison.txt 不存在")

    # Step 3: BLOCK_SIZE analysis figure
    blocksize_file = data_dir / "blocksize_analysis.txt"
    if blocksize_file.exists():
        print("3. 绘制 BLOCK_SIZE 性能分析图...")
        try:
            data = parse_blocksize_data(blocksize_file)
            # Same reasoning as above: the parser returns a dict with preset
            # block-size keys, so check whether any of them has entries.
            if any(data.values()):
                plot_blocksize_analysis(data)
            else:
                print(" ⚠ 警告: 无法解析 BLOCK_SIZE 数据")
        except Exception as e:
            print(f" ❌ 错误: {e}")
    else:
        print("⚠ 跳过: blocksize_analysis.txt 不存在")

    print()
    print("=" * 60)
    print(f"✓ 所有图表已保存到: {OUTPUT_DIR}/")
    print("=" * 60)
|
|
|
|
|
|
# Run the visualisation only when this file is executed as a script.
if __name__ == "__main__":
    main()
|