diff --git a/.gitignore b/.gitignore
index 62d5619..23c6353 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@ compile_commands.json
 *.swp
 # Temporary files
 *~
-.cache/
\ No newline at end of file
+.cache/
+*.bak
\ No newline at end of file
diff --git a/lab3/nbody/lab3_nbody.sh b/lab3/nbody/lab3_nbody.sh
new file mode 100755
index 0000000..bb1febb
--- /dev/null
+++ b/lab3/nbody/lab3_nbody.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Build and run the serial N-body simulation.
+# Usage: ./lab3_nbody.sh [N]   (N = number of bodies, default 4)
+
+echo "=========================================="
+echo "N体问题串行模拟实验"
+echo "=========================================="
+echo ""
+
+# Number of bodies: first argument, falling back to 4.
+N=${1:-4}
+
+echo "运行参数："
+echo "  天体数量: $N"
+echo "  时间步长: 0.01 s"
+echo "  总步数: 100"
+echo ""
+# Build first; stop here rather than run a stale or missing binary.
+xmake build nbody_ser || exit 1
+# Run the simulation; N is quoted so it reaches the program as one argument.
+./build/linux/x86_64/release/nbody_ser "$N" || exit 1
+
+echo ""
+echo "=========================================="
+echo "实验完成"
+echo "=========================================="
diff --git a/lab3/nbody/nbody_par.cpp b/lab3/nbody/nbody_par.cpp
new file mode 100644
index 0000000..c4e5780
--- /dev/null
+++ b/lab3/nbody/nbody_par.cpp
@@ -0,0 +1,301 @@
+#include <assert.h>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <iostream>
+#include <vector>
+#include <mpi.h>
+using namespace std;
+// Simulation constants
+const double G = 6.67430e-11;  // gravitational constant (m^3 kg^-1 s^-2)
+const double DT = 0.01;        // time step
+const int TMAX = 100;          // total number of time steps
+const double mass_scale = 1e24;  // mass scale factor
+const double dist_scale = 1e8;   // distance scale factor
+const double vel_scale = 1e3;    // velocity scale factor
+
+// 3-D vector of doubles with component-wise arithmetic.
+struct Vec3 {
+  double x, y, z;
+
+  // Default-constructs the zero vector.
+  Vec3() : x(0), y(0), z(0) {}
+  Vec3(double x, double y, double z) : x(x), y(y), z(z) {}
+
+  // Component-wise sum.
+  Vec3 operator+(const Vec3 &rhs) const { return {x + rhs.x, y + rhs.y, z + rhs.z}; }
+
+  // Component-wise difference.
+  Vec3 operator-(const Vec3 &rhs) const { return {x - rhs.x, y - rhs.y, z - rhs.z}; }
+
+  // Multiplies every component by `k`.
+  Vec3 operator*(double k) const { return {x * k, y * k, z * k}; }
+
+  // Divides every component by `k` (no zero check).
+  Vec3 operator/(double k) const { return {x / k, y / k, z / k}; }
+
+  // Euclidean length.
+  double magnitude() const {
+    return std::sqrt(x * x + y * y + z * z);
+  }
+};
+
+// State of one simulated body.
+struct Body {
+  double mass;   // mass
+  Vec3 position; // position
+  Vec3 velocity; // velocity
+  Vec3 force;    // force slot; never assigned explicitly in this file
+};
+
+// Fills bodies[0..n-1] with one heavy central body and n-1 lighter bodies on
+// circular-orbit initial conditions in the z = 0 plane. Does nothing when
+// n <= 0 or when `bodies` holds fewer than n elements (previously bodies[0]
+// was written unconditionally). With verbose set, prints the initial state.
+void init_bodies(vector<Body> &bodies, int n, bool verbose=false) {
+  if (n <= 0 || bodies.size() < static_cast<size_t>(n)) return;
+
+  // Central body (sun-like), at rest at the origin.
+  bodies[0].mass = 1000 * mass_scale;
+  bodies[0].position = Vec3(0, 0, 0);
+  bodies[0].velocity = Vec3(0, 0, 0);
+
+  // Remaining bodies (planet-like): mass and orbit radius grow with index i.
+  for (int i = 1; i < n; i++) {
+    const double angle = 2.0 * M_PI * i / n;
+    const double radius = (1.0 + i * 0.5) * dist_scale;
+    // Tangential speed sqrt(G*M/r) of a circular orbit around body 0.
+    const double orbital_speed = sqrt(G * bodies[0].mass / radius);
+
+    bodies[i].mass = (1.0 + i * 0.5) * mass_scale;
+    bodies[i].position = Vec3(radius * cos(angle), radius * sin(angle), 0.0);
+    bodies[i].velocity =
+        Vec3(-orbital_speed * sin(angle), orbital_speed * cos(angle), 0.0);
+  }
+
+  if (!verbose) return;
+  // Print the initial state in scaled units.
+  cout << fixed << setprecision(6) << "\n初始状态:" << endl;
+  for (int i = 0; i < n; i++) {
+    cout << "天体 " << i << ": 质量=" << bodies[i].mass / mass_scale << "e24 kg, "
+         << "位置=(" << bodies[i].position.x / dist_scale << ", "
+         << bodies[i].position.y / dist_scale << ", "
+         << bodies[i].position.z / dist_scale << ")e8 m" << endl;
+  }
+}
+
+// For every body in local_particles, sums the gravitational pull exerted by
+// all bodies in all_particles and then advances that body's velocity and
+// position by one step of DT. local_start is the global index (the index into
+// all_particles) of local_particles[0]; it is used to skip self-interaction.
+void compute_local_forces(vector<Body>& local_particles,
+                         const vector<Body>& all_particles,
+                         int local_start) {
+  const size_t body_total = all_particles.size();
+
+  for (size_t k = 0; k < local_particles.size(); ++k) {
+    Body& self = local_particles[k];
+    const int self_global = local_start + k;
+    Vec3 net(0, 0, 0);
+
+    for (size_t other = 0; other != body_total; ++other) {
+      // A body exerts no force on itself.
+      if (static_cast<int>(other) == self_global) continue;
+
+      // Vector pointing from this body towards the other one.
+      const Vec3 delta = all_particles[other].position - self.position;
+      const double dist = delta.magnitude();
+
+      // Coincident bodies are skipped so the division below stays finite.
+      if (dist < 1e-10) continue;
+
+      // Newtonian attraction G*m1*m2/r^2, directed along the unit vector.
+      const double pull = G * self.mass * all_particles[other].mass
+                          / (dist * dist);
+      net = net + (delta / dist) * pull;
+    }
+
+    // Velocity is advanced first; the position step then uses the new
+    // velocity. Only `self` changes, so later bodies in this call still see
+    // the positions stored in all_particles.
+    self.velocity = self.velocity + net * DT / self.mass;
+    self.position = self.position + self.velocity * DT;
+  }
+}
+
+// Block partition of `bodies_count` bodies over `world_size` ranks: the first
+// (bodies_count % world_size) ranks own one body more than the rest.
+// Example, 5 bodies on 2 ranks: rank 0 -> size 3, offset 0; rank 1 -> size 2, offset 3.
+void get_rank_info(int rank_id,
+  int bodies_count, // total number of bodies
+  int world_size,   // total number of processes
+  int& send_size,  // out: number of bodies owned by `rank_id`
+  int& send_offset // out: global index of the first body owned by `rank_id`
+) {
+  const int base = bodies_count / world_size;
+  const int extra = bodies_count % world_size;
+
+  // Ranks below `extra` each absorb one leftover body.
+  const bool gets_extra = rank_id < extra;
+
+  send_size = base + (gets_extra ? 1 : 0);
+  // Every lower rank contributes `base` bodies, plus one for each lower rank
+  // that itself received a leftover.
+  send_offset = rank_id * base + (gets_extra ? rank_id : extra);
+}
+
+// Entry point of the MPI N-body simulation.
+// Usage: nbody_par [n] [--verbose|-v]   (n = number of bodies, default 4)
+// Rank 0 builds the initial system and broadcasts it; each rank then advances
+// its own contiguous slice of bodies, and after every time step the slices
+// are re-assembled on all ranks with MPI_Allgatherv.
+int main(int argc, char **argv) {
+  MPI_Init(&argc, &argv);
+
+  // Communicator size and this process's rank.
+  int world_size, world_rank;
+  bool verbose=false;
+  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+  // The body count comes from the first command-line argument.
+  int n = 4; // default: 4 bodies
+  if (argc > 1) {
+    n = atoi(argv[1]);
+  }
+  if (argc > 2) {
+    verbose = (strcmp(argv[2], "--verbose") == 0 || strcmp(argv[2], "-v") == 0);
+  }
+
+  // atoi yields 0 for non-numeric text. A count below 1 would make
+  // init_bodies write bodies[0] of an empty vector, and a negative count
+  // would wrap to a huge vector size, so reject it on every rank.
+  if (n < 1) {
+    if (world_rank == 0) {
+      cerr << "Usage: " << argv[0] << " [n >= 1] [--verbose|-v]" << endl;
+    }
+    MPI_Finalize();
+    return 1;
+  }
+
+  // Only rank 0 prints the run parameters.
+  if (verbose && world_rank == 0) {
+    cout << "N体问题并行模拟" << endl;
+    cout << "天体数量: " << n << endl;
+    cout << "进程数量: " << world_size << endl;
+    cout << "时间步长: " << DT << " s" << endl;
+    cout << "总步数: " << TMAX << endl;
+    cout << "----------------------------------------" << endl;
+  }
+
+  // MPI datatype for Body: mass(1) + position(3) + velocity(3) + force(3)
+  // = 10 contiguous doubles. The static_assert fails the build if the struct
+  // layout ever stops matching that description.
+  static_assert(sizeof(Body) == 10 * sizeof(double), "Body must be 10 doubles");
+  MPI_Datatype MPI_BODY;
+  MPI_Type_contiguous(10, MPI_DOUBLE, &MPI_BODY);
+  MPI_Type_commit(&MPI_BODY);
+
+  // ============================================
+  // Step 1: work out which bodies belong to each rank
+  // Step 2: give every rank the full initial system (all_particles)
+  // ============================================
+
+  vector<Body> all_particles(n);
+
+  // Per-rank slice sizes and offsets. The same table serves as the
+  // recvcounts/displs arguments of MPI_Allgatherv below.
+  vector<int> all_send_size(world_size);
+  vector<int> all_send_offset(world_size);
+
+  for (int r = 0; r < world_size; r++) {
+    get_rank_info(r, n, world_size, all_send_size[r], all_send_offset[r]);
+    #ifdef DEBUG
+    if (world_rank == 0) { // print from rank 0 only
+      cout << "Process " << r << " will send " 
+           << all_send_size[r] << " bodies starting from offset " 
+           << all_send_offset[r] << endl;
+    }
+    #endif
+  }
+
+  // This rank's slice is read from the same table (instead of repeating the
+  // partition arithmetic here) so the two can never disagree.
+  const int local_count = all_send_size[world_rank];
+  const int local_start = all_send_offset[world_rank];
+
+  // Rank 0 creates the initial system...
+  if (world_rank == 0) {
+    init_bodies(all_particles, n, verbose);
+  }
+
+  // ...and broadcasts it to every rank.
+  MPI_Bcast(all_particles.data(), n, MPI_BODY, 0, MPI_COMM_WORLD);
+
+  // Copy out the bodies this rank is responsible for.
+  vector<Body> local_particles(all_particles.begin() + local_start,
+                               all_particles.begin() + local_start + local_count);
+
+  if (world_rank == 0) {
+    cout << "\n开始模拟..." << endl;
+  }
+
+  double start_time = MPI_Wtime();
+
+  #ifdef DEBUG
+  if (verbose || world_rank == 0) {
+    cout << fixed << setprecision(6);
+    cout << "\n进程 " << world_rank << " 负责天体 " << local_start 
+         << " 到 " << (local_start + local_count - 1) << endl;
+  }
+  #endif
+  // ============================================
+  // Main loop: N-body time stepping
+  // ============================================
+  for (int t = 0; t < TMAX; t++) {
+    // ------------------------------------------
+    // Compute the force of every body on this rank's bodies and advance
+    // local_particles by one time step.
+    // ------------------------------------------
+
+    compute_local_forces(local_particles, all_particles, local_start);
+
+    // ------------------------------------------
+    // Exchange: every rank contributes its updated slice and receives all
+    // slices in one collective call (this is not a hand-written ring). The
+    // call also stores this rank's own slice into all_particles, so neither
+    // a separate send buffer nor a manual copy-back is needed.
+    // ------------------------------------------
+    MPI_Allgatherv(local_particles.data(), local_count, 
+        MPI_BODY, all_particles.data(), 
+        all_send_size.data(), all_send_offset.data(), 
+        MPI_BODY, MPI_COMM_WORLD);
+
+    // Print the state every 10 steps (rank 0 only).
+    if (verbose && (t + 1) % 10 == 0 && world_rank == 0) {
+      cout << "时间步 " << t + 1 << ":" << endl;
+      for (int i = 0; i < n; i++) {
+        cout << "  天体 " << i << ": "
+             << "位置=(" << all_particles[i].position.x / dist_scale << ", "
+             << all_particles[i].position.y / dist_scale << ", "
+             << all_particles[i].position.z / dist_scale << ")e8 m, "
+             << "速度=(" << all_particles[i].velocity.x / vel_scale << ", "
+             << all_particles[i].velocity.y / vel_scale << ", "
+             << all_particles[i].velocity.z / vel_scale << ")e3 m/s" << endl;
+      }
+    }
+  }
+
+  if (world_rank == 0) {
+    // Elapsed wall time as seen by rank 0.
+    const double end_time = MPI_Wtime();
+    cout << "" << endl;
+    cout << "模拟用时: " << end_time - start_time << " 秒" << endl;
+    cout << "\n模拟完成!" << endl;
+  }
+
+  MPI_Type_free(&MPI_BODY);
+  MPI_Finalize();
+  return 0;
+}
diff --git a/lab3/nbody/nbody_ser.cpp b/lab3/nbody/nbody_ser.cpp
new file mode 100644
index 0000000..72d0070
--- /dev/null
+++ b/lab3/nbody/nbody_ser.cpp
@@ -0,0 +1,190 @@
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+using namespace std;
+
+// Simulation constants
+const double G = 6.67430e-11;  // gravitational constant (m^3 kg^-1 s^-2)
+const double DT = 0.01;        // time step
+const int TMAX = 100;          // total number of time steps
+// Three-component double vector supporting +, -, scalar * and scalar /.
+// All operators return a new vector and leave their operands untouched.
+struct Vec3 {
+    double x, y, z;
+
+    // Zero vector by default; otherwise the three given components.
+    Vec3() : x(0), y(0), z(0) {}
+    Vec3(double x, double y, double z) : x(x), y(y), z(z) {}
+
+    // Element-wise addition.
+    Vec3 operator+(const Vec3& v) const { return {x + v.x, y + v.y, z + v.z}; }
+
+    // Element-wise subtraction.
+    Vec3 operator-(const Vec3& v) const { return {x - v.x, y - v.y, z - v.z}; }
+
+    // Scales every component by `s`.
+    Vec3 operator*(double s) const { return {x * s, y * s, z * s}; }
+
+    // Divides every component by `s` (no zero check).
+    Vec3 operator/(double s) const { return {x / s, y / s, z / s}; }
+
+    // Euclidean norm.
+    double magnitude() const {
+        const double sum_sq = x*x + y*y + z*z;
+        return std::sqrt(sum_sq);
+    }
+};
+
+// State of one simulated body.
+struct Body {
+    double mass;      // mass
+    Vec3 position;    // position
+    Vec3 velocity;    // velocity
+    Vec3 force;       // force slot; never assigned explicitly in this file
+};
+
+// Net gravitational force acting on bodies[i], summed over every other body.
+// Each pair contributes G * m_i * m_j / r^2 along the unit vector from body i
+// to body j; pairs closer than 1e-10 are skipped to avoid dividing by zero.
+Vec3 compute_force(int i, const vector<Body>& bodies) {
+    const size_t self = static_cast<size_t>(i);
+    const size_t count = bodies.size();
+    Vec3 net(0, 0, 0);
+
+    for (size_t j = 0; j != count; ++j) {
+        // A body does not attract itself.
+        if (j == self) continue;
+
+        const Body& target = bodies[self];
+        const Body& source = bodies[j];
+
+        // Separation vector pointing from body i towards body j.
+        const Vec3 delta = source.position - target.position;
+        const double dist = delta.magnitude();
+        if (dist < 1e-10) continue;  // coincident bodies
+
+        const double pull = G * target.mass * source.mass / (dist * dist);
+        net = net + (delta / dist) * pull;
+    }
+
+    return net;
+}
+
+// Entry point of the serial N-body simulation.
+// Usage: nbody_ser [n] [--verbose|-v]   (n = number of bodies, default 4)
+int main(int argc, char** argv) {
+    // The body count may be given as the first command-line argument.
+    int n = 4;  // default: 4 bodies
+    bool verbose = false;
+    if (argc > 1) {
+        n = atoi(argv[1]);
+    }
+    if (argc > 2) {
+        verbose = (strcmp(argv[2], "--verbose") == 0 || strcmp(argv[2], "-v") == 0);
+    }
+    // atoi returns 0 for non-numeric text. With n < 1 the vectors below would
+    // be empty (or their size would wrap around) while bodies[0] is still
+    // written, so reject such input up front.
+    if (n < 1) {
+        cerr << "Usage: " << argv[0] << " [n >= 1] [--verbose|-v]" << endl;
+        return 1;
+    }
+
+    cout << "N体问题串行模拟" << endl;
+    cout << "天体数量: " << n << endl;
+    cout << "时间步长: " << DT << " s" << endl;
+    cout << "总步数: " << TMAX << endl;
+    cout << "----------------------------------------" << endl;
+
+    // Current state, plus a scratch array that receives the next state.
+    vector<Body> bodies(n);
+    vector<Body> bodies_new(n);
+
+    // Initial conditions (simplified: a sun with planets on circular orbits).
+    // The scale factors keep the printed numbers readable.
+    double mass_scale = 1e24;  // mass scale factor
+    double dist_scale = 1e8;   // distance scale factor
+    double vel_scale = 1e3;    // velocity scale factor
+
+    // Central body (sun-like), at rest at the origin.
+    bodies[0].mass = 1000 * mass_scale;
+    bodies[0].position = Vec3(0, 0, 0);
+    bodies[0].velocity = Vec3(0, 0, 0);
+
+    // Remaining bodies (planet-like).
+    for (int i = 1; i < n; i++) {
+        bodies[i].mass = (1.0 + i * 0.5) * mass_scale;
+        double angle = 2.0 * M_PI * i / n;
+        double radius = (1.0 + i * 0.5) * dist_scale;
+        bodies[i].position = Vec3(radius * cos(angle), radius * sin(angle), 0.0);
+
+        // Tangential speed sqrt(G*M/r) puts the body on a circular orbit.
+        double orbital_speed = sqrt(G * bodies[0].mass / radius);
+        bodies[i].velocity =
+            Vec3(-orbital_speed * sin(angle), orbital_speed * cos(angle), 0.0);
+    }
+
+    // Print the initial state.
+    cout << fixed << setprecision(6);
+    if(verbose){
+        cout << "\n初始状态:" << endl;
+        for (int i = 0; i < n; i++) {
+            cout << "天体 " << i << ": 质量=" << bodies[i].mass/mass_scale << "e24 kg, "
+            << "位置=(" << bodies[i].position.x/dist_scale << ", " 
+            << bodies[i].position.y/dist_scale << ", " 
+            << bodies[i].position.z/dist_scale << ")e8 m" << endl;
+        }
+    }
+
+    // Main loop: N-body time stepping.
+    cout << "\n开始模拟..." << endl;
+    const clock_t start_time = clock();  // clock() returns clock_t, not time_t
+    for (int t = 0; t < TMAX; t++) {
+        // Phase 1: derive every body's next velocity and position from the
+        // current state only, writing the results into bodies_new.
+        for (int i = 0; i < n; i++) {
+            // Force on body i.
+            Vec3 F = compute_force(i, bodies);
+
+            // New velocity: v^(t+1) = v^t + F * dt / m
+            Vec3 v_new = bodies[i].velocity + F * DT / bodies[i].mass;
+
+            // New position: x^(t+1) = x^t + v^(t+1) * dt
+            Vec3 x_new = bodies[i].position + v_new * DT;
+
+            // Store in the scratch array.
+            bodies_new[i].mass = bodies[i].mass;
+            bodies_new[i].position = x_new;
+            bodies_new[i].velocity = v_new;
+        }
+
+        // Phase 2: make the new state current. Swapping is O(1); phase 1
+        // rewrites mass, position and velocity of the scratch array next step.
+        bodies.swap(bodies_new);
+
+        // Print the state every 10 steps.
+        if (verbose && (t + 1) % 10 == 0) {
+            cout << "时间步 " << t + 1 << ":" << endl;
+            for (int i = 0; i < n; i++) {
+                cout << "  天体 " << i << ": "
+                     << "位置=(" << bodies[i].position.x/dist_scale << ", " 
+                     << bodies[i].position.y/dist_scale << ", " 
+                     << bodies[i].position.z/dist_scale << ")e8 m, "
+                     << "速度=(" << bodies[i].velocity.x/vel_scale << ", " 
+                     << bodies[i].velocity.y/vel_scale << ", " 
+                     << bodies[i].velocity.z/vel_scale << ")e3 m/s" << endl;
+            }
+        }
+    }
+    const clock_t end_time = clock();
+    double elapsed_secs = double(end_time - start_time) / CLOCKS_PER_SEC;
+    cout << "\n模拟用时: " << elapsed_secs << " 秒" << endl;
+    cout << "\n模拟完成!" << endl;
+
+    return 0;
+}
diff --git a/lab3/nbody/xmake.lua b/lab3/nbody/xmake.lua
new file mode 100644
index 0000000..6291d52
--- /dev/null
+++ b/lab3/nbody/xmake.lua
@@ -0,0 +1,15 @@
+add_rules("mode.debug", "mode.release")
+
+-- MPI packages; {system = true} asks xmake to use the system installation.
+add_requires("mpi", {system = true})
+add_requires("mpi_cxx", {system = true})
+-- Serial N-body simulation (no MPI packages attached).
+target("nbody_ser")
+    set_kind("binary")
+    add_files("nbody_ser.cpp")
+-- MPI-parallel N-body simulation.
+target("nbody_par")
+    set_kind("binary")
+    add_files("nbody_par.cpp")
+    add_packages("mpi")
+    add_packages("mpi_cxx")
diff --git a/lab3/prime/lab3_prime.sh b/lab3/prime/lab3_prime.sh
new file mode 100755
index 0000000..277c28b
--- /dev/null
+++ b/lab3/prime/lab3_prime.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Lab 3: Prime Number Calculation Performance Test
+# Runs prime_par_naive for several N values and process counts and tabulates
+
+echo "=========================================="
+echo "Lab 3: Prime Number Calculation Performance Test"
+echo "=========================================="
+echo ""
+
+# Upper bounds (N) and MPI process counts to test
+N_VALUES=(100000 200000 400000 800000)
+PROCESS_COUNTS=(1 2 4 6 8)
+
+# Output file for results
+OUTPUT_FILE="prime_results.txt"
+
+# Program under test (path relative to the current directory); fail early if absent
+BINARY="./build/linux/x86_64/release/prime_par_naive"
+if [ ! -x "$BINARY" ]; then
+    echo "Error: $BINARY not found; build it first (xmake build prime_par_naive)." >&2
+    exit 1
+fi
+
+# Clear previous results
+: > "$OUTPUT_FILE"
+
+# Print header
+echo "N值	进程数	素数个数	执行时间(秒)" | tee -a "$OUTPUT_FILE"
+echo "--------------------------------------------------------" | tee -a "$OUTPUT_FILE"
+
+for N in "${N_VALUES[@]}"; do
+    echo ""
+    echo "Testing N = $N"
+    echo "------------------------"
+
+    for P in "${PROCESS_COUNTS[@]}"; do
+        echo -n "Running with $P process(es)... "
+        # Run the program, capturing stdout and stderr together
+        OUTPUT=$(mpirun -n "$P" "$BINARY" "$N" 2>&1)
+        # Extract prime count and time from output
+        PRIME_COUNT=$(echo "$OUTPUT" | grep "Between" | grep -oP '\d+(?= primes)')
+        TIME=$(echo "$OUTPUT" | grep "Time =" | grep -oP '[0-9.]+(?= seconds)')
+        # Both fields must be present for the run to count as successful
+        if [ -n "$PRIME_COUNT" ] && [ -n "$TIME" ]; then
+            echo "$N	$P	$PRIME_COUNT	$TIME" | tee -a "$OUTPUT_FILE"
+            echo "Done! (Primes: $PRIME_COUNT, Time: ${TIME}s)"
+        else
+            echo "Error running program!"
+            echo "$N	$P	ERROR	ERROR" | tee -a "$OUTPUT_FILE"
+        fi
+    done
+done
+
+echo ""
+echo "=========================================="
+echo "Test completed!"
+echo "=========================================="
+echo ""
+echo "Results saved to: $OUTPUT_FILE"
+echo ""
+echo "Summary Table:"
+echo "--------------------------------------------------------"
+cat "$OUTPUT_FILE"
+echo "--------------------------------------------------------"
diff --git a/lab3/prime/src/prime_par.cpp b/lab3/prime/src/prime_par.cpp
new file mode 100644
index 0000000..c05f76f
--- /dev/null
+++ b/lab3/prime/src/prime_par.cpp
@@ -0,0 +1,181 @@
+#include <iostream>
+#include <vector>
+#include <cmath>
+#include <mpi.h>
+
+// Sieves the closed interval [low, high] with the given base primes.
+// On return is_prime has high - low + 1 entries (none if high < low) and
+// is_prime[k] tells whether low + k survived: values below 2 are cleared, and
+// so is every multiple of a base prime except the prime itself. That is a full
+// primality table when base_primes holds all primes <= sqrt(high).
+void local_sieve(int low, int high, std::vector<bool>& is_prime, const std::vector<int>& base_primes) {
+    if (high < low) {
+        is_prime.clear();  // a negative count would wrap to a huge size
+        return;
+    }
+    // 64-bit arithmetic: near INT_MAX the element count and `multiple += p`
+    // would overflow an int.
+    const long long count = static_cast<long long>(high) - low + 1;
+    is_prime.assign(static_cast<std::vector<bool>::size_type>(count), true);
+
+    // Values below 2 (in particular 0 and 1) are not prime.
+    for (long long v = low; v <= high && v < 2; ++v) {
+        is_prime[v - low] = false;
+    }
+
+    for (int p : base_primes) {
+        if (p < 2) continue;  // not a sieving prime; also avoids dividing by zero
+        // First multiple of p that is >= low, but never below 2p so that p
+        // itself stays marked (the old `== p` test missed this for low == 0).
+        long long multiple = (static_cast<long long>(low) / p) * p;
+        if (multiple < low) multiple += p;
+        if (multiple < 2LL * p) multiple = 2LL * p;
+
+        for (; multiple <= high; multiple += p) {
+            is_prime[multiple - low] = false;
+        }
+    }
+}
+
+// Counts the primes in [2, N] with a segmented Sieve of Eratosthenes under MPI.
+// Usage: prime_par <N> <B>
+//   N: upper bound of the range [2, N]
+//   B: block size (> 0) used to distribute the range
+// Rank 0 sieves [2, sqrt(N)] and broadcasts those base primes. The numbers
+// above sqrt(N) are cut into blocks of B values; block i is sieved by rank
+// i % size, and the per-rank counts are summed on rank 0, which prints the
+// total and the elapsed time.
+int main(int argc, char* argv[]) {
+    MPI_Init(&argc, &argv);
+
+    int rank, size;
+    double wtime = 0.0;  // start time; only meaningful on rank 0
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+
+    // Check for correct number of arguments
+    if (argc != 3) {
+        if (rank == 0) {
+            std::cerr << "Usage: " << argv[0] << " <N> <B>" << std::endl;
+            std::cerr << "  N: Upper bound of the range [2, N]." << std::endl;
+            std::cerr << "  B: Block size for distributing the range." << std::endl;
+        }
+        MPI_Finalize();
+        return 1;
+    }
+
+    // Parse the two arguments.
+    int N = std::atoi(argv[1]);
+    int B = std::atoi(argv[2]);
+
+    // B is a divisor in the block computation below: zero would trap, and a
+    // negative value is meaningless. atoi also returns 0 for non-numeric text.
+    if (B <= 0) {
+        if (rank == 0) {
+            std::cerr << "Error: block size B must be a positive integer." << std::endl;
+        }
+        MPI_Finalize();
+        return 1;
+    }
+
+    // Nothing to count below 2.
+    if (N < 2) {
+        if (rank == 0) {
+            std::cout << "The range [2, " << N << "] contains 0 prime numbers." << std::endl;
+        }
+        MPI_Finalize();
+        return 0;
+    }
+
+    // --- Step 1: Process 0 finds base primes up to sqrt(N) ---
+    std::vector<int> base_primes;
+    int limit = static_cast<int>(std::sqrt(N));
+    if (rank == 0) {
+        wtime = MPI_Wtime();
+
+        // Plain sieve of Eratosthenes over [0, limit].
+        std::vector<bool> is_prime_small(limit + 1, true);
+        is_prime_small[0] = is_prime_small[1] = false;
+        for (int p = 2; p * p <= limit; ++p) {
+            if (is_prime_small[p]) {
+                for (int i = p * p; i <= limit; i += p) {
+                    is_prime_small[i] = false;
+                }
+            }
+        }
+        for (int i = 2; i <= limit; ++i) {
+            if (is_prime_small[i]) {
+                base_primes.push_back(i);
+            }
+        }
+    }
+
+    // --- Step 2: Broadcast base primes to all processes ---
+    // The count goes first so the other ranks can size their receive buffer.
+    int num_base_primes = base_primes.size();
+    MPI_Bcast(&num_base_primes, 1, MPI_INT, 0, MPI_COMM_WORLD);
+    if (rank != 0) {
+        base_primes.resize(num_base_primes);
+    }
+    MPI_Bcast(base_primes.data(), num_base_primes, MPI_INT, 0, MPI_COMM_WORLD);
+
+    // --- Step 3: Distribute the range [sqrt(N)+1, N] among processes ---
+    int start_range = limit + 1;
+    if (start_range > N) {
+        // No range to distribute, all primes are base primes
+        int total_count = base_primes.size();
+        if (rank == 0) {
+            std::cout << "Total prime count in [2, " << N << "] is " << total_count << "." << std::endl;
+        }
+        MPI_Finalize();
+        return 0;
+    }
+
+    // Block-cyclic distribution: block i covers B consecutive numbers and is
+    // handled by rank i % size. Previously every block was sieved here and its
+    // count thrown away, after which a second, contiguous partition was sieved
+    // to obtain the result; now the block counts themselves are accumulated,
+    // so each number is sieved exactly once and B really controls the split.
+    // 64-bit arithmetic keeps the block bounds from overflowing when B or N is
+    // close to INT_MAX.
+    const long long total_elements = static_cast<long long>(N) - start_range + 1;
+    const long long num_blocks = (total_elements + B - 1) / B;
+
+    int local_prime_count = 0;
+    std::vector<bool> is_prime_block;  // reused for every block
+    for (long long i = rank; i < num_blocks; i += size) {
+        const long long block_start = start_range + i * B;
+        long long block_end = block_start + B - 1;
+        if (block_end > N) {
+            block_end = N;  // the last block may be shorter than B
+        }
+
+        // Both bounds lie in [start_range, N], so they fit in an int.
+        local_sieve(static_cast<int>(block_start), static_cast<int>(block_end),
+                    is_prime_block, base_primes);
+
+        // Add this block's primes to the rank-local total.
+        for (bool prime : is_prime_block) {
+            if (prime) {
+                local_prime_count++;
+            }
+        }
+    }
+
+    // --- Step 4: Gather local prime counts ---
+    // Only rank 0 receives the sum.
+    int global_prime_count = 0;
+    MPI_Reduce(&local_prime_count, &global_prime_count, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
+
+    // --- Step 5: Process 0 prints the final result ---
+    if (rank == 0) {
+        const double end_wtime = MPI_Wtime() - wtime;
+        // The base primes (<= sqrt(N)) were not part of any block; add them here.
+        int total_count = base_primes.size() + global_prime_count;
+        std::cout << "Total prime count in [2, " << N << "] is " << total_count << "." << std::endl;
+        std::cout << "Time = " << end_wtime << " seconds" << std::endl;
+    }
+
+    MPI_Finalize();
+    return 0;
+}
\ No newline at end of file
diff --git a/lab3/prime/src/prime_par_naive.cpp b/lab3/prime/src/prime_par_naive.cpp
new file mode 100644
index 0000000..b8a748d
--- /dev/null
+++ b/lab3/prime/src/prime_par_naive.cpp
@@ -0,0 +1,99 @@
+#include <cstdlib>
+#include <cstdio>
+#include <ctime>
+#include <mpi.h>
+
+int main ( int argc, char *argv[] );
+int prime_part ( int id, int p, int n );
+
+// MPI driver: counts the primes in [2, n] by trial division, with the
+// candidates dealt out to the ranks round-robin (see prime_part).
+// Usage: prime_par_naive [n]   (n defaults to 100000)
+int main ( int argc, char *argv[] )
+{
+	// Bring up MPI and learn the communicator size and this rank's id.
+	MPI_Init ( &argc, &argv );
+	int num_procs;
+	int my_rank;
+	MPI_Comm_size ( MPI_COMM_WORLD, &num_procs );
+	MPI_Comm_rank ( MPI_COMM_WORLD, &my_rank );
+	const bool is_root = ( my_rank == 0 );
+
+	// Wall-clock timing is kept on the root rank only.
+	double wtime;
+	if ( is_root )
+	{
+		wtime = MPI_Wtime ( );
+	}
+
+	// An optional single argument overrides the default upper bound.
+	int n = 100000;
+	if ( argc > 2 )
+	{
+		// Too many arguments: root prints the usage line, every rank exits.
+		if ( is_root )
+		{
+			printf("Usage: %s [n]\n", argv[0]);
+		}
+		MPI_Finalize();
+		return 1;
+	}
+	if ( argc == 2 )
+	{
+		n = std::atoi(argv[1]);
+	}
+
+	// Each rank counts the primes in its own share of the candidates.
+	int total_part = prime_part ( my_rank, num_procs, n );
+
+	// Sum the partial counts onto the root rank.
+	int total;
+	MPI_Reduce ( &total_part, &total, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD );
+
+	if ( is_root )
+	{
+		wtime = MPI_Wtime ( ) - wtime;
+		printf ( "\n" );
+		printf ( "Between 2 and %d, there are %d primes\n", n, total );
+		printf ( "Time = %f seconds\n", wtime );
+	}
+
+	MPI_Finalize ( );
+	return 0;
+}
+
+// Counts the primes among 2 + id, 2 + id + p, 2 + id + 2p, ... up to n.
+// Called with id = 0 .. p-1, the sub-sequences together cover every integer
+// in [2, n] exactly once. Example for p = 4:
+//   id 0: 2, 6, 10, 14, ...
+//   id 1: 3, 7, 11, 15, ...
+//   id 2: 4, 8, 12, ...
+//   id 3: 5, 9, 13, ...
+// Primality is tested by plain trial division by every j in [2, candidate),
+// stopping at the first divisor found.
+int prime_part ( int id, int p, int n )
+{
+	int count = 0;
+	int candidate = 2 + id;
+
+	while ( candidate <= n )
+	{
+		// Look for the first divisor in [2, candidate).
+		int divisor = 2;
+		while ( divisor < candidate && candidate % divisor != 0 )
+		{
+			divisor++;
+		}
+
+		// Reaching `candidate` means no proper divisor exists.
+		if ( divisor >= candidate )
+		{
+			count++;
+		}
+
+		// Move on to this part's next candidate.
+		candidate += p;
+	}
+
+	return count;
+}
diff --git a/lab3/prime/src/prime_ser.cpp b/lab3/prime/src/prime_ser.cpp
new file mode 100644
index 0000000..7436c8b
--- /dev/null
+++ b/lab3/prime/src/prime_ser.cpp
@@ -0,0 +1,58 @@
+#include <cstdlib>
+#include <cstdio>
+#include <ctime>
+int main ( int argc, char *argv[] );
+int prime_part ( int id, int p, int n );
+
+// Serial driver: counts the primes in [2, n] by summing prime_part over
+// four interleaved sub-sequences (p = 4).
+// Usage: prime_ser [n]   (n defaults to 100000)
+int main ( int argc, char *argv[] )
+{
+	const int parts = 4;
+	int n = 100000;
+
+	// At most one optional argument (the upper bound) is accepted.
+	if ( argc > 2 )
+	{
+		printf("Usage: %s [n]\n", argv[0]);
+		return 1;
+	}
+	if ( argc == 2 )
+	{
+		n = std::atoi(argv[1]);
+	}
+
+	int total = 0;
+	for ( int part = 0; part < parts; part++ )
+		total += prime_part ( part, parts, n );
+	printf ( "\n" );
+	printf ( "Between 2 and %d, there are %d primes\n", n, total );
+	return 0;
+}
+// Counts the primes in the arithmetic sequence 2 + id, 2 + id + p, ... <= n.
+// Each candidate is tested by trial division by every integer in
+// [2, candidate), stopping at the first divisor found.
+int prime_part ( int id, int p, int n )
+{
+	int found = 0;
+
+	for ( int candidate = 2 + id; candidate <= n; candidate += p )
+	{
+		bool has_divisor = false;
+
+		// One divisor is enough to rule the candidate out.
+		for ( int d = 2; d < candidate && !has_divisor; d++ )
+		{
+			has_divisor = ( candidate % d == 0 );
+		}
+
+		// No divisor in [2, candidate): count it.
+		if ( !has_divisor )
+		{
+			found++;
+		}
+	}
+
+	return found;
+}
diff --git a/lab3/prime/xmake.lua b/lab3/prime/xmake.lua
new file mode 100644
index 0000000..37e1a97
--- /dev/null
+++ b/lab3/prime/xmake.lua
@@ -0,0 +1,93 @@
+add_rules("mode.debug", "mode.release")
+
+-- MPI packages; {system = true} asks xmake to use the system installation.
+add_requires("mpi", {system = true})
+add_requires("mpi_cxx", {system = true})
+-- Serial trial-division prime counter.
+target("prime_ser")
+    set_kind("binary")
+    add_files("src/prime_ser.cpp")
+-- MPI sieve-based prime counter.
+target("prime_par")
+    set_kind("binary")
+    add_files("src/prime_par.cpp")
+    add_packages("mpi")
+    add_packages("mpi_cxx")
+
+-- MPI trial-division prime counter (the binary that lab3_prime.sh runs).
+target("prime_par_naive")
+    set_kind("binary")
+    add_files("src/prime_par_naive.cpp")
+    add_packages("mpi")
+    add_packages("mpi_cxx")
+    -- MPI itself comes from the system installation (see add_requires above).
+
+--
+-- If you want to know more about using xmake, please see https://xmake.io
+--
+-- ## FAQ
+--
+-- You can enter the project directory firstly before building project.
+--
+--   $ cd projectdir
+--
+-- 1. How to build project?
+--
+--   $ xmake
+--
+-- 2. How to configure project?
+--
+--   $ xmake f -p [macosx|linux|iphoneos ..] -a [x86_64|i386|arm64 ..] -m [debug|release]
+--
+-- 3. Where is the build output directory?
+--
+--   The default output directory is `./build` and you can configure the output directory.
+--
+--   $ xmake f -o outputdir
+--   $ xmake
+--
+-- 4. How to run and debug target after building project?
+--
+--   $ xmake run [targetname]
+--   $ xmake run -d [targetname]
+--
+-- 5. How to install target to the system directory or other output directory?
+--
+--   $ xmake install
+--   $ xmake install -o installdir
+--
+-- 6. Add some frequently-used compilation flags in xmake.lua
+--
+-- @code
+--    -- add debug and release modes
+--    add_rules("mode.debug", "mode.release")
+--
+--    -- add macro definition
+--    add_defines("NDEBUG", "_GNU_SOURCE=1")
+--
+--    -- set warning all as error
+--    set_warnings("all", "error")
+--
+--    -- set language: c99, c++11
+--    set_languages("c99", "c++11")
+--
+--    -- set optimization: none, faster, fastest, smallest
+--    set_optimize("fastest")
+--
+--    -- add include search directories
+--    add_includedirs("/usr/include", "/usr/local/include")
+--
+--    -- add link libraries and search directories
+--    add_links("tbox")
+--    add_linkdirs("/usr/local/lib", "/usr/lib")
+--
+--    -- add system link libraries
+--    add_syslinks("z", "pthread")
+--
+--    -- add compilation and link flags
+--    add_cxflags("-stdnolib", "-fno-strict-aliasing")
+--    add_ldflags("-L/usr/local/lib", "-lpthread", {force = true})
+--
+-- @endcode
+--
+