/*
 * Dense N x N matrix multiplication benchmark.
 *
 * Computes C = A * B twice -- once with POSIX threads, each thread handling a
 * contiguous block of rows, and once serially -- then prints both timings,
 * the speedup, the parallel efficiency, and whether the two results agree.
 *
 * Usage: <program> <matrix_size> <num_threads>
 *
 * Define MATMUL_NO_MAIN to compile only the kernels and helpers without the
 * command-line driver (for example to link them into a test harness).
 */
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define MAX_THREADS 64

/* Shared problem state; written before the workers start, read-only after. */
int N;
double **A, **B, **C_parallel, **C_serial;
int nthreads;
int rows_per_thread;

/*
 * Allocate an n x n matrix as an array of n separately allocated rows.
 *
 * Returns NULL when n is not positive or when any allocation fails; rows
 * allocated before the failure are released, so nothing leaks.
 * The caller owns the result and releases it with free_matrix().
 */
double **allocate_matrix(int n)
{
    if (n <= 0) {
        return NULL;
    }

    double **mat = malloc((size_t)n * sizeof *mat);
    if (mat == NULL) {
        return NULL;
    }

    for (int i = 0; i < n; i++) {
        mat[i] = malloc((size_t)n * sizeof *mat[i]);
        if (mat[i] == NULL) {
            /* Roll back the rows that were already allocated. */
            while (i-- > 0) {
                free(mat[i]);
            }
            free(mat);
            return NULL;
        }
    }
    return mat;
}

/* Release a matrix obtained from allocate_matrix(); a NULL matrix is a no-op. */
void free_matrix(double **mat, int n)
{
    if (mat == NULL) {
        return;
    }
    for (int i = 0; i < n; i++) {
        free(mat[i]);
    }
    free(mat);
}

/* Fill an n x n matrix with pseudo-random values in [0.0, 9.9] from rand(). */
void init_matrix(double **mat, int n)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            mat[i][j] = (double)(rand() % 100) / 10.0;
        }
    }
}

/*
 * Thread entry point: compute rows [start_row, start_row + rows_per_thread)
 * of C_parallel = A * B, clamped to N.
 *
 * arg points to an int holding start_row; it must stay valid until the
 * thread has read it. Always returns NULL.
 */
void *multiply_thread(void *arg)
{
    const int start_row = *(const int *)arg;

    /* Clamp without forming start_row + rows_per_thread, which could overflow. */
    int end_row = N;
    if (start_row < N && N - start_row > rows_per_thread) {
        end_row = start_row + rows_per_thread;
    }

    for (int i = start_row; i < end_row; i++) {
        for (int j = 0; j < N; j++) {
            double sum = 0.0;
            for (int k = 0; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }
            C_parallel[i][j] = sum;
        }
    }
    return NULL;
}

/* Reference implementation: compute C_serial = A * B on the calling thread. */
void serial_multiply(void)
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            double sum = 0.0;
            for (int k = 0; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }
            C_serial[i][j] = sum;
        }
    }
}

/* Current wall-clock time in seconds, from gettimeofday(). */
double get_time(void)
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec + tv.tv_usec / 1000000.0;
}

/*
 * Compare C_parallel against C_serial element by element.
 *
 * Returns 1 when every pair differs by at most 0.001; otherwise prints the
 * first mismatching element and returns 0.
 */
int verify(void)
{
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (C_parallel[i][j] - C_serial[i][j] > 0.001 ||
                C_serial[i][j] - C_parallel[i][j] > 0.001) {
                printf("Mismatch at [%d][%d]: parallel=%.6f, serial=%.6f\n",
                       i, j, C_parallel[i][j], C_serial[i][j]);
                return 0;
            }
        }
    }
    return 1;
}

/*
 * Parse text as a strictly positive decimal int.
 *
 * Returns 0 and stores the value in *out on success. Returns -1 and leaves
 * *out untouched when text is empty, has trailing characters, is out of range
 * for int, or is not greater than zero.
 */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value <= 0 || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Choose the row partition for an n x n problem and store it in the globals
 * rows_per_thread and nthreads. Both arguments must be positive.
 *
 * The requested thread count is capped at MAX_THREADS and at n. The block
 * size is rounded UP so that nthreads * rows_per_thread >= n and no trailing
 * rows are left uncomputed when n is not a multiple of the thread count.
 * nthreads is then reduced to the number of blocks that actually contain
 * rows, so every spawned thread has work.
 */
static void plan_partition(int n, int requested_threads)
{
    int workers = requested_threads;
    if (workers > MAX_THREADS) {
        workers = MAX_THREADS;
    }
    if (workers > n) {
        workers = n;
    }

    rows_per_thread = n / workers + (n % workers != 0);
    nthreads = n / rows_per_thread + (n % rows_per_thread != 0);
}

#ifndef MATMUL_NO_MAIN
/*
 * Driver: parse <matrix_size> <num_threads>, run the threaded and serial
 * multiplications, and print timings plus the verification result.
 *
 * Returns 0 when the benchmark ran (even if verification failed) and 1 on
 * bad arguments, allocation failure, or thread creation failure.
 */
int main(int argc, char *argv[])
{
    pthread_t threads[MAX_THREADS];
    int starts[MAX_THREADS];
    int requested = 0;
    int created = 0;
    int create_err = 0;
    int rc = 1;

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <matrix_size> <num_threads>\n", argv[0]);
        return 1;
    }
    if (parse_positive_int(argv[1], &N) != 0 ||
        parse_positive_int(argv[2], &requested) != 0) {
        fprintf(stderr,
                "Error: matrix size and thread count must be positive integers\n");
        return 1;
    }

    plan_partition(N, requested);

    srand(42); /* fixed seed keeps runs reproducible */

    A = allocate_matrix(N);
    B = allocate_matrix(N);
    C_parallel = allocate_matrix(N);
    C_serial = allocate_matrix(N);
    if (A == NULL || B == NULL || C_parallel == NULL || C_serial == NULL) {
        fprintf(stderr, "Error: failed to allocate %d x %d matrices\n", N, N);
        goto cleanup;
    }

    init_matrix(A, N);
    init_matrix(B, N);

    double t_start = get_time();
    for (int i = 0; i < nthreads; i++) {
        starts[i] = i * rows_per_thread;
        create_err = pthread_create(&threads[i], NULL, multiply_thread,
                                    &starts[i]);
        if (create_err != 0) {
            break;
        }
        created++;
    }
    /* Join only the threads that were really created. */
    for (int i = 0; i < created; i++) {
        pthread_join(threads[i], NULL);
    }
    double t_end = get_time();

    if (create_err != 0) {
        fprintf(stderr, "Error: pthread_create failed (error %d)\n", create_err);
        goto cleanup;
    }
    double t_parallel = t_end - t_start;

    t_start = get_time();
    serial_multiply();
    t_end = get_time();
    double t_serial = t_end - t_start;

    printf("Matrix size: %d x %d, Threads: %d\n", N, N, nthreads);
    printf("Parallel time: %.6f s\n", t_parallel);
    printf("Serial time: %.6f s\n", t_serial);
    printf("Speedup: %.4f\n", t_serial / t_parallel);
    printf("Efficiency: %.4f\n", t_serial / t_parallel / nthreads);

    if (verify()) {
        printf("Verification: SUCCESS\n");
    } else {
        printf("Verification: FAILED\n");
    }
    rc = 0;

cleanup:
    free_matrix(A, N);
    free_matrix(B, N);
    free_matrix(C_parallel, N);
    free_matrix(C_serial, N);
    return rc;
}
#endif /* MATMUL_NO_MAIN */