/*
 * C-exp-collection/AI-work/matmult.c
 *
 * Listing captured from a repository file viewer
 * (152 lines, 3.4 KiB, C; last change 2026-05-16 12:08:49 +08:00).
 */
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Upper bound on worker threads; also the size of the thread arrays in main(). */
#define MAX_THREADS 64
/* Dimension of the square matrices; set in main() from the first argument. */
int N;
/*
 * A and B are the operands. C_parallel receives the product computed by the
 * worker threads, C_serial the product computed by serial_multiply().
 */
double **A, **B, **C_parallel, **C_serial;
/* Number of worker threads; set in main() from the second argument. */
int nthreads;
/* Number of consecutive rows handed to each worker; computed in main(). */
int rows_per_thread;
/*
 * Allocate an n x n matrix of doubles as an array of n row pointers, each
 * pointing to its own array of n doubles. Element values are left
 * uninitialized.
 *
 * Returns the matrix, or NULL when n is not positive or any allocation
 * fails; on failure everything obtained so far is released again.
 * The caller owns the result and releases it with free_matrix().
 */
double **allocate_matrix(int n)
{
    if (n <= 0) {
        return NULL;
    }

    size_t dim = (size_t)n;
    /* Reject sizes whose byte count would wrap around in size_t. */
    if (dim > (size_t)-1 / sizeof(double) || dim > (size_t)-1 / sizeof(double *)) {
        return NULL;
    }

    double **mat = malloc(dim * sizeof *mat);
    if (mat == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < dim; i++) {
        mat[i] = malloc(dim * sizeof *mat[i]);
        if (mat[i] == NULL) {
            /* Roll back so a partially built matrix never reaches the caller. */
            while (i > 0) {
                free(mat[--i]);
            }
            free(mat);
            return NULL;
        }
    }
    return mat;
}
/*
 * Release a matrix consisting of n separately allocated rows plus the
 * row-pointer array itself.
 *
 * mat may be NULL, in which case nothing happens (mirroring free()), so the
 * function is safe to call on a matrix whose allocation failed.
 * Individual rows may also be NULL.
 */
void free_matrix(double **mat, int n)
{
    if (mat == NULL) {
        return;
    }
    for (int i = 0; i < n; i++) {
        free(mat[i]);
    }
    free(mat);
}
/*
 * Fill every element of the n x n matrix mat with a value derived from
 * rand(): the result of rand() % 100 divided by 10.0, i.e. 0.0 .. 9.9.
 * Elements are filled row by row, one rand() call per element, so the
 * contents depend on the generator state at the time of the call.
 */
void init_matrix(double **mat, int n)
{
    for (int row = 0; row < n; row++) {
        double *cells = mat[row];
        for (int col = 0; col < n; col++) {
            int tenths = rand() % 100;
            cells[col] = (double)tenths / 10.0;
        }
    }
}
/*
 * Thread entry point: compute a band of rows of C_parallel = A * B.
 *
 * arg points to an int holding the first row of the band. The band covers
 * rows_per_thread rows starting there, cut off at row N. Each worker writes
 * only the rows of its own band.
 *
 * Always returns NULL.
 */
void *multiply_thread(void *arg)
{
    const int first = *(int *)arg;
    int limit = first + rows_per_thread;
    if (limit > N) {
        limit = N;
    }

    for (int r = first; r < limit; r++) {
        const double *lhs_row = A[r];
        double *out_row = C_parallel[r];
        for (int c = 0; c < N; c++) {
            double acc = 0.0;
            for (int k = 0; k < N; k++) {
                acc += lhs_row[k] * B[k][c];
            }
            out_row[c] = acc;
        }
    }
    return NULL;
}
/*
 * Single-threaded reference multiplication: C_serial = A * B for the
 * N x N global matrices. Products are accumulated in increasing k order.
 */
void serial_multiply()
{
    for (int r = 0; r < N; r++) {
        const double *lhs_row = A[r];
        double *out_row = C_serial[r];
        for (int c = 0; c < N; c++) {
            double acc = 0.0;
            for (int k = 0; k < N; k++) {
                acc += lhs_row[k] * B[k][c];
            }
            out_row[c] = acc;
        }
    }
}
/*
 * Return the current time reported by gettimeofday() as seconds, with the
 * microsecond part folded in as a fraction.
 */
double get_time()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    double whole_seconds = now.tv_sec;
    double fraction = now.tv_usec / 1000000.0;
    return whole_seconds + fraction;
}
/*
 * Compare C_parallel against C_serial element by element.
 *
 * Returns 1 when every pair differs by at most 0.001 in either direction.
 * On the first pair that differs by more, prints its position and both
 * values to stdout and returns 0.
 */
int verify()
{
    const double tolerance = 0.001;

    for (int row = 0; row < N; row++) {
        const double *par = C_parallel[row];
        const double *ser = C_serial[row];
        for (int col = 0; col < N; col++) {
            double excess = par[col] - ser[col];
            double deficit = ser[col] - par[col];
            if (!(excess > tolerance || deficit > tolerance)) {
                continue;
            }
            printf("Mismatch at [%d][%d]: parallel=%.6f, serial=%.6f\n",
                   row, col, par[col], ser[col]);
            return 0;
        }
    }
    return 1;
}
int main(int argc, char *argv[])
{
if (argc != 3) {
fprintf(stderr, "Usage: %s <N> <num_threads>\n", argv[0]);
return 1;
}
N = atoi(argv[1]);
nthreads = atoi(argv[2]);
if (nthreads > MAX_THREADS) nthreads = MAX_THREADS;
if (nthreads > N) nthreads = N;
rows_per_thread = N / nthreads;
srand(42);
A = allocate_matrix(N);
B = allocate_matrix(N);
C_parallel = allocate_matrix(N);
C_serial = allocate_matrix(N);
init_matrix(A, N);
init_matrix(B, N);
pthread_t threads[MAX_THREADS];
int starts[MAX_THREADS];
double t_start = get_time();
for (int i = 0; i < nthreads; i++) {
starts[i] = i * rows_per_thread;
pthread_create(&threads[i], NULL, multiply_thread, &starts[i]);
}
for (int i = 0; i < nthreads; i++) {
pthread_join(threads[i], NULL);
}
double t_end = get_time();
double t_parallel = t_end - t_start;
t_start = get_time();
serial_multiply();
t_end = get_time();
double t_serial = t_end - t_start;
printf("Matrix size: %d x %d, Threads: %d\n", N, N, nthreads);
printf("Parallel time: %.6f s\n", t_parallel);
printf("Serial time: %.6f s\n", t_serial);
printf("Speedup: %.4f\n", t_serial / t_parallel);
printf("Efficiency: %.4f\n", t_serial / t_parallel / nthreads);
if (verify()) {
printf("Verification: SUCCESS\n");
} else {
printf("Verification: FAILED\n");
}
free_matrix(A, N);
free_matrix(B, N);
free_matrix(C_parallel, N);
free_matrix(C_serial, N);
return 0;
}