152 lines
3.4 KiB
C
152 lines
3.4 KiB
C
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
|
|
|
|
/* Upper bound on the number of worker threads the program will start. */
#define MAX_THREADS 64

/* Dimension of the square matrices: each one is N x N. */
int N;

/* Input operands of the multiplication. */
double **A;
double **B;

/* Product written by the worker threads. */
double **C_parallel;

/* Product written by the single-threaded reference loop. */
double **C_serial;

/* Number of worker threads used for the parallel run. */
int nthreads;

/* Number of consecutive rows handed to each worker thread. */
int rows_per_thread;
|
|
|
|
/*
 * Allocate an n x n matrix of doubles as an array of n row pointers, each
 * pointing at its own separately allocated row of n elements.
 *
 * Element values are left uninitialized.
 *
 * Returns the matrix on success.  Returns NULL if n is not positive, if the
 * requested size cannot be represented in a size_t, or if any allocation
 * fails; in the failure case every row allocated so far is released first,
 * so nothing leaks.  The caller owns a successful result and must free each
 * row and then the row-pointer array.
 */
double **allocate_matrix(int n)
{
    if (n <= 0) {
        return NULL;
    }

    const size_t dim = (size_t)n;

    /* Reject sizes whose byte count would wrap around before reaching malloc. */
    if (dim > (size_t)-1 / sizeof(double) ||
        dim > (size_t)-1 / sizeof(double *)) {
        return NULL;
    }

    double **mat = malloc(dim * sizeof *mat);
    if (mat == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < dim; i++) {
        mat[i] = malloc(dim * sizeof *mat[i]);
        if (mat[i] == NULL) {
            /* Unwind the rows that were already allocated. */
            while (i > 0) {
                free(mat[--i]);
            }
            free(mat);
            return NULL;
        }
    }
    return mat;
}
|
|
|
|
/*
 * Release a matrix made of n separately allocated rows plus the array of
 * row pointers itself.
 *
 * mat may be NULL, in which case nothing happens.  Individual row pointers
 * may also be NULL, because free(NULL) is a no-op.
 */
void free_matrix(double **mat, int n)
{
    /* Without this guard the row loop would dereference a NULL matrix. */
    if (mat == NULL) {
        return;
    }

    for (int i = 0; i < n; i++) {
        free(mat[i]);
    }
    free(mat);
}
|
|
|
|
/*
 * Fill every element of the n x n matrix mat with a pseudo-random value
 * taken from rand(): one of 0.0, 0.1, ..., 9.9.
 *
 * Elements are written row by row, left to right, consuming exactly one
 * rand() call per element.
 */
void init_matrix(double **mat, int n)
{
    int row = 0;

    while (row < n) {
        double *cells = mat[row];

        for (int col = 0; col < n; ++col) {
            const int tenths = rand() % 100;
            cells[col] = (double)tenths / 10.0;
        }
        ++row;
    }
}
|
|
|
|
void *multiply_thread(void *arg)
|
|
{
|
|
int start_row = *(int *)arg;
|
|
int end_row = start_row + rows_per_thread;
|
|
if (end_row > N) end_row = N;
|
|
|
|
for (int i = start_row; i < end_row; i++) {
|
|
for (int j = 0; j < N; j++) {
|
|
double sum = 0.0;
|
|
for (int k = 0; k < N; k++) {
|
|
sum += A[i][k] * B[k][j];
|
|
}
|
|
C_parallel[i][j] = sum;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
void serial_multiply()
|
|
{
|
|
for (int i = 0; i < N; i++) {
|
|
for (int j = 0; j < N; j++) {
|
|
double sum = 0.0;
|
|
for (int k = 0; k < N; k++) {
|
|
sum += A[i][k] * B[k][j];
|
|
}
|
|
C_serial[i][j] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Return the current time reported by gettimeofday() as a double, in
 * seconds, with the microsecond field folded in as the fractional part.
 */
double get_time()
{
    struct timeval now;

    gettimeofday(&now, NULL);

    const double fraction = now.tv_usec / 1000000.0;
    return now.tv_sec + fraction;
}
|
|
|
|
int verify()
|
|
{
|
|
for (int i = 0; i < N; i++) {
|
|
for (int j = 0; j < N; j++) {
|
|
if (C_parallel[i][j] - C_serial[i][j] > 0.001 ||
|
|
C_serial[i][j] - C_parallel[i][j] > 0.001) {
|
|
printf("Mismatch at [%d][%d]: parallel=%.6f, serial=%.6f\n",
|
|
i, j, C_parallel[i][j], C_serial[i][j]);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
if (argc != 3) {
|
|
fprintf(stderr, "Usage: %s <N> <num_threads>\n", argv[0]);
|
|
return 1;
|
|
}
|
|
|
|
N = atoi(argv[1]);
|
|
nthreads = atoi(argv[2]);
|
|
if (nthreads > MAX_THREADS) nthreads = MAX_THREADS;
|
|
if (nthreads > N) nthreads = N;
|
|
|
|
rows_per_thread = N / nthreads;
|
|
|
|
srand(42);
|
|
|
|
A = allocate_matrix(N);
|
|
B = allocate_matrix(N);
|
|
C_parallel = allocate_matrix(N);
|
|
C_serial = allocate_matrix(N);
|
|
|
|
init_matrix(A, N);
|
|
init_matrix(B, N);
|
|
|
|
pthread_t threads[MAX_THREADS];
|
|
int starts[MAX_THREADS];
|
|
|
|
double t_start = get_time();
|
|
|
|
for (int i = 0; i < nthreads; i++) {
|
|
starts[i] = i * rows_per_thread;
|
|
pthread_create(&threads[i], NULL, multiply_thread, &starts[i]);
|
|
}
|
|
for (int i = 0; i < nthreads; i++) {
|
|
pthread_join(threads[i], NULL);
|
|
}
|
|
|
|
double t_end = get_time();
|
|
double t_parallel = t_end - t_start;
|
|
|
|
t_start = get_time();
|
|
serial_multiply();
|
|
t_end = get_time();
|
|
double t_serial = t_end - t_start;
|
|
|
|
printf("Matrix size: %d x %d, Threads: %d\n", N, N, nthreads);
|
|
printf("Parallel time: %.6f s\n", t_parallel);
|
|
printf("Serial time: %.6f s\n", t_serial);
|
|
printf("Speedup: %.4f\n", t_serial / t_parallel);
|
|
printf("Efficiency: %.4f\n", t_serial / t_parallel / nthreads);
|
|
|
|
if (verify()) {
|
|
printf("Verification: SUCCESS\n");
|
|
} else {
|
|
printf("Verification: FAILED\n");
|
|
}
|
|
|
|
free_matrix(A, N);
|
|
free_matrix(B, N);
|
|
free_matrix(C_parallel, N);
|
|
free_matrix(C_serial, N);
|
|
|
|
return 0;
|
|
}
|