#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>

#define N 12
#define ITER 100
#define OMEGA 1.7


double **make_matrix(int n) {
    int i;
    double **A;

    A = (double**) malloc(n * sizeof(double*));
    for (i=0;i<n;i++) 
	A[i] = (double*) malloc(n * sizeof(double));
    return A;
}

void free_matrix(double **A, int n) {
    int i;
    for (i=0;i<n;i++) 
	free(A[i]);
    free(A);
}

double **fill_random_matrix(double **A, int n) {
    int i, j;

    for (i=0;i<n;i++) {
	for (j=0;j<n;j++) {
	    A[i][j] = ((double) rand()) / ((double) RAND_MAX);
	}
	A[i][i] += 10;
    }
    return A;
}

double *fill_random_vector(double *v, int n) {
    int i;

    for (i=0;i<n;i++) 
	v[i] = ((double) rand()) / ((double) RAND_MAX);
    return v;
}

void print_matrix(double **A, int n) {
    int i, j;

    for (i=0;i<n;i++) {
	for (j=0;j<n;j++) {
	    printf("%e ",A[i][j]);
	}
	printf("\n");
    }
    printf("\n");
}

void print_vector(double *v, int n) {
    int i;

    for (i=0;i<n;i++) 
	printf("%e\n",v[i]);
    printf("\n");
}

double **copy_matrix(double **A, int n) {
    double **B;

    int i,j;

    B = make_matrix(n);
    for (i=0;i<n;i++) 
	for (j=0;j<n;j++) 
	    B[i][j] = A[i][j];
    return B;
}

double *copy_vector(double *v, int n) {
    double *w;
    int i;
    w = (double*) malloc(n * sizeof(double));
    for (i=0;i<n;i++) 
	w[i] = v[i];
    return w;
}

double *subtract_vector(double *u, double *v, int n) {
    int i;
    double *w;
    w = (double*) malloc(n * sizeof(double));
    for (i=0;i<n;i++) 
	w[i] = u[i] - v[i];
    return w;
}

double vector_norm(double *v, int n) {
    int i;
    double no;
    no = 0;
    for (i=0;i<n;i++) 
	no += v[i] * v[i];
    no = sqrt(no);
    return no;
}

double *solve_by_gaussian_elimination(double **A, double *d, int n) {
    double **A_;
    double *x, *d_;
    double l;
    int i,j,k;
    
    A_ = copy_matrix(A,n);
    d_ = copy_vector(d,n);
    for (k=0;k<n-1;k++) {
	for (i=k+1;i<n;i++) {
	    l = A_[i][k] / A_[k][k];
	    for (j=k;j<n;j++) {
		A_[i][j] -= l*A_[k][j];
	    }
	    d_[i] -= l*d_[k];
	}
    }
    x = (double*) malloc(n * sizeof(double));
    for (k=n-1;k>=0;k--) {
	l = 0;
	for (i=n-1;i>k;i--) {
	    l += x[i]*A_[k][i];
	}
	x[k] = (d_[k] - l) / A_[k][k];
    }
    free_matrix(A_,n);
    free(d_);
    return x;
}



int main(int argc, char *argv[]) {
    MPI_Status status;
    int p, my_rank, i,j,k; 
    double **A;
    double *d, *x_ge, *x_sor, *diff;
    double norm, omega, b, s;
    
    omega = OMEGA;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    A = make_matrix(N);
    d = (double*) malloc(N * sizeof(double));
    x_sor = (double*) malloc(N * sizeof(double));
    for (i=0;i<N;i++) {
	x_sor[i] = 0.0;
    }

    if (my_rank == 0) {
	fill_random_matrix(A,N);
	fill_random_vector(d,N);
	
	print_matrix(A,N);
	print_vector(d,N);

	x_ge = solve_by_gaussian_elimination(A,d,N);
	print_vector(x_ge,N);
    } 

    for (i=0;i<N;i++) {
	MPI_Bcast(A[i],N,MPI_DOUBLE,0,MPI_COMM_WORLD);
    }
    MPI_Bcast(d,N,MPI_DOUBLE,0,MPI_COMM_WORLD);

    for (k=0;k<ITER;k++) {
	for (i=0;i<N;i++) {
	    s = 0.0;
	    for (j=0;j<N;j++) {
		if (my_rank == (j % p)) {
		    b = ((i!=j) ? (-1.0*A[i][j]) : ((1.0 - omega) / omega) * A[i][j]);
		    s += b*x_sor[j];
		}
	    }
	    MPI_Reduce(&s,&x_sor[i],1,MPI_DOUBLE,MPI_SUM,i%p,MPI_COMM_WORLD);
	    if (my_rank == (i % p)) {
		x_sor[i] = (omega / A[i][i])*(x_sor[i] + d[i]);
	    }
	}
    }
    
    for (i=0;i<N;i++) {
	if ((i % p) != 0) {
	    if (my_rank == 0) {
		MPI_Recv(&x_sor[i],1,MPI_DOUBLE,i%p,i+1,MPI_COMM_WORLD,&status);
	    } else {
		if (my_rank == (i % p)) {
		    MPI_Send(&x_sor[i],1,MPI_DOUBLE,0,i+1,MPI_COMM_WORLD);
		}
	    }
	}
    }

    if (my_rank == 0) {
	print_vector(x_sor,N);
	diff = subtract_vector(x_ge,x_sor,N);
	norm = vector_norm(diff,N);
	printf("|| x_ge - x_sor || = %e\n",norm);
	free(diff);
	free(x_ge);
    }
    free_matrix(A,N);
    free(x_sor);
    free(d);
    fflush(stdout);
    fflush(stderr);
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}


