/* 
   program for measure 2 
   mpicc hopfield_measure2.c -lm -o hopfield_measure2c
   mpirun -np 3 hopfield_measure2c N M filename
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mpi.h>
#include <malloc.h>
#include <time.h>

#define rnd() ( (double)rand() /RAND_MAX )

int N,M;
char filename[30];

double **distance;
double **unit,Acoef,Bcoef,Dcoef;
double **city_pos;

int myrank,size;
int fst_index,snd_index;

int get_fst_index(){
  int a,j;
  a = 0;
  for (j=0;j<myrank;j++){
    a += ((N+j)/size);
  }
  return a;
}

int get_snd_index(){
  int a,j;
  a = 0;
  for (j=0;j<=myrank;j++){
    a += ((N+j)/size);
  }
  a--;
  return a;
}

void initialize(){
  char buf[256],*buf2;
  FILE *fp;
  int i,j;
  double dtotal;

  fp = fopen(filename,"r");
  if ( fp == NULL ){
    exit(1);
  }
  for(i=0;i<N;i++){
    if ( fgets(buf,sizeof(buf),fp) == 0 ){
      break; 
    }
    // sscanf is not work
    buf2 = strtok(buf,",");
    city_pos[i][0] = atof(buf2);
    buf2 = strtok(NULL,",");
    city_pos[i][1] = atof(buf2);
  }

  fclose(fp);

  Acoef = 1.0;
  Bcoef = 1.0;
  Dcoef = 2.0;
  fst_index = get_fst_index();
  snd_index = get_snd_index();

  for(i=0;i<N;i++){
    for(j=0;j<N;j++){
      distance[i][j] = 
	sqrt ( (city_pos[i][0] - city_pos[j][0]) * (city_pos[i][0] - city_pos[j][0]) +
	       (city_pos[i][1] - city_pos[j][1]) * (city_pos[i][1] - city_pos[j][1]) );
      dtotal += distance[i][j];
    }
  }

  for(i=0;i<N;i++){
    for(j=0;j<N;j++){
      distance[i][j] = 10.0 * distance[i][j] / dtotal;
    }
  }

  if(myrank==0){
    for(i=0;i<N;i++){
      for(j=0;j<N;j++){
	unit[i][j] = rnd();
      }
    }
  }

  /* broadcast init unit */
  /*this exp is error ->  MPI_Bcast(unit,N*N,MPI_DOUBLE,0,MPI_COMM_WORLD);
     city_pos is change*/
  for(i=0;i<N;i++){
    MPI_Bcast(unit[i],N,MPI_DOUBLE,0,MPI_COMM_WORLD);
  }


}


double energy(){
  int i,j,m,jp,jm;
  double term1,term2,term3,term4,term5,term6;

  term1 = term2 = term3 = 0.0;
  term4 = term5 = term6 = 0.0;

  for(i=fst_index;i<=snd_index;i++){
    for(j=0;j<N;j++){
      for(m=0;m<N;m++){
	if(m!=j) term1 += unit[i][j]*unit[i][m];
      }
    }
  }
  MPI_Allreduce(&term1,&term4,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  term4 = 0.5 * Acoef * term4;

  for (j=fst_index;j<=snd_index;j++){
    for(i=0;i<N;i++){
      for(m=0;m<N;m++){
	if(m!=i) term2 += unit[i][j]*unit[m][j];
      }
    }
  }
  MPI_Allreduce(&term2,&term5,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  term5 = 0.5 * Bcoef * term5;

  for(i=fst_index;i<=snd_index;i++){
    for(j=0;j<N;j++){
      if(j-1==-1) jm = N-1; else jm=j-1;
      if(j+1==N) jp=0; else jp=j+1;
      for(m=0;m<N;m++){
	term3 += distance[i][m]*unit[i][j]*(unit[m][jp] + unit[m][jm]);
      }
    }
  }
  MPI_Allreduce(&term3,&term6,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  term6 = 0.5 * Dcoef * term6;

  return (term4+term5+term6);
}

/* single process function for energy calculation
   for check */
double energy2(){
  int i,j,m,jp,jm;
  double term1,term2,term3;

  term1 = term2 = term3 = 0.0;

  for(i=0;i<N;i++){
    for(j=0;j<N;j++){
      for(m=0;m<N;m++){
	if(m!=j) term1 += unit[i][j]*unit[i][m];
      }
    }
  }
  term1 = 0.5 * Acoef * term1;

  for (j=0;j<N;j++){
    for(i=0;i<N;i++){
      for(m=0;m<N;m++){
	if(m!=i) term2 += unit[i][j]*unit[m][j];
      }
    }
  }
  term2 = 0.5 * Bcoef * term2;

  for(i=0;i<N;i++){
    for(j=0;j<N;j++){
      if(j-1==-1) jm = N-1; else jm=j-1;
      if(j+1==N) jp=0; else jp=j+1;
      for(m=0;m<N;m++){ 
	term3 += distance[i][m]*unit[i][j]*(unit[m][jp] + unit[m][jm]);
      }
    }
  }
  term3 = 0.5 * Dcoef * term3;

  return (term1+term2+term3);
}



main(int argc,char *argv[]){
  int i,j;
  double elapsed_time1,elapsed_time2;
  MPI_Status status;
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  if(myrank==0){
    N = atoi(argv[1]);
    M = atoi(argv[2]);
    strcpy(filename,argv[3]);
  }
  MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(&M,1,MPI_INT,0,MPI_COMM_WORLD);
  MPI_Bcast(filename,30,MPI_CHAR,0,MPI_COMM_WORLD);

  
  distance = (double **)malloc(N * sizeof(double *));
  unit = (double **)malloc(N * sizeof(double *));
  city_pos = (double **)malloc(N * sizeof(double *));
  for(i=0;i<N;i++){
    distance[i] = (double *)malloc(N * sizeof(double));
    unit[i] = (double *)malloc(N * sizeof(double));
    city_pos[i] = (double *)malloc(2 * sizeof(double));
  }

  initialize();

  MPI_Barrier(MPI_COMM_WORLD);
  elapsed_time1 = (double)clock();
  for(i=0;i<M;i++){
    energy();
    /* printf(" energy_mpi:%lf\n energy_single:%lf\n",energy(),energy2()); */
  }
  MPI_Barrier(MPI_COMM_WORLD);
  elapsed_time2 = (double)clock();
  
  if (myrank==0) printf("elapsed time:%lf second\n",(elapsed_time2-elapsed_time1)/CLOCKS_PER_SEC);
  
  for(i=0;i<N;i++){
    free(distance[i]);
    free(unit[i]);
    free(city_pos[i]);
  }
  free(distance);
  free(unit);
  free(city_pos);
  
  MPI_Finalize();
}
