/*
  Copyright (C) 2019 Alessandro Languasco
*/
#define _GNU_SOURCE
#include <time.h>
#include <fftw3.h> 
#include <stdio.h>
#include <stdlib.h>  
#include <math.h>

#define REAL 0
#define IMAG 1


void prntc(fftwl_complex* result, ptrdiff_t dim) { /* for checking results */
  int i;
  for (i = 0; i < dim; ++i) {
    printf("%Lf\n", result[i][REAL]);
    printf("%Lf\n", result[i][IMAG]);
  }
}
void prntr(long double* result,ptrdiff_t dim) { /* for checking results */
  int i;
  for (i = 0; i < dim; ++i) {
    printf("%Lf\n", result[i]);
  }
}


int main() {
  ptrdiff_t a;  
  long double c; 
  long double d; 
  ptrdiff_t i;
  ptrdiff_t j;
  ptrdiff_t k;
  ptrdiff_t m;
  ptrdiff_t q;   
  ptrdiff_t primroot; 
  ptrdiff_t NUM_POINTS;
  ptrdiff_t nhalf;
  ptrdiff_t nquarter; 
  long double oddresult;
  long double evenresult; 
  long double pi;   
  pi=3.1415926535897932384626433832795028841971693993751;

 
  printf("********** A. LANGUASCO  *********** \n");
  printf("Computation of the max_{chi neq chi_0} |L(1,chi)| with the logGamma function. \n");
  printf("[LONG DOUBLE PRECISION - GURU64 - OUTPLACE transforms]\n");
  printf("Acquiring q from file: q = ");
  FILE *myFile;
  myFile = fopen("primroot.res", "r");
  
  if (myFile == NULL){
    printf("File primroot.res doesn't exist\n");
    exit (0);
  }
  fscanf(myFile, "%tu", &q);
  printf("%td\n",q);
  
 /*getting the value of the primitive root g mod q used in the precomputation */       
  fscanf(myFile, "%tu", &primroot);
  
  fclose(myFile);
  
  NUM_POINTS=q-1;
  nhalf=NUM_POINTS/2;  
  nquarter=nhalf/2;  

  /*BEGIN GURU 64 PLAN PREPARATION */
  printf("%s", "Start FFT plans preparation"); 
  clock_t startplan = clock();  /* start clock */
   
  /***********  LOGGAMMA PLAN PREPARATION ***********/
    /***** for an OUTPLACE TRANSFORM *****/
   long double *lngammavalues;
  lngammavalues = (long double*) fftwl_malloc(sizeof(long double) * nhalf); 

  fftwl_complex *lngammatrasf;
  lngammatrasf = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * (nquarter+1));
  
  /***** for an INPLACE TRANSFORM *****/
 /* for an INPLACE TRANSFORM; lngammavalues should be of the same dim of the output ; 2*(nquarter+1) */
 /*
  long double *lngammavalues;
  lngammavalues = (long double*) fftwl_malloc(sizeof(long double) * nhalf+2); */
  /* the FFT of a real sequence with length = nhalf has nquarter + 1 complex positions = nhalf+2 long double positions */
  /* fftwl_complex *lngammatrasf; */
 /*  lngammatrasf = (fftwl_complex*)lngammavalues;  */


  int rank=1;
  fftw_iodim64 *dims=malloc(rank*sizeof(fftw_iodim64));
  dims[0].n= nhalf;  /* length of the trasform */
  dims[0].is=1;
  dims[0].os=1;   
  int howmany_rank=1;
  fftw_iodim64 *howmany_dims=malloc(howmany_rank*sizeof(fftw_iodim64)); 
  howmany_dims[0].n=1;
  howmany_dims[0].is=1;
  howmany_dims[0].os=1;  
  fftwl_plan plan_lngamma = fftwl_plan_guru64_dft_r2c(rank, /*rank=1 means monodimensional vector*/
              dims, /* NUM_POINTS */
              howmany_rank,
              howmany_dims,
              lngammavalues,  /*input*/
              lngammatrasf,  /*output; for outplace transforms*/
              FFTW_ESTIMATE); /*flags*/
              
  if (plan_lngamma ==NULL){fprintf(stderr,"loggamma plan creation failed\n");exit(1);}
  
  /********** ak PLAN PREPARATION ***********/
   /***** for an OUTPLACE TRANSFORM *****/
   
  fftwl_complex *akvalues;
  akvalues = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * nhalf); 
  fftwl_complex *akvalues_trasf;
   /* the FFT of a complex sequence with length = nhalf has nhalf complex positions*/
  akvalues_trasf = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * nhalf);
  
    /***** for an INPLACE TRANSFORM *****/
     /* This is a FORWARD transform; corresponds to the sum over a of conj(chi)(a) */
 
  /*fftwl_complex *akvalues;
  akvalues = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * nhalf); */
  /*fftwl_complex *akvalues_trasf;*/
   /* the FFT of a complex sequence with length = nhalf has nhalf complex positions*/
 /* akvalues_trasf = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * nhalf);*/
  
  fftw_iodim64 *ak_dims=malloc(rank*sizeof(fftw_iodim64));
  ak_dims[0].n= nhalf;   /* length of the trasform */
  ak_dims[0].is=1;
  ak_dims[0].os=1;  
  fftwl_plan plan_ak = fftwl_plan_guru64_dft(rank, /*rank=1 means monodimensional vector*/
              ak_dims, /* nhalf */
              howmany_rank,
              howmany_dims,
              akvalues,  /*input for outplace transforms*/ 
              akvalues_trasf,  /*output; for outplace transforms*/ 
              FFTW_FORWARD, /* -1 because it is the sign of FORWARD transform */
              FFTW_ESTIMATE); /*flags*/
              
  if (plan_ak ==NULL){fprintf(stderr,"ak plan creation failed\n");exit(1);}
  
    clock_t endplan = clock(); /* stop clock */
  /*END GURU 64 PLAN PREPARATION */
  
  
  /*********** acquire the values to generate sequence g^k%q ***********/
  
  printf("%s", "Generating log(gamma(a/q)) and g^k%q [decimated in frequency]"); 
  
  
  /******** initialisation for log gamma and a_k **********/
  
  /*  COMMENT: the speediness of the DIF for a_k can be improved
  by replacing the nhalf calls to sin and cos with the summation
  formulae for the sin and cos functions (so using just one call of
  sin and cos and about nhalf sums and products). This  works nicely in
  quadruple precision but in the long double precision it introduces
  some computation errors in the final value of the euler-kronecker constants.
  So we are using here the slower but more precise version of the DIF
  for the a_k sequence.
  */
  clock_t startinitseq = clock();  /* start clock */
  long double sinvalue;
  long double cosvalue;
  long double logpi = logl(pi);
  long double twist = 0.0L;
  long double angle = (long double) pi/nhalf;
  a = 1; 
  for (k = 0; k <nhalf ; k++){  
     c = (long double) (2*a-q); /*g^k -R g^(k+m) \equiv g^k  -R (q-1)g^k \equiv g^k -R (-g^k) \equiv g^k -R (q -g^k) = 2*g^k -R q */
     /* sincosl(twist, &sinvalue, &cosvalue);  works in LINUX; not in C standard; slightly faster */
     cosvalue = cosl(twist);
     sinvalue = sinl(twist); 
     akvalues[k][REAL]= c * cosvalue;  
     akvalues[k][IMAG]= (-1.0L) * c * sinvalue; /* depends on the sign = -1 of the transform */
     d = (long double) a/q ;
     lngammavalues[k]= logpi - logl(sinl(pi*d));  /* values of log(gamma(a_k/q) via the reflection formula*/
     a = (a*primroot)%q;  
     twist = twist + angle;
  } 
   clock_t endinitseq = clock();  /* stop clock */
  printf("%s\n", ". Done"); 
  
  printf("%s", "Start first FFT transform (logGamma)"); 

  clock_t start = clock();  /* start clock */
  /* transform loggamma */
  fftwl_execute(plan_lngamma);

  /* lngammatrasf contains the DFT of loggamma; due to the simmetries the output has half elements than the input */
  /* lngammatrasf[i][REAL]=lngammatrasf[nhalf-i][REAL];  lngammatrasf[i][nhalf]=-lngammatrasf[nhalf-i][IMAG] */
    /*   SO THEY HAVE THE SAME ABSOLUTE VALUES  */
  
  clock_t end = clock(); /* stop clock */
  printf("%s\n", " -- End first FFT transform."); 
  
  /* destroy the FFTW plans */  
  fftwl_destroy_plan(plan_lngamma);  /* DO NOT free lngammatrasf; we need it later */
  /* freeing the fftw_malloc*/ 
  fftwl_free(lngammavalues);  
  
  /* Computing max_{chi\ne chi_0} \vert L(1,chi) \vert =: M(q) */
  /* sum starts from i=1 to avoid the trivial character contribution */
  
  printf("%s", "Computing the max over even characters"); 
  
  clock_t startcompeven = clock(); /* start clock */
  long double M=0;/* even characters */
  long double value=0;      
  for (i=1;  i < nquarter; ++i) { /* computes |(a+ib)| */ 
         value = lngammatrasf[i][REAL] * lngammatrasf[i][REAL] +  lngammatrasf[i][IMAG]*  lngammatrasf[i][IMAG]; 
          if (value> M) {M=value;}; 
    /*printf("%Lf\n", M);*/
  }
  evenresult=2*sqrtl(M)/sqrtl(q); /* max for even characters */
  
  clock_t endcompeven = clock(); /* stop clock */
  printf("%s\n", ". Done");
    /* freeing the fftw_malloc*/ 
  fftwl_free(lngammatrasf);  
  
    printf("%s", "Start second FFT transform (g^k%q)"); 
    clock_t start1 = clock();  /* start clock */
  /* transform ak */
  fftwl_execute(plan_ak);

  /*akvalues_trasf contains the DFT of ak; it's a complex to complex transform */
  
  clock_t end1 = clock(); /* stop clock */

  printf("%s\n", " -- End second FFT transform."); 
  
  /* destroy the ak plan */ 
  fftwl_destroy_plan(plan_ak);
    /* freeing the fftw_malloc*/ 
  fftwl_free(akvalues);  
  

  printf("%s", "Computing the max over odd characters"); 
  clock_t startcompodd = clock(); /* start clock */
  
  M=0;/* odd characters */
  value=0;   
  for (i=0;  i < nhalf; ++i) { 
          value = akvalues_trasf[i][REAL] * akvalues_trasf[i][REAL] +  akvalues_trasf[i][IMAG]*akvalues_trasf[i][IMAG];
          if (value> M) {M=value;};
 
    /*printf("%Lf\n", M);*/
  }
  oddresult=pi*sqrtl(M)/(q*sqrtl(q)); /* max for odd characters */
  clock_t endcompodd = clock(); /* stop clock */
  printf("%s\n\n", ". Done");
  
  /* freeing the fftw_malloc*/ 
  fftwl_free(akvalues_trasf);
  /* clean the fftw setting */ 
  fftwl_cleanup();
 
  
  /******************** output of the results *********************/
  printf("%s\n\n", "*** RESULTS:");
  
   /* print max_q */
  printf("%s", "max_even(");
  printf("%td",q);
  printf("%s", ") = ");
  printf("%Lf", evenresult);
  printf("\n\n");
  
      /* print max_q */
  printf("%s", "max_odd(");
  printf("%td",q);
  printf("%s", ") = ");
  printf("%Lf", oddresult);
  printf("\n\n");
  
    /* print max_q */
  printf("%s", "max_{chi neq chi_0}|L(1,chi)|(");
  printf("%td",q);
  printf("%s", ") = ");
  printf("%Lf", fmaxl(evenresult,oddresult));
  printf("\n\n");
   
  
  /********************* output of the computation times *********************/
  
  printf("%s\n", "*** TIMES:");
  double time_elapsed_in_sec = (end - start+ end1 - start1)/(double)CLOCKS_PER_SEC; 
  
  /* output of the FFT total computation time */
  double min=floor(time_elapsed_in_sec/60);
  double sec=floor(time_elapsed_in_sec-min*60);
  double msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "FFT - total computation time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
  /* output of the PLAN FFT total computation time */
  time_elapsed_in_sec = (endplan - startplan)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "PLAN FFT - total computation time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. ");
  
  /* output of the total computation time */
  time_elapsed_in_sec = (endcompeven - startcompeven + endcompodd - startcompodd)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Final comp maxq - total computation time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
  
    /* output initialisation time */
  time_elapsed_in_sec = (endinitseq - startinitseq)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Initialisation - time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
  
  /* total time */
  time_elapsed_in_sec = (endinitseq - startinitseq + end - start+ end1 - start1 + endplan - startplan + endcompeven - startcompeven + endcompodd - startcompodd)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Total elapsed time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  printf("********** END PROGRAM  *********** \n");
  return 0;
}

 
/******  to compile
 Compilation instructions vary by platform. On the Mac and other UNIX-like systems, you should just be able to type:
 
 clang -o  MaxL-fftwl.exe  MaxL-fftwl.c -lfftw3l -lm  using long double precision (on mac ; fftw installed using homebrew)
 
 clang -o MaxL-fftwl.exe  MaxL-fftwl.c -lfftw3l -lm (on the optiplex linux box at the dept) 

 ******************/