#
#   Copyright (C) 2020 Alessandro Languasco
#
import pandas as pd
import numpy as np 

# --- detection of small values ---------------------------------------
# With flagsmall set to True the script lists every q > bound whose mq is
# below eps, so that it can be recomputed with quadruple precision
# (original note: the ones with q < 10^6 have already been recomputed).
flagsmall = True    # switch to False to skip the detection
eps = 1e-5          # threshold on mq, 10^(-5); a looser choice is 0.0001 = 10^(-4)
bound = 2           # only q > bound are listed; 1000000 (10^6) is the alternative value

# --- check of the lower and upper bounds -----------------------------
# With flagbounds set to True the script lists the q such that
# mq >= ubound(q) and the q such that mq <= lbound(q).
flagbounds = True   # switch to False to skip the check
# functions for lower and upper bounds
def ubound(x):
    """Return log(log(x)) / log(x), the upper bound that mq is compared with.

    The NumPy functions used here operate element-wise, so ``x`` may be a
    scalar, an array or a pandas Series.
    """
    log_x = np.log(x)
    return np.log(log_x) / log_x

def lbound(x):
    """Return 0.105 / x (0.105 = 21/200), the lower bound that mq is compared with."""
    return 0.105 / x

# function to Find indexes of an element in pandas dataframe
# from https://thispointer.com/python-find-indexes-of-an-element-in-pandas-dataframe/
# to be used at the bottom of the script to detect when  mineven == minodd  and when there are small values

def getIndexes(dfObj, value):
    """Locate the cells of ``dfObj`` selected by ``dfObj.isin([value])``.

    Returns a list of ``(row_label, column_name)`` tuples. The tuples are
    grouped by column, following the column order of the frame, and inside a
    column they follow the row order.
    """
    hits = dfObj.isin([value])
    # Keep only the columns with at least one matching cell.
    matching_columns = [name for name in hits.columns if hits[name].any()]
    return [
        (row_label, name)
        for name in matching_columns
        for row_label in hits.index[hits[name].to_numpy()]
    ]

# function to check for small values; flagsmall is boolean; eps is the bound
# uses the previous locator function

def checksmallvalues(flagsmall, eps, bound):
    """Report the rows with 0 < mq < eps and q > bound.

    Reads the module-level data frame ``df`` (columns 'q' and 'min') and
    appends its messages to the module-level report file ``fileanalysis``;
    the same information is printed. When at least one row qualifies, the
    corresponding q values are written, one per line, to ``recompute.txt``.

    Parameters:
        flagsmall: when false, only a "not detecting" notice is emitted.
        eps: strict upper threshold for the 'min' column.
        bound: only rows whose q is strictly greater than this are considered.
    """
    if not flagsmall:
        print('*** Not detecting values <', eps)
        fileanalysis.write("*** Not detecting values < " + str(eps) + "\n")
        return

    print('*** Detecting values mq <', eps, 'and q > ', bound, ' and saving them on file recompute.txt')
    fileanalysis.write("*** Detecting values mq < " + str(eps) + " and q > " + str(bound) + "\n")
    fileanalysis.write("*** and saving them on file recompute.txt \n")

    # One mask drives both the count and the exported list, so the file always
    # honours q > bound exactly like the reported number does.
    small = (df['min'] > 0) & (df['min'] < eps) & (df['q'] > bound)
    smallvalues = int(small.sum())
    if smallvalues == 0:
        print(' No values mq <', eps, 'with q>', bound, 'detected')
        fileanalysis.write(" No values mq < " + str(eps) + " with q> " + str(bound) + " detected\n")
        return

    print('*** There are n.', smallvalues, ' primes in which mq < ', eps, ' and  q >', bound)
    print('*** recompute them with quadruple precision ***')
    fileanalysis.write("*** There are n." + str(smallvalues) + " primes in which mq < "
                       + str(eps) + " and q > " + str(bound) + "\n")
    fileanalysis.write("*** recompute them with quadruple precision ***\n")

    # Selecting through the mask avoids scanning the whole frame for True
    # (which could also pick up cells of other columns) and leaves df untouched.
    with open("recompute.txt", "w") as filerecomp:
        filerecomp.writelines(str(q) + "\n" for q in df.loc[small, 'q'].to_numpy())

# function to check the lower and upper bounds; flagbounds is boolean;
# uses the previous locator function

def _report_bound_exceptions(exceptions, relation, filename, file_header):
    """Report the rows flagged by the boolean mask ``exceptions``.

    Prints and logs (to the module-level ``fileanalysis``) how many rows of the
    module-level ``df`` are flagged; when there is at least one, their q values
    are written, one per line, to ``filename``.

    Parameters:
        exceptions: boolean Series aligned with ``df``.
        relation: text of the tested relation, e.g. ``'mq >= ubound(q)'``.
        filename: name of the file receiving the flagged q values.
        file_header: first line logged to ``fileanalysis`` (without newline).
    """
    print('*** Detecting values ' + relation + ' and saving them on file ' + filename)
    fileanalysis.write(file_header + "\n")

    count = int(exceptions.sum())
    if count == 0:
        print(' No primes for which ' + relation + ' detected')
        fileanalysis.write(" No primes for which " + relation + " detected \n")
        return

    print('*** There are n.', count, ' primes for which ' + relation)
    fileanalysis.write("*** There are n." + str(count) + " primes for which " + relation + " \n")
    # The mask selects exactly the flagged rows; no temporary column is added
    # to df and no other column can contribute spurious matches.
    with open(filename, "w") as out:
        out.writelines(str(q) + "\n" for q in df.loc[exceptions, 'q'].to_numpy())


def checkbounds(flagbounds):
    """Check the 'min' column of ``df`` against ``ubound(q)`` and ``lbound(q)``.

    When ``flagbounds`` is true, the q with mq >= ubound(q) are saved in
    ``uexcept.txt`` and the q with mq <= lbound(q) in ``lexcept.txt``; the
    counts are printed and logged to the module-level ``fileanalysis``.
    When it is false, only a notice is emitted.
    """
    if not flagbounds:
        print('*** Not checking lower and upper bounds')
        fileanalysis.write("*** Not checking lower and upper bounds \n")
        return

    ##  UPPER BOUND
    _report_bound_exceptions(
        df['min'] >= ubound(df['q']),
        'mq >= ubound(q)',
        'uexcept.txt',
        "*** Detecting values mq >= ubound(q) and saving them on file uexcept.txt",
    )
    ##  LOWER BOUND
    _report_bound_exceptions(
        df['min'] <= lbound(df['q']),
        'mq <= lbound(q)',
        'lexcept.txt',
        "*** Detecting values mq <= lbound(q)  and saving them on file lexcept.txt",
    )
		

# open output file (closed at the very end of the script)
fileanalysis = open("analysis_merged.txt", "w")

# input data from csv file: fields are separated by ';' and read as text,
# then the four columns used below are converted to extended precision.
data = pd.read_csv(r'minderlog-merged.csv', sep=';', dtype='str')
df = pd.DataFrame(data, columns=['q', 'mineven', 'minodd', 'min'])
# np.longdouble is the platform's extended-precision float; it is the same
# type as 'float128' where that name exists, and it is also defined on
# platforms where 'float128' is not available.
df = df.astype(np.longdouble)

#print (df)
#print (df)

#FINDING MAX AND MIN and where they are attained
#M=df['min'].max()
#idxM=df['min'].idxmax()
#argM=df['q'].iloc[idxM]
#m=df['min'].min()
#idxm=df['min'].idxmin()
#argm=df['q'].iloc[idxm]

#print('minimal value on column min =',m,'attained at q =',argm)
#print('maximal value on column min =',M,'attained at q =',argM)


# FINDING MAX and second MAX; MIN and second MIN; and where they are attained

def _report_top_two(values, largest, column, console_header, file_header):
    """Print and log the two extreme entries of ``values`` with their q.

    Parameters:
        values: Series sharing its index with the module-level ``df``.
        largest: True for the two largest entries, False for the two smallest.
        column: column name shown in the messages.
        console_header: section title printed on the console.
        file_header: section title logged to ``fileanalysis`` (without newline).
    """
    print(console_header)
    fileanalysis.write(file_header + "\n")
    if largest:
        picked = values.nlargest(2)
        names = ('maximal', 'second maximal')
    else:
        picked = values.nsmallest(2)
        names = ('minimal', 'second minimal')
    # The entries of `picked` are already ordered, so the first label belongs
    # to the extreme and the second one to the runner-up even when the two
    # values are equal. zip() stops early when fewer than two rows exist.
    for name, label, value in zip(names, picked.index, picked.to_numpy()):
        line = (name + ' value on column ' + column + ' = '
                + '{0:.16f}'.format(value)
                + ' attained at q = ' + str(df['q'].loc[label]))
        print(line)
        fileanalysis.write(line + "\n")


# name of the csv file loaded at the top of the script
source_csv = 'minderlog-merged.csv'

print(' *****   Statistics for Min derlog L  *****')
print('name source file = ' + source_csv)

fileanalysis.write(" *****   Statistics for Min derlog L  ***** \n")
fileanalysis.write("name source file = " + source_csv + "\n")

_report_top_two(df['min'], True, 'min',
                ' *****   MAX   *****',
                '  *****   MAX   *****')
_report_top_two(df['min'], False, 'min',
                ' *****   MIN   *****',
                '  *****   MIN   *****')

# normalised min values, kept in a separate Series so that df is not modified
normalised = df['min'] / lbound(df['q'])

_report_top_two(normalised, True, 'norm',
                ' *****   MAX normalised values min/lbound(q) *****',
                '  *****   MAX  normalised values min/lbound(q) *****')
_report_top_two(normalised, False, 'norm',
                ' *****   MIN normalised values min/lbound(q) *****',
                '  *****   Min normalised values min/lbound(q) *****')

print(' *****   STATS Min derlog L   *****')

# Number of rows for each possible ordering of minodd and mineven.
total = float(len(df))
oddgreatereven = int((df['minodd'] > df['mineven']).sum())
evengreaterodd = int((df['mineven'] > df['minodd']).sum())
evenequalodd = int((df['mineven'] == df['minodd']).sum())

fileanalysis.write("  *****   STATS Min derlog L   *****\n")

# Each count is shown together with its share of the rows, on the console
# and in the report file.
for caption, count in (
    ('number of odd > even :', oddgreatereven),
    ('number of even > odd :', evengreaterodd),
    ('number of even = odd :', evenequalodd),
):
    percent = float(count) / total
    print(caption, count, 'percentage =', percent * 100)
    fileanalysis.write(caption + " " + str(count)
                       + " percentage = " + str(percent * 100) + "\n")

# Summary totals.
for caption, count in (
    ('total number (oddgreatereven+evengreaterodd) =', oddgreatereven + evengreaterodd),
    ('total number (evenequalodd) =', evenequalodd),
    ('total number (number of rows) =', int(total)),
):
    print(caption, count)
    fileanalysis.write(caption + " " + str(count) + "\n")

# If there are rows with mineven == minodd, print the corresponding q.
if evenequalodd > 0:
    # Compare the two columns directly: searching the whole frame for 0 would
    # also report rows where some other column is zero and could list a row
    # more than once.
    equal_rows = df.loc[df['mineven'] == df['minodd'], 'q']
    print('Primes in which mineven==minodd (prints zero occurrences in mineven - minodd column) : ')
    print('*** recompute them with quadruple precision ***')
    for rank, q in enumerate(equal_rows.to_numpy(), start=1):
        print(rank, ': q = ', q)


# checking for small values
checksmallvalues(flagsmall, eps, bound)

# checking for lower and upper bounds
checkbounds(flagbounds)


# Check for possible wrong data: mineven and minodd are flagged when they
# exceed 1.
for column, caption in (
    ('mineven', 'number of mineven > 1 :'),
    ('minodd', 'number of odd > 1 :'),
):
    greater1 = int((df[column] > 1).sum())
    if greater1 > 0:
        print(' *****  Possible wrong datas *******')
        # The percentage refers to this count (greater1 > 0 guarantees that
        # the frame is not empty), not to a previously computed statistic.
        print(caption, greater1, 'percentage =', float(greater1) / float(len(df)) * 100)

	
# Export the data frame as a tab-separated table (header row, no index
# column) to be plotted with gnuplot.
for notice in (
    ' *****   Exported data frame for gnuplot   *****',
    ' *****   in file named: min-dati-totali-per-grafico.txt  *****',
):
    print(notice)

gnuplot_table = r'min-dati-totali-per-grafico.txt'
df.to_csv(gnuplot_table, sep='\t', index=False, header=True)


# Final notice, then the report file is closed.
print(' *****   End analysis script   *****')
fileanalysis.write("*****   End analysis script   *****")
fileanalysis.close()