#
#   Copyright (C) 2020 Alessandro Languasco
#
import pandas as pd
import numpy as np 

# functions needed to create the normalised values
def f(x):
    """Return log(log(x)), the function used to normalise the max values.

    The logarithms are NumPy's, so ``x`` may be a scalar or an array-like
    such as a DataFrame column; the operation is applied element-wise.
    """
    return np.log(np.log(x))
    
# Switch read by the checkbounds(flagbounds) call near the end of the script:
#   True  -> list the q with Mq >= ubound(q) and the q with Mq <= lbound(q)
#   False -> skip the bound check
flagbounds = True
#flagbounds = False
# functions for lower and upper bounds
def ubound(x):
    """Return the upper threshold 1.25 * f(x) that Mq is compared against."""
    # 1.25 = 5/4.
    # NOTE(review): the earlier comment here read "1.25 = 32/25", but
    # 32/25 is 1.28 -- confirm which constant is the intended one.
    return 1.25 * f(x)

def lbound(x):
    """Return the lower threshold 0.85 * f(x) that Mq is compared against."""
    # 0.85 = 17/20
    return 0.85 * f(x)

# function to Find indexes of an element in pandas dataframe
# from https://thispointer.com/python-find-indexes-of-an-element-in-pandas-dataframe/
# to be used at the bottom of the script to detect when  even == odd 

def getIndexes(dfObj, value):
    """Locate every occurrence of ``value`` in the dataframe ``dfObj``.

    Returns a list of ``(row_label, column_name)`` tuples.  Columns are
    visited in dataframe order and, within a column, rows in index order.
    The list is empty when ``value`` does not occur.
    """
    # Boolean frame of the same shape: True wherever a cell matches value
    hits = dfObj.isin([value])
    # One boolean per column: does that column contain a match at all?
    has_hit = hits.any()

    positions = []
    for col in has_hit[has_hit].index:
        col_hits = hits[col]
        # Keep only the matching rows of this column and record their labels
        positions.extend((row, col) for row in col_hits[col_hits].index)
    return positions

# Open the report file analysis.txt (in the parent of the working directory)
# for writing.  The handle is used as a global by checkbounds() and by the
# statements below, and is closed on the last line of the script.
fileanalysis= open("../analysis.txt","w")

   
# function to check the lower and upper bounds; flagbounds is boolean;
# uses the previous locator function

def _write_q_values(path, q_values):
    """Write every entry of ``q_values`` to the text file ``path``, one per line."""
    with open(path, "w") as out:
        for q in q_values:
            out.write(str(q))
            out.write("\n")


def checkbounds(flagbounds):
    """Report the primes q whose Mq lies outside the lbound/ubound band.

    Works on the global dataframe ``df`` and logs to stdout and to the
    global ``fileanalysis`` handle.

    When ``flagbounds`` is true:
      * rows with ``max >= ubound(q)`` are counted and, if there are any,
        their ``q`` values are saved in ``uexcept.txt``;
      * rows with ``max <= lbound(q)`` are counted and, if there are any,
        their ``q`` values are saved in ``lexcept.txt``.
    When ``flagbounds`` is false only a notice is emitted.

    The rows are selected directly with the boolean comparison mask.
    Searching the whole dataframe for the value ``True`` instead would
    also hit numeric cells equal to 1 in the other columns, and would
    require a temporary column in ``df``.
    """
    if not flagbounds:
        print('*** Not checking lower and upper bounds')
        fileanalysis.write("*** Not checking lower and upper bounds ")
        fileanalysis.write("\n")
        return

    ##  UPPER BOUND
    print('*** Detecting values Mq >= ubound(q) and saving them on file uexcept.txt')
    fileanalysis.write("*** Detecting values Mq >= ubound(q) and saving them on file uexcept.txt")
    fileanalysis.write("\n")
    # True on the rows where Mq reaches or exceeds the upper threshold
    above = df['max'] >= ubound(df['q'])
    uboundexceptions = int(above.sum())
    if uboundexceptions > 0:
        print('*** There are n.', uboundexceptions, ' primes for which Mq >= ubound(q)')
        fileanalysis.write("*** There are n.")
        fileanalysis.write(str(uboundexceptions))
        fileanalysis.write(" primes for which Mq >= ubound(q) ")
        fileanalysis.write("\n")
        _write_q_values("uexcept.txt", df.loc[above, 'q'].values)
    else:
        print(' No primes for which Mq >= ubound(q) detected')
        fileanalysis.write(" No primes for which Mq >= ubound(q) detected ")
        fileanalysis.write("\n")

    ##  LOWER BOUND
    print('*** Detecting values Mq <= lbound(q) and saving them on file lexcept.txt')
    fileanalysis.write("*** Detecting values Mq <= lbound(q)  and saving them on file lexcept.txt")
    fileanalysis.write("\n")
    # True on the rows where Mq is at or below the lower threshold
    below = df['max'] <= lbound(df['q'])
    lboundexceptions = int(below.sum())
    if lboundexceptions > 0:
        print('*** There are n.', lboundexceptions, ' primes for which Mq <= lbound(q)')
        fileanalysis.write("*** There are n.")
        fileanalysis.write(str(lboundexceptions))
        fileanalysis.write(" primes for which Mq <= lbound(q) ")
        fileanalysis.write("\n")
        _write_q_values("lexcept.txt", df.loc[below, 'q'].values)
    else:
        print(' No primes for which Mq <= lbound(q) detected')
        fileanalysis.write(" No primes for which Mq <= lbound(q) detected ")
        fileanalysis.write("\n")
		

# Input data from the csv file: every field is read as a string and the
# four columns used below are then converted to extended precision.
data = pd.read_csv(r'maxderlog.csv', sep=';', dtype='str')
df = pd.DataFrame(data, columns=['q', 'maxeven', 'maxodd', 'max'])
# np.longdouble is the very type that the name 'float128' refers to on the
# platforms where NumPy defines it, and it also exists where 'float128' does
# not (there the precision is whatever the platform's long double offers).
df = df.astype(np.longdouble)

# create a new column with normalised max values: max / log(log(q))
df['norm'] = df['max'] / f(df['q'])

#print (df)

#FINDING MAX AND MIN and where they are attained
#M=df['min'].max()
#idxM=df['min'].idxmax()
#argM=df['q'].iloc[idxM]
#m=df['min'].min()
#idxm=df['min'].idxmin()
#argm=df['q'].iloc[idxm]

#print('minimal value on column min =',m,'attained at q =',argm)
#print('maximal value on column min =',M,'attained at q =',argM)


#FINDING MAX and second MAX; MIN and second MIN; and where they are attained

# Title and data-source line, shown on stdout and stored in the report file
print(' *****   Statistics for Max derlog L  *****',
      'name source fileanalysis = maxderlog.csv',
      sep='\n')

fileanalysis.writelines([
    " *****   Statistics for Max derlog L  ***** ",
    "\n",
    "name source fileanalysis = maxderlog.csv",
    "\n",
])

print(' *****   MAX   *****')

# Rows holding the largest values of column 'max', largest first.
# keep='all' can return more than four rows when values tie and the frame
# can hold fewer than four rows; zip() below reports at most four of them
# and never indexes past the end.
maxima = df.nlargest(4, 'max', keep='all')
ranks = ('maximal', 'second maximal', 'third maximal', 'fourth maximal')

fileanalysis.write("  *****   MAX   *****\n")
for rank, value, q in zip(ranks, maxima['max'].values, maxima['q'].values):
    shown = '{0:.16f}'.format(value)
    # The same label is used on stdout and in the report, so the third and
    # fourth lines read "maximal" in both places.
    print(rank + ' value on column max =', shown, 'attained at q =', q)
    fileanalysis.write(rank + " value on column max = ")
    fileanalysis.write(shown)
    fileanalysis.write(" attained at q = ")
    fileanalysis.write(str(q))
    fileanalysis.write("\n")
 

print(' *****   MIN   *****')

# Rows holding the smallest values of column 'max', smallest first.
# keep='all' can return more than four rows when values tie and the frame
# can hold fewer than four rows; zip() below reports at most four of them
# and never indexes past the end.
minima = df.nsmallest(4, 'max', keep='all')
ranks = ('minimal', 'second minimal', 'third minimal', 'fourth minimal')

fileanalysis.write("  *****   MIN   *****\n")
for rank, value, q in zip(ranks, minima['max'].values, minima['q'].values):
    shown = '{0:.16f}'.format(value)
    print(rank + ' value on column max =', shown, 'attained at q =', q)
    fileanalysis.write(rank + " value on column max = ")
    fileanalysis.write(shown)
    fileanalysis.write(" attained at q = ")
    fileanalysis.write(str(q))
    fileanalysis.write("\n")

print(' *****   MAX normalised values max/f(q) *****')

# The two largest normalised values, largest first.  Their own index labels
# identify the rows: asking the two-element result for idxmax()/idxmin()
# would give the same label twice whenever the two values are equal.
maxima = df['norm'].nlargest(2)

fileanalysis.write("  *****   MAX  normalised values max/log(log(q)) *****\n")
for rank, label, value in zip(('maximal', 'second maximal'),
                              maxima.index, maxima.values):
    shown = '{0:.16f}'.format(value)
    # look the prime up by index label, not by position
    q = df['q'].loc[label]
    print(rank + ' value on column norm =', shown, 'attained at q =', q)
    fileanalysis.write(rank + " value on column norm = ")
    fileanalysis.write(shown)
    fileanalysis.write(" attained at q = ")
    fileanalysis.write(str(q))
    fileanalysis.write("\n")
 

print(' *****   MIN normalised values max/log(log(q)) *****')

# The two smallest normalised values, smallest first.  Their own index labels
# identify the rows: asking the two-element result for idxmin()/idxmax()
# would give the same label twice whenever the two values are equal.
minima = df['norm'].nsmallest(2)

fileanalysis.write("  *****   Min normalised values max/f(q) *****\n")
for rank, label, value in zip(('minimal', 'second minimal'),
                              minima.index, minima.values):
    shown = '{0:.16f}'.format(value)
    # look the prime up by index label, not by position
    q = df['q'].loc[label]
    print(rank + ' value on column norm =', shown, 'attained at q =', q)
    fileanalysis.write(rank + " value on column norm = ")
    fileanalysis.write(shown)
    fileanalysis.write(" attained at q = ")
    fileanalysis.write(str(q))
    fileanalysis.write("\n")

print(' *****   STATS Max derlog L   *****')

# Row count, kept as a float so that the ratios below are floating point
total = float(len(df))

# rows in which the odd maximum is strictly larger than the even one
oddgreatereven = int((df['maxodd'] > df['maxeven']).sum())
percent = oddgreatereven / total
print('number of odd > even :', oddgreatereven, 'percentage =', percent * 100)

fileanalysis.write("  *****   STATS MAx derlog L   *****\n")
fileanalysis.write("number of odd > even : " + str(oddgreatereven)
                   + " percentage = " + str(percent * 100) + "\n")

# rows in which the even maximum is strictly larger than the odd one
evengreaterodd = int((df['maxeven'] > df['maxodd']).sum())
percent = evengreaterodd / total
print('number of even > odd :', evengreaterodd, 'percentage =', percent * 100)

fileanalysis.write("number of even > odd : " + str(evengreaterodd)
                   + " percentage = " + str(percent * 100) + "\n")

# rows in which the even and odd maxima coincide
evenequalodd = int((df['maxeven'] == df['maxodd']).sum())
percent = evenequalodd / total
print('number of even = odd :', evenequalodd, 'percentage =', percent * 100)

fileanalysis.write("number of even = odd : " + str(evenequalodd)
                   + " percentage = " + str(percent * 100) + "\n")

# Totals, first on stdout and then in the report file
strict_total = oddgreatereven + evengreaterodd
print('total number (oddgreatereven+evengreaterodd) =', strict_total)
print('total number (evenequalodd) =', evenequalodd)
print('total number (number of rows) =', int(total))

fileanalysis.write("total number (oddgreatereven+evengreaterodd) = "
                   + str(strict_total) + "\n")
fileanalysis.write("total number (evenequalodd) = " + str(evenequalodd) + "\n")
fileanalysis.write("total number (number of rows) = " + str(int(total)) + "\n")


# If there are rows with even == odd, print the corresponding primes.
if evenequalodd > 0:
    print('Primes in which even==odd (prints zero occurrences in even - odd column) : ')
    print('*** recompute them with quadruple precision ***')
    # Select the rows with the same comparison that produced the count.
    # Searching the whole dataframe for the value 0 would also report a
    # zero found in any other column.
    tied_q = df.loc[df['maxeven'] == df['maxodd'], 'q'].values
    for i, q in enumerate(tied_q, start=1):
        print(i, ': q = ', q)


# checking for lower and upper bounds
checkbounds(flagbounds)

# Sanity check on the input: entries of maxeven / maxodd larger than 5 are
# reported as possibly wrong data, with their share of all rows.
for column, label in (('maxeven', 'even'), ('maxodd', 'odd')):
    greater1 = int((df[column] > 5).sum())
    if greater1 > 0:
        # Recompute the ratio for this count; otherwise the value printed
        # would be the one left over from the even == odd statistic.
        percent = greater1 / total
        print(' *****  Possible wrong datas *******')
        print('number of ' + label + ' > 5 :', greater1, 'percentage =', percent * 100)


# Dump the whole dataframe (including the 'norm' column) for gnuplot
export_name = 'max-dati-totali-per-grafico.txt'
print(' *****   Exported data frame for gnuplot   *****')
print(' *****   in fileanalysis named: ' + export_name + '  *****')

# tab-separated, with a header row and without the index column
df.to_csv(export_name, sep='\t', header=True, index=False)


# Final banner on stdout and in the report, then release the report file
closing_banner = '*****   End analysis script   *****'
print(' ' + closing_banner)
fileanalysis.write(closing_banner)
fileanalysis.close()