#
# Copyright (C) 2020 Alessandro Languasco
#
import pandas as pd
import numpy as np

# flagsmall = True: find the q > bound such that 0 < mq < eps
# NOTE(review): flagsmall, eps and bound were passed to checksmallvalues() by
# this script but never defined (NameError at run time). The values below are
# placeholders that keep the detection disabled -- confirm the intended ones.
flagsmall = False
eps = 1e-6
bound = 0

# flagbounds = True: find the q such that mq >= ubound(q) and mq <= lbound(q)
flagbounds = True
#flagbounds = False

# Both names are bound by main(); checksmallvalues() and checkbounds() fall
# back to them when no explicit frame/report is passed.
df = None
fileanalysis = None


# functions for lower and upper bounds
def ubound(x):
    """Return log(log(x)) / log(x), the upper bound compared against mq.

    Works element-wise on numpy arrays and pandas Series as well as scalars.
    """
    return np.log(np.log(x)) / np.log(x)


def lbound(x):
    """Return 0.105 / x (0.105 = 21/200), the lower bound compared against mq."""
    return 0.105 / x


# function to find indexes of an element in a pandas dataframe
# from https://thispointer.com/python-find-indexes-of-an-element-in-pandas-dataframe/
# NOTE: it matches `value` in EVERY column, so the script itself now selects
# rows through boolean masks; the function is kept for existing callers.
def getIndexes(dfObj, value):
    """Get the (row label, column name) positions of value in dataframe dfObj.

    Positions are listed column by column (in column order) and, inside a
    column, in row order. An empty list is returned when value is absent.
    """
    # Boolean dataframe with True at positions where the given value exists
    result = dfObj.isin([value])
    # Columns that contain the value at least once
    found = result.any()
    listOfPos = []
    for col in found[found].index:
        column_hits = result[col]
        listOfPos.extend((row, col) for row in column_hits[column_hits].index)
    return listOfPos


def _write_q_values(path, qvalues):
    """Write every value of the Series qvalues to path, one per line."""
    with open(path, "w") as out:
        # to_numpy() keeps the numpy scalar type, hence the str() rendering
        out.writelines(str(q) + "\n" for q in qvalues.to_numpy())


def _percentage(count, total):
    """Return count as a percentage of total (0.0 when total is zero)."""
    if not total:
        return 0.0
    return float(count) / total * 100


# function to check for small values; flagsmall is boolean; eps is the bound
def checksmallvalues(flagsmall, eps, bound, frame=None, report=None):
    """Report the rows with 0 < min < eps and q > bound.

    When flagsmall is true and such rows exist, their q values are saved,
    one per line, in recompute.txt (they are meant to be recomputed with
    quadruple precision). Messages go to stdout and to the report file.

    frame:  dataframe with 'q' and 'min' columns (default: module-level df).
    report: writable text file (default: module-level fileanalysis).
    """
    frame = df if frame is None else frame
    report = fileanalysis if report is None else report

    if not flagsmall:
        print('*** Not detecting values <', eps)
        report.write("*** Not detecting values < " + str(eps) + "\n")
        return

    print('*** Detecting values mq <', eps, 'and q > ', bound,
          ' and saving them on file recompute.txt')
    report.write("*** Detecting values mq < " + str(eps)
                 + " and q > " + str(bound) + "\n")
    report.write("*** and saving them on file recompute.txt \n")

    # One mask drives both the count and the saved list, so the file holds
    # exactly the rows that were counted (q > bound included).
    small = (frame['min'] > 0) & (frame['min'] < eps) & (frame['q'] > bound)
    smallvalues = int(small.sum())

    if smallvalues > 0:
        print('*** There are n.', smallvalues, ' primes in which mq < ', eps,
              ' and q >', bound)
        print('*** recompute them with quadruple precision ***')
        report.write("*** There are n." + str(smallvalues)
                     + " primes in which mq < " + str(eps)
                     + " and q > " + str(bound) + "\n")
        report.write("*** recompute them with quadruple precision ***\n")
        _write_q_values("recompute.txt", frame.loc[small, 'q'])
    else:
        print(' No values mq <', eps, 'with q>', bound, 'detected')
        report.write(" No values mq < " + str(eps)
                     + " with q> " + str(bound) + " detected\n")


def _report_bound_exceptions(frame, mask, relation, path, report):
    """Report the rows selected by mask and save their q values in path.

    relation is the text of the tested condition (e.g. 'mq >= ubound(q)').
    The file is only written when at least one row is selected.
    """
    print('*** Detecting values ' + relation
          + ' and saving them on file ' + path)
    report.write("*** Detecting values " + relation
                 + " and saving them on file " + path + "\n")

    count = int(mask.sum())
    if count > 0:
        print('*** There are n.', count, ' primes for which ' + relation)
        report.write("*** There are n." + str(count)
                     + " primes for which " + relation + " \n")
        _write_q_values(path, frame.loc[mask, 'q'])
    else:
        print(' No primes for which ' + relation + ' detected')
        report.write(" No primes for which " + relation + " detected \n")


# function to check the lower and upper bounds; flagbounds is boolean
def checkbounds(flagbounds, frame=None, report=None):
    """Report the rows with min >= ubound(q) and those with min <= lbound(q).

    When flagbounds is true the q values of the exceptions are saved in
    uexcept.txt and lexcept.txt respectively. Messages go to stdout and to
    the report file.

    frame:  dataframe with 'q' and 'min' columns (default: module-level df).
    report: writable text file (default: module-level fileanalysis).
    """
    frame = df if frame is None else frame
    report = fileanalysis if report is None else report

    if not flagbounds:
        print('*** Not checking lower and upper bounds')
        report.write("*** Not checking lower and upper bounds \n")
        return

    ## UPPER BOUND
    _report_bound_exceptions(frame, frame['min'] >= ubound(frame['q']),
                             'mq >= ubound(q)', 'uexcept.txt', report)
    ## LOWER BOUND
    _report_bound_exceptions(frame, frame['min'] <= lbound(frame['q']),
                             'mq <= lbound(q)', 'lexcept.txt', report)


def _report_extremes(values, qvalues, column, largest, heading, fileheading,
                     report):
    """Report the two largest (or smallest) entries of values and their q.

    heading is printed on stdout and fileheading written to report; they are
    separate because the original texts are not always identical. Fewer than
    two rows simply produce fewer lines.
    """
    kind = 'maximal' if largest else 'minimal'
    picked = values.nlargest(2) if largest else values.nsmallest(2)

    print(heading)
    report.write(fileheading + "\n")
    # picked is already ordered (best first), so its index gives the row of
    # each value even when the two values are equal.
    for ordinal, label, value in zip(('', 'second '), picked.index,
                                     picked.to_numpy()):
        caption = ordinal + kind + ' value on column ' + column + ' ='
        shown = '{0:.16f}'.format(value)
        attained = qvalues.loc[label]
        print(caption, shown, 'attained at q =', attained)
        report.write(caption + " " + shown + " attained at q = "
                     + str(attained) + "\n")


def _report_count(label, count, total, report):
    """Report how many rows satisfy the relation named by label."""
    percent = _percentage(count, total)
    print('number of ' + label + ' :', count, 'percentage =', percent)
    report.write("number of " + label + " : " + str(count)
                 + " percentage = " + str(percent) + "\n")


def main():
    """Read minderlog-merged.csv and produce the whole analysis.

    Writes analysis_merged.txt, the exception lists produced by
    checksmallvalues() and checkbounds(), and the tab-separated export
    min-dati-totali-per-grafico.txt for gnuplot.
    """
    global df, fileanalysis

    # open output file; the with-block closes it even if the analysis fails
    with open("analysis_merged.txt", "w") as fileanalysis:
        # input data from csv file (read as text, then converted)
        data = pd.read_csv(r'minderlog-merged.csv', sep=';', dtype='str')
        df = pd.DataFrame(data, columns=['q', 'mineven', 'minodd', 'min'])
        df = df.astype('float128')

        # FINDING MAX and second MAX; MIN and second MIN; and where attained
        print(' ***** Statistics for Min derlog L *****')
        print('name source file = minderlog.csv')
        fileanalysis.write(" ***** Statistics for Min derlog L ***** \n")
        fileanalysis.write("name source file = minderlog.csv\n")

        _report_extremes(df['min'], df['q'], 'min', True,
                         ' ***** MAX *****', " ***** MAX *****",
                         fileanalysis)
        _report_extremes(df['min'], df['q'], 'min', False,
                         ' ***** MIN *****', " ***** MIN *****",
                         fileanalysis)

        # normalised min values (kept out of df so the export is unaffected)
        norm = df['min'] / lbound(df['q'])
        _report_extremes(norm, df['q'], 'norm', True,
                         ' ***** MAX normalised values min/lbound(q) *****',
                         " ***** MAX normalised values min/lbound(q) *****",
                         fileanalysis)
        _report_extremes(norm, df['q'], 'norm', False,
                         ' ***** MIN normalised values min/lbound(q) *****',
                         " ***** Min normalised values min/lbound(q) *****",
                         fileanalysis)

        print(' ***** STATS Min derlog L *****')
        fileanalysis.write(" ***** STATS Min derlog L *****\n")
        total = float(len(df))
        equal_rows = df['mineven'] == df['minodd']
        oddgreatereven = int((df['minodd'] > df['mineven']).sum())
        evengreaterodd = int((df['mineven'] > df['minodd']).sum())
        evenequalodd = int(equal_rows.sum())

        _report_count('odd > even', oddgreatereven, total, fileanalysis)
        _report_count('even > odd', evengreaterodd, total, fileanalysis)
        _report_count('even = odd', evenequalodd, total, fileanalysis)

        print('total number (oddgreatereven+evengreaterodd) =',
              oddgreatereven + evengreaterodd)
        print('total number (evenequalodd) =', evenequalodd)
        print('total number (number of rows) =', int(total))
        fileanalysis.write("total number (oddgreatereven+evengreaterodd) = "
                           + str(oddgreatereven + evengreaterodd) + "\n")
        fileanalysis.write("total number (evenequalodd) = "
                           + str(evenequalodd) + "\n")
        fileanalysis.write("total number (number of rows) = "
                           + str(int(total)) + "\n")

        # if there are cases of even == odd; print such cases
        if evenequalodd > 0:
            print('Primes in which mineven==minodd (prints zero occurrences in mineven - minodd column) : ')
            print('*** recompute them with quadruple precision ***')
            for rank, q in enumerate(df.loc[equal_rows, 'q'].to_numpy(), 1):
                print(rank, ': q = ', q)

        # checking for small values
        checksmallvalues(flagsmall, eps, bound)

        # checking for lower and upper bounds
        checkbounds(flagbounds)

        # Check for possible wrong datas: values > 1 in mineven / minodd
        for column, label in (('mineven', 'mineven'), ('minodd', 'odd')):
            greater1 = int((df[column] > 1).sum())
            if greater1 > 0:
                print(' ***** Possible wrong datas *******')
                # percentage of the rows counted here, not a leftover value
                print('number of ' + label + ' > 1 :', greater1,
                      'percentage =', _percentage(greater1, total))

        print(' ***** Exported data frame for gnuplot *****')
        print(' ***** in file named: min-dati-totali-per-grafico.txt *****')
        df.to_csv(r'min-dati-totali-per-grafico.txt', sep='\t', index=False,
                  header=True)

        print(' ***** End analysis script *****')
        fileanalysis.write("***** End analysis script *****")


if __name__ == "__main__":
    main()