# load a set of common macros instructions # # Copyright (C) 2024 Alessandro Languasco # import sys import pandas as pd import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.animation as animation import scipy as sp from scipy.stats import norm import sympy as sy import humanfriendly as hu from humanfriendly import format_timespan import time from datetime import datetime now = datetime.now() # dd/mm/YY H:M:S dt_string = now.strftime("%d/%m/%Y %H:%M:%S") # use Latex for the captions and titles of matplotlib plt.rcParams['text.usetex'] = True plt.rcParams.update({ "text.usetex": True, "font.family": "mathptmx" }) plt.rc('text.latex', preamble=r'\usepackage{amssymb}') # prints 20 digits in pandas dataframes pd.set_option("display.precision", 20) print('--------------------------------------------------------------------------------') print(' Author of the Script: Alessandro Languasco; (C) 2023') print('--------------------------------------------------------------------------------') print(' Versions: ') print('python: {}'.format(sys.version)) print('matplotlib: {}'.format(mpl.__version__)) print('pandas: {}'.format(pd.__version__)) print('numpy: {}'.format(np.__version__)) print('scipy: {}'.format(sp.__version__)) print('sympy: {}'.format(sy.__version__)) print('humanfriendly: {}'.format(hu.__version__)) print('--------------------') print("date and time =", dt_string) print('--------------------') # function to Find indexes of an element in pandas dataframe # from https://thispointer.com/python-find-indexes-of-an-element-in-pandas-dataframe/ # to be used at the bottom of the script to detect when mineven == minodd and when there are small values def getIndexes(dfObj, value): ''' Get index positions of value in dataframe i.e. dfObj.''' listOfPos = list() # Get bool dataframe with True at positions where the given value exists result = dfObj.isin([value]) # Get list of columns that contains the value seriesObj = result.any() columnNames = list(seriesObj[seriesObj == True].index) # Iterate over list of columns and fetch the rows indexes where value exists for col in columnNames: rows = list(result[col][result[col] == True].index) for row in rows: listOfPos.append((row, col)) # Return a list of tuples indicating the positions of value in the dataframe return listOfPos # functions needed to create the normalised values def sqrt(x): y = np.sqrt(x) return y def exp(x): y = np.exp(x) return y def ln(x): y = np.log(x) return y def lnln(x): y = np.log(np.log(x)) return y def twothirdpow(x): expo = 2/(3.) y = np.power(x,expo) return y def threequarterpow(x): expo = 3/(4.) y = np.power(x,expo) return y start_time = time.monotonic() data=pd.read_csv(r"data/global-Kummer_h1.csv", sep=';',dtype='str') data.head(4) df = pd.DataFrame(data, columns= ['q','Kummer','logKummer','logh1']) # Kummer : = R(q) # logKummer = r(q): = r(q) #df = df.astype('float128') df = df.astype('float64') df = df.astype({"q": int}) # reads the list of Sophie Germain primes;first element data=pd.read_csv(r"data/sophie_germain_primes_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dfgermain_first= df.merge(dfaux, on='q') del(dfgermain_first['Kummer']) del(dfgermain_first['logh1']) del (dfaux) # reads the list of p such that (p,2p-1) are both primes data=pd.read_csv(r"data/twiceprimeminusone_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dftwicepminusone_first = df.merge(dfaux, on='q') del(dftwicepminusone_first['Kummer']) del(dftwicepminusone_first['logh1']) del(dfaux) # reads the list of p such that neither (p,2p+1) and (p,2p-1) are primes data=pd.read_csv(r"data/altri.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dfothers = df.merge(dfaux, on='q') del(dfothers['Kummer']) del(dfothers['logh1']) del(dfaux) # reads the list of p such that neither (p,2p+1), (p,2p-1), (p,4p+1), (p,4p-1) are primes data=pd.read_csv(r"data/altri_altri.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dfothers_others = df.merge(dfaux, on='q') del(dfothers_others['Kummer']) del(dfothers_others['logh1']) del(dfaux) # reads the list of p such that (p,4p-1) are both primes data=pd.read_csv(r"data/fourtimesprimeminusone_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dffourtimespminusone_first = df.merge(dfaux, on='q') del(dffourtimespminusone_first['Kummer']) del(dffourtimespminusone_first['logh1']) del(dfaux) # reads the list of p such that (p,4p+1) are both primes data=pd.read_csv(r"data/fourtimesprimeplusone_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dffourtimespplusone_first = df.merge(dfaux, on='q') del(dffourtimespplusone_first['Kummer']) del(dffourtimespplusone_first['logh1']) del(dfaux) #FINDING MAX and second MAX; MIN and second MIN; and where they are attained print(' ***** Statistics for R(q) *****') print('name source fileanalysis = global-Kummer_h1.csv') print(' ***** MAX *****') maxima =df.nlargest(10, 'Kummer', keep = 'all') M=maxima['Kummer'].iloc[0] argM=maxima['q'].iloc[0] M1=maxima['Kummer'].iloc[1] argM1=maxima['q'].iloc[1] print('maximal value on column R(q) =',M,'attained at q =',argM) print('second maximal value on column R(q) =',M1,'attained at q =',argM1) print(maxima) del maxima print(' ***** MIN *****') minima =df.nsmallest(10, 'Kummer', keep = 'all') m=minima['Kummer'].iloc[0] argm=minima['q'].iloc[0] m1=minima['Kummer'].iloc[1] argm1=minima['q'].iloc[1] print('minimal value on column R(q) =',m,'attained at q =',argm) print('second minimal value on column R(q) =',m1,'attained at q =',argm1) print(minima) del minima #FINDING MAX and second MAX; MIN and second MIN; and where they are attained print(' ***** Statistics for logR(q) *****') print('name source fileanalysis = global-Kummer_h1.csv') print(' ***** MAX *****') maxima =df.nlargest(10, 'logKummer', keep = 'all') M=maxima['logKummer'].iloc[0] argM=maxima['q'].iloc[0] M1=maxima['logKummer'].iloc[1] argM1=maxima['q'].iloc[1] print('maximal value on column r(q) =',M,'attained at q =',argM) print('second maximal value on column r(q) =',M1,'attained at q =',argM1) print(maxima) del maxima print(' ***** MIN *****') minima =df.nsmallest(10, 'logKummer', keep = 'all') m=minima['logKummer'].iloc[0] argm=minima['q'].iloc[0] m1=minima['logKummer'].iloc[1] argm1=minima['q'].iloc[1] print('minimal value on column r(q) =',m,'attained at q =',argm) print('second minimal value on column r(q) =',m1,'attained at q =',argm1) print(minima) del minima print(' ***** STATS R(q) *****') # number of values >1 for rq total=float(len(df)) greater1 = len(df['Kummer'].loc[df.Kummer > 1]) percent=float(greater1)/total print('number of R(q)>1 :', greater1,'percentage =', percent*100) # number of values <1 for rq less1 = len(df['Kummer'].loc[df.Kummer < 1]) percent=float(less1)/total print('number of R(q)<1 :',less1,'percentage =', percent*100) print('total number (less1+greater1) =', less1+greater1) print('total number (number of rows) =', int(total)) # number of R(q) > 2 greater2= len(df.loc[df.Kummer > 2]) if greater2 > 0 : print(' ***** Possible wrong datas *******') print('number of R(q) > 2 :',greater2,'percentage =', percent*100) # number of R(q) <0.5 greater2= len(df.loc[df.Kummer < 0.5]) if greater2 > 0 : print(' ***** Possible wrong datas *******') print('number of R(q) <0.5 :',greater2,'percentage =', percent*100) print('-----------------------------------------------------') print('Bump analysis on normalised values r(q):') print('-------------') print('Central bump:') dfclose = df['logKummer'].loc[ np.abs(df.logKummer) < 0.125] close = len(dfclose) central_bump = close percent = float(close)/total print('number of | r(q) | < 1/8 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bump in 0.25:') dfclose = df['logKummer'].loc[ (np.abs(df.logKummer - 0.25) < 0.125) ] close = len(dfclose) right_secondary_bump = close percent = float(close)/total print('number of | r(q) - 0.25 | < 1/8 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bump in -0.25:') dfclose = df['logKummer'].loc[ (np.abs(df.logKummer + 0.25) < 0.125) ] close = len(dfclose) left_secondary_bump = close percent = float(close)/total print('number of | r(q) + 0.25 | < 1/8 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Right tail:') dfclose = df['logKummer'].loc[df.logKummer >= 0.375 ] close = len(dfclose) right_tail = close percent = float(close)/total print('number of r(q) >= 3/8 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Left tail:') dfclose = df['logKummer'].loc[ df.logKummer <= -0.375 ] close = len(dfclose) left_tail = close percent = float(close)/total print('number of r(q) <= -3/8 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bumps analysis r(q) (summary):') print('number of left sec bump:', left_secondary_bump,'; percentage =', float(left_secondary_bump)/total*100) print('number of right sec bump:', right_secondary_bump,'; percentage =', float(right_secondary_bump)/total*100) print('left tail:', central_bump,'; percentage =', float(left_tail)/total*100) print('right tail:', central_bump,'; percentage =', float(right_tail)/total*100) print('secondary bumps:', left_secondary_bump+right_secondary_bump,'; percentage =', float(left_secondary_bump+right_secondary_bump)/total*100) print('central bump:', central_bump,'; percentage =', float(central_bump)/total*100) print('-----------------------------------------------------') print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') #print(df.describe()) mean = df['logKummer'].mean() devst = df['logKummer'].std() variance = df.var()['logKummer'] print('mean (rq) = ',mean) print('standard deviation (rq) = ',devst) print('variance (rq) = ',variance) print('*******') print('********* plotting histogram and normal curves *********') #df = df.astype('float128') #mean = np.float128(mean) #devst = np.float128(devst) mean = np.float64(mean) devst = np.float64(devst) num_bins = 100 plt.grid(True, linestyle='--', axis = 'y') #plt.hist(df['logKummer'], num_bins, color = "skyblue", histtype='bar', ec='black', density="True") plt.hist(df['logKummer'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True") # Plot the Probability Density Function. plt.yticks([]) #xmin, xmax = plt.xlim() xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 print('******** black normal function data ******* ') devst_blue = 1.13*devst mass_correction_blue = mass_correction print('total data ', total) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean = ', mean) print('standard deviation = ', devst) print('standard deviation black (1.13*devst) = ', devst_blue) print('mass correction = ', mass_correction) print('mass correction black (mass_correction) = ', mass_correction_blue) # Save a version without the density curves #title = "$r(q); q\le 10^7$: $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$r(q); q\le 10^7$: $\sigma =$ %.3f" % (devst) plt.title(title) caption = " \par " caption = caption + " \par " caption = caption + " " plt.xlabel(caption) min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1) plt.text(0.25, max_ylim*0.9675, '$0.25$') plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1) plt.text(-0.25, max_ylim*0.9675, '$-0.25$') plt.savefig("plots_results/histo-rq.png", dpi=600, bbox_inches='tight') print('********* saved in histo-rq.png *********') print('******* MEAN, STANDARD DEVIATION, VARIANCE for the bumps ********') mean = np.float64(mean) devst = np.float64(devst) print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') #print(df.describe()) mean = df['logKummer'].mean() devst = df['logKummer'].std() variance = df.var()['logKummer'] print('mean (rq) = ',mean) print('standard deviation (rq) = ',devst) print('variance (rq) = ',variance) print('*******') max_germain = dfgermain_first['logKummer'].max() min_germain = dfgermain_first['logKummer'].min() mean_germain = dfgermain_first['logKummer'].mean() devst_germain = dfgermain_first['logKummer'].std() variance_germain = dfgermain_first.var()['logKummer'] print('max_germain (rq) = ',max_germain) print('min_germain (rq) = ',min_germain) print('mean_germain (rq) = ',mean_germain) print('standard deviation_germain (rq) = ',devst_germain) print('variance_germain (rq) = ',variance_germain) print('*******') max_twicepminusone = dftwicepminusone_first['logKummer'].max() min_twicepminusone = dftwicepminusone_first['logKummer'].min() mean_twicepminusone = dftwicepminusone_first['logKummer'].mean() devst_twicepminusone = dftwicepminusone_first['logKummer'].std() variance_twicepminusone = dftwicepminusone_first.var()['logKummer'] print('max_twicepminusone (rq) = ',max_twicepminusone) print('min_twicepminusone (rq) = ',min_twicepminusone) print('mean_twicepminusone (rq) = ',mean_twicepminusone) print('standard deviation_twicepminusone (rq) = ',devst_twicepminusone) print('variance_twicepminusone (rq) = ',variance_twicepminusone) print('*******') print('******* highlitgthed contribution of the bumps ********') #xmin, xmax = plt.xlim() xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) hist0 = plt.hist(df['logKummer'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black') hist1 = plt.hist(dfgermain_first['logKummer'], num_bins, range=[-0.5, 0.5], color = "green", histtype='bar', ec='black') hist2 = plt.hist(dftwicepminusone_first['logKummer'], num_bins, range=[-0.5, 0.5], color = "yellow", histtype='bar', ec='black') min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) shift = 0.1 plt.text(mean*1.2-shift, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1) plt.text(0.25, max_ylim*0.9675, '$0.25$') plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1) plt.text(-0.25-shift+0.02, max_ylim*0.9675, '$-0.25$') plt.yticks([]) #title = "$r(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$r(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "yellow bars: $r(q); q$ s.t. $q\ge 5$ and $q,2q-1$ are both primes; \par " caption = caption + "green bars: $r(q); q$ s.t. $q\ge 5$ and $q,2q+1$ are both primes; \par " caption = caption + "cerulean bars: $r(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-rq-highlighted-bumps1.png", dpi=600, bbox_inches='tight') print('********* saved in plots_results/histo-rq-highlighted-bumps1.png *********') plt.cla() print('******* MEAN, STANDARD DEVIATION, VARIANCE for the bumps ********') mean = np.float64(mean) devst = np.float64(devst) print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') #print(df.describe()) mean = df['logKummer'].mean() devst = df['logKummer'].std() variance = df.var()['logKummer'] print('mean (rq) = ',mean) print('standard deviation (rq) = ',devst) print('variance (rq) = ',variance) print('*******') print('*******') max_fourtimespminusone = dffourtimespminusone_first['logKummer'].max() min_fourtimespminusone = dffourtimespminusone_first['logKummer'].min() mean_fourtimespminusone = dffourtimespminusone_first['logKummer'].mean() devst_fourtimespminusone = dffourtimespminusone_first['logKummer'].std() variance_fourtimespminusone = dffourtimespminusone_first.var()['logKummer'] print('max_fourtimespminusone (rq) = ',max_fourtimespminusone) print('min_fourtimespminusone (rq) = ',min_fourtimespminusone) print('mean_fourtimespminusone (rq) = ',mean_fourtimespminusone) print('standard deviation_fourtimespminusone (rq) = ',devst_fourtimespminusone) print('variance_fourtimespminusone (rq) = ',variance_fourtimespminusone) print('*******') max_fourtimespplusone = dffourtimespplusone_first['logKummer'].max() min_fourtimespplusone = dffourtimespplusone_first['logKummer'].min() mean_fourtimespplusone = dffourtimespplusone_first['logKummer'].mean() devst_fourtimespplusone = dffourtimespplusone_first['logKummer'].std() variance_fourtimespplusone = dffourtimespplusone_first.var()['logKummer'] print('max_fourtimespplusone (rq) = ',max_fourtimespplusone) print('min_fourtimespplusone (rq) = ',min_fourtimespplusone) print('mean_fourtimespplusone (rq) = ',mean_fourtimespplusone) print('standard deviation_fourtimespplusone (rq) = ',devst_fourtimespplusone) print('variance_fourtimespplusone (rq) = ',variance_fourtimespplusone) print('*******') print('******* highlitgthed contribution of the bumps ********') #xmin, xmax = plt.xlim() xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) hist0 = plt.hist(df['logKummer'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black') #hist1 = plt.hist(dfgermain_first['logKummer'], num_bins, range=[-0.5, 0.5], color = "green", histtype='bar', ec='black') #hist2 = plt.hist(dftwicepminusone_first['logKummer'], num_bins, range=[-0.5, 0.5], color = "yellow", histtype='bar', ec='black') hist3 = plt.hist(dffourtimespminusone_first['logKummer'], num_bins, range=[-0.5, 0.5], color = "magenta", histtype='bar', ec='black') hist4 = plt.hist(dffourtimespplusone_first['logKummer'], num_bins, range=[-0.5, 0.5], color = "brown", histtype='bar', ec='black') min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) shift = 0.1 plt.text(mean*1.2-shift, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) #plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1) #plt.text(0.25, max_ylim*0.9675, '$0.25$') #plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1) #plt.text(-0.25-shift+0.02, max_ylim*0.9675, '$-0.25$') plt.axvline(0.125, color='b', linestyle='dashed', linewidth=1) plt.text(0.125, max_ylim*0.9675, '$0.125$') plt.axvline(-0.125, color='b', linestyle='dashed', linewidth=1) plt.text(-0.125-shift, max_ylim*0.9675, '$-0.125$') plt.yticks([]) #title = "$r(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$r(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "" caption = caption + "magenta bars: $r(q); q$ s.t. $q\ge 5$ and $q,4q-1$ are both primes; \par " caption = caption + "brown bars: $r(q); q$ s.t. $q\ge 5$ and $q,4q+1$ are both primes; \par " caption = caption + "cerulean bars: $r(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-rq-highlighted-bumps2.png", dpi=600, bbox_inches='tight') print('********* saved in histo-rq-highlighted-bumps2.png *********') plt.cla() print('******* MEAN, STANDARD DEVIATION, VARIANCE: removed bumps ********') mean = np.float64(mean) devst = np.float64(devst) #print(df.describe()) #df['nobumps']= df['logKummer']-dfgermain_first['logKummer']-dftwicepminusone_first['logKummer'] #xmin, xmax = plt.xlim() total_nobumps=float(len(dfothers)) xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins #mass_correction = total_nobumps * bin_width mass_correction = 1 mean = dfothers['logKummer'].mean() devst = dfothers['logKummer'].std() variance = dfothers.var()['logKummer'] print('total data (no bumps) ', total_nobumps) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean (rq; no bumps) = ',mean) print('standard deviation (rq; no bumps) = ',devst) print('variance (rq; no bumps) = ',variance) print('*******') print('******* contribution without the bumps ********') blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.hist(dfothers['logKummer'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True") min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.yticks([]) title = "$r(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$r(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "$r(q); q$ s.t. both $2q + 1$ and $2q-1$ are not primes \par " #caption = caption + r'black curve : $\mathcal{M}\\cdot \mathcal{N}(x,\mu,\sigma)$' caption = caption + r'black curve : $\mathcal{N}(x,\mu,\sigma)$' #caption = caption + "green bars: $r(q); q$ s.t. $q,2q+1$ are both primes; \par " #caption = caption + "cerulean bars: $r(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-rq-no-bumps1.png", dpi=600, bbox_inches='tight') print('********* saved in histo-rq-no-bumps1.png *********') plt.cla() print('******* MEAN, STANDARD DEVIATION, VARIANCE: removed bumps ********') mean = np.float64(mean) devst = np.float64(devst) #print(df.describe()) #df['nobumps']= df['logKummer']-dfgermain_first['logKummer']-dftwicepminusone_first['logKummer'] #xmin, xmax = plt.xlim() total_nobumps=float(len(dfothers_others)) xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins #mass_correction = total_nobumps * bin_width mass_correction = 1 mean = dfothers_others['logKummer'].mean() devst = dfothers_others['logKummer'].std() variance = dfothers_others.var()['logKummer'] print('total data (no bumps) ', total_nobumps) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean (rq; no bumps) = ',mean) print('standard deviation (rq; no bumps) = ',devst) print('variance (rq; no bumps) = ',variance) print('*******') print('******* contribution without the bumps ********') blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.hist(dfothers_others['logKummer'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True") min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.yticks([]) #title = "$r(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$r(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "$r(q); q$ s.t. $2q + 1$, $2q-1$, $4q + 1$, $4q-1$ are not primes \par " #caption = caption + r'black curve : $\mathcal{M}\\cdot \mathcal{N}(x,\mu,\sigma)$' caption = caption + r'black curve : $\mathcal{N}(x,\mu,\sigma)$' #caption = caption + "green bars: $r(q); q$ s.t. $q,2q+1$ are both primes; \par " #caption = caption + "cerulean bars: $r(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-rq-no-bumps2.png", dpi=600, bbox_inches='tight') print('********* saved in histo-rq-no-bumps2.png *********') plt.cla() # scatter plot print('********') print('******** Print scatter plot rq ******* ') #setting dimentions plt.figure(figsize=(11, 8)) # setting ticks million=1000000 # parameter ax = plt.axes() ax.set_xticks([0,million,2*million, 3*million, 4*million, 5*million, 6*million, 7*million, 8*million , 9*million, 10*million]) ax.set_xticklabels(['$0$', '$10^6$','$2\cdot 10^6$','$3\cdot 10^6$','$4\cdot 10^6$','$5\cdot 10^6$','$6\cdot 10^6$','$7\cdot 10^6$','$8\cdot 10^6$','$9\cdot 10^6$','$10^7$']) avg = df['logKummer'].mean() M = df['logKummer'].max() m = df['logKummer'].min() ax.set_yticks([m,-0.5,-0.4,-0.3,-0.2, -0.1, avg, 0.1, 0.2,0.3, 0.4, 0.5, M]) ax.set_yticklabels(['{0:.3f}'.format(m),'$-0.5$','$-0.4$','$-0.3$','$-0.2$', '$-0.1$', '{0:.6f}'.format(avg), '$0.1$', '$0.2$', '$0.3$', '$0.4$', '$0.5$', '{0:.3f}'.format(M)]) # setting bounds plt.xlim(0,10*million) plt.ylim(-0.57, 0.61) # building the grid - horizontal lines count=7 for n in range(count): ax.axhline(y=-0.1*n, linewidth = 1, color='k', linestyle='dotted') ax.axhline(y=0.1*n, linewidth = 1, color='k', linestyle='dotted') ## red line in y=0 #ax.axhline(y=0.0, linewidth = 1, color='r', linestyle='dotted') # building the grid - vertical lines count=10 for n in range(count): ax.axvline(x=million*n, linewidth = 1, color='k', linestyle='dotted') # plotting the scatter plot plt.scatter(df['q'], df['logKummer'], s=1, c = 'blue', marker = '.', label = "$r(q)$") # plotting max and min maxrq = df['logKummer'].max() minrq = df['logKummer'].min() plt.plot(argM, maxrq, marker='o', markersize=4, color="red",label=" max ", linestyle="None") plt.plot(argm, minrq, marker='o', markersize=4, color="black",label=" min ", linestyle="None") ax.axhline(y=maxrq, linewidth = 1, color='r', linestyle='solid') ax.axhline(y=minrq, linewidth = 1, color='k', linestyle='solid') ax.axhline(y=avg, linewidth = 1, color='r', linestyle='dashed', label ="$\mu$ (mean value)") ax.legend(loc="upper right", title="", frameon=True, ncol = len(ax.lines)) # inserting a title and a caption title = "$r(q)$: scatter plot" plt.title(title) caption = "" plt.xlabel(caption) # save plot on file plt.savefig("plots_results/rq-scat.png", dpi=600, bbox_inches='tight', orientation='landscape') plt.savefig("plots_results/rq-scat-300dpi.png", dpi=300, bbox_inches='tight', orientation='landscape') # clear plot plt.cla() print('********* saved in rq-scat.png *********') print(' ***** Statistics for rq *****') print(' ----------------------- ') print(' ***** sum of rq *****') S=df['logKummer'].sum() print('Value of sum r(q) = ',S) df['absrq'] = np.abs(df['logKummer']) S = df['absrq'].sum() print('Value of sum | rq | = ',S) del(df['absrq']) # scatter plot print(' ******************************') print(' ******************************\n') print('******** Print scatter plot deltaq := \kappa(q) - r(q) ******* ') data=pd.read_csv(r"data/global-EK.csv", sep=';') data.head(4) df1 = pd.DataFrame(data, columns= ['q','EK', 'EKplus', 'EKdiff']) #df = df1.astype('float128') df1 = df1.astype('float64') df1 = df1.astype({"q": int}) # create a new column with normalised values df1['EKdiffnorm'] = (df1['EKdiff'] / ln(df1['q']) ) df['deltaq'] = - df1['EKdiffnorm'] - df['logKummer'] print(' ***** Statistics for kappaq *****') print(' ----------------------- ') print(' ***** sum of kappaq *****') S = df1['EKdiffnorm'].sum() print('Value of sum kappaq = ',-S) print(' -- ') df1['abskappaq'] = np.abs(df1['EKdiffnorm']) S = df1['abskappaq'].sum() print('Value of sum | kappaq | = ',S) del(df1['abskappaq']) print(' ***** Statistics for gammaq *****') print(' ----------------------- ') print(' ***** sum of gammaq *****') S = df1['EK'].sum() print('Value of sum gammaq = ',S) print(' -- ') df1['absgammaq'] = np.abs(df1['EK']) S = df1['absgammaq'].sum() print('Value of sum | gammaq | = ',S) del(df1['absgammaq']) print(' ***** Statistics for gammaq-plus *****') print(' ----------------------- ') print(' ***** sum of gammaq_plus *****') S = df1['EKplus'].sum() print('Value of sum gammaq_plus = ',S) print(' -- ') df1['absgammaqplus'] = np.abs(df1['EKplus']) S = df1['absgammaqplus'].sum() print('Value of sum | gammaq_plus | = ',S) del(df1['absgammaqplus']) #FINDING MAX and second MAX; MIN and second MIN; and where they are attained print(' ***** Statistics for deltaq *****') print(' ----------------------- ') print(' ***** sum of deltaq *****') S = df['deltaq'].sum() print('Value of sum deltaq = ',S) print(' -- ') df['absdeltaq'] = np.abs(df['deltaq']) S = df['absdeltaq'].sum() print('Value of sum | deltaq | = ',S) del(df['absdeltaq']) # number of values >0 for deltaq total=float(len(df)) greater0 = len(df['deltaq'].loc[df.deltaq > 0]) percent=float(greater0)/total print('number of deltaq > 0 :', greater0,'percentage =', percent*100) print(' -- ') # number of values <0 for deltaq less0 = len(df['deltaq'].loc[df.deltaq < 0]) percent=float(less0)/total print('number of deltaq < 0 :',less0,'percentage =', percent*100) print('total number (less0+greater0) =', less0+greater0) print('total number (number of rows) =', int(total)) print(' -- ') # number of values |deltaq | < 0.08 bound = 0.08 lessbound = len(df['deltaq'].loc[np.abs(df.deltaq) < bound]) percent=float(lessbound)/total print('number of |deltaq | < 0.08 :',lessbound,'percentage =', percent*100) print('number of |deltaq | >= 0.08 :', int(total) - lessbound) print('total number (number of rows) =', int(total)) print(' -- ') print(' ----------------------- ') print(' ***** MAX deltaq *****') maxima =df.nlargest(10, 'deltaq', keep = 'all') M=maxima['deltaq'].iloc[0] argM=maxima['q'].iloc[0] M1=maxima['deltaq'].iloc[1] argM1=maxima['q'].iloc[1] print('maximal value on column deltaq =',M,'attained at q =',argM) print('second maximal value on column deltaq =',M1,'attained at q =',argM1) print(maxima) del maxima print(' ***** MIN deltaq *****') minima =df.nsmallest(10, 'deltaq', keep = 'all') m=minima['deltaq'].iloc[0] argm=minima['q'].iloc[0] m1=minima['deltaq'].iloc[1] argm1=minima['q'].iloc[1] print('minimal value on column deltaq =',m,'attained at q =',argm) print('second minimal value on column deltaq =',m1,'attained at q =',argm1) print(minima) del minima #setting dimentions plt.figure(figsize=(11, 8)) # setting ticks million=1000000 # parameter ax = plt.axes() ax.set_xticks([0,million,2*million, 3*million, 4*million, 5*million, 6*million, 7*million, 8*million , 9*million, 10*million]) ax.set_xticklabels(['$0$', '$10^6$','$2\cdot 10^6$','$3\cdot 10^6$','$4\cdot 10^6$','$5\cdot 10^6$','$6\cdot 10^6$','$7\cdot 10^6$','$8\cdot 10^6$','$9\cdot 10^6$','$10^7$']) avg = df['deltaq'].mean() M = df['deltaq'].max() m = df['deltaq'].min() ax.set_yticks([-0.14, m,-0.12,-0.1,-0.08, -0.06, -0.04, -0.02, avg, 0.02, 0.04,0.06, 0.08, 0.1, 0.12, 0.14, 0.16, M, 0.18]) ax.set_yticklabels(['$-0.14$','{0:.3f}'.format(m),'$-0.12$','$-0.1$','$-0.08$','$-0.06$', '$-0.04$', '$-0.02$','{0:.6f}'.format(avg), '$0.02$', '$0.04$', '$0.06$', '$0.08$', '$0.1$', '$0.12$', '$0.14$', '$0.16$','{0:.3f}'.format(M), '$0.18$']) # setting bounds plt.xlim(0,10*million) plt.ylim(-0.08, 0.08) # building the grid - horizontal lines count=7 for n in range(count): ax.axhline(y=-0.02*n, linewidth = 1, color='k', linestyle='dotted') ax.axhline(y=0.02*n, linewidth = 1, color='k', linestyle='dotted') count=3 for n in range(count): ax.axhline(y=0.12+0.02*n, linewidth = 1, color='k', linestyle='dotted') ## red line in y=0 #ax.axhline(y=0.0, linewidth = 1, color='r', linestyle='dotted') # building the grid - vertical lines count=10 for n in range(count): ax.axvline(x=million*n, linewidth = 1, color='k', linestyle='dotted') # plotting the scatter plot plt.scatter(df['q'], df['deltaq'], s=1, c = 'blue', marker = '.', label = "$\Delta_q$") # plotting max and min maxdeltaq = df['deltaq'].max() mindeltaq = df['deltaq'].min() #plt.plot(argM, maxdeltaq, marker='o', markersize=4, color="red",label=" max ", linestyle="None") #plt.plot(argm, mindeltaq, marker='o', markersize=4, color="black",label=" min ", linestyle="None") #ax.axhline(y=maxdeltaq, linewidth = 1, color='r', linestyle='solid') #ax.axhline(y=mindeltaq, linewidth = 1, color='k', linestyle='solid') ax.axhline(y=avg, linewidth = 1, color='r', linestyle='dashed', label ="$\mu$ (mean value)") ax.legend(loc="upper right", title="", frameon=True, ncol = len(ax.lines)) # inserting a title and a caption title = "$\Delta_q:= \kappa(q) - r(q)$; truncated at $ | \Delta_q | \le 0.08$: scatter plot" plt.title(title) caption = "" plt.xlabel(caption) # save plot on file plt.savefig("plots_results/deltaq-scat.png", dpi=600, bbox_inches='tight', orientation='landscape') plt.savefig("plots_results/deltaq-scat-300dpi.png", dpi=300, bbox_inches='tight', orientation='landscape') # clear plot plt.cla() print('********* saved in deltaq-scat.png *********') print('*******') #df = df.astype('float128') #mean = np.float128(mean) #devst = np.float128(devst) mean = np.float64(mean) devst = np.float64(devst) print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') #print(df.describe()) mean = df['deltaq'].mean() devst = df['deltaq'].std() variance = df.var()['deltaq'] print('mean (deltaq) = ',mean) print('standard deviation (deltaq) = ',devst) print('variance (deltaq) = ',variance) print('*******') print('********* plotting histogram and normal curves deltaq *********') #num_bins = 100 plt.grid(True, linestyle='--', axis = 'y') #plt.hist(df['deltaq'], num_bins, color = "skyblue", histtype='bar', ec='black', density="True") plt.hist(df['deltaq'], num_bins, range=[-0.05, 0.05], color = "skyblue", histtype='bar', ec='black', density="True") plt.yticks([]) # Plot the Probability Density Function. xmin, xmax = plt.xlim() x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 print('******** black normal function data ******* ') devst_blue = 1.13*devst mass_correction_blue = mass_correction print('total data ', total) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean = ', mean) print('standard deviation = ', devst) print('standard deviation black (1.13*devst) = ', devst_blue) print('mass correction = ', mass_correction) print('mass correction black (mass_correction) = ', mass_correction_blue) # Save a version without the density curves #title = "$\Delta_q:=\kappa(q) - r(q); q\le 10^7$: $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\Delta_q:=\kappa(q) - r(q); q\le 10^7$: $\sigma =$ %.3f " % (devst) plt.title(title) min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) plt.savefig("plots_results/histo-deltaq-nocurves.png", dpi=600, bbox_inches='tight') blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) bluecurve = mass_correction_blue * norm.pdf(x, mean, devst_blue) #plt.plot(x, bluecurve, 'b', linewidth=1) print('******** red normal function data ******* ') devst_red = 0.725*devst mass_correction_red = mass_correction*0.9 print('total data ', total) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean = ', mean) print('standard deviation = ', devst) print('standard deviation red (0.7*devst) = ', devst_red) print('mass correction = ', mass_correction) print('mass correction red (0.9*mass_correction) = ', mass_correction_red) redcurve = mass_correction_red * norm.pdf(x, mean, devst_red) #plt.plot(x, redcurve, 'r"data/, linewidth=1) print('******** yellow normal function data ******* ') devst_yellow = 2*devst mass_correction_yellow = mass_correction*0.52 print('total data ', total) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean = ', mean) print('standard deviation = ', devst) print('standard deviation yellow (2*devst) = ', devst_yellow) print('mass correction = ', mass_correction) print('mass correction yellow (0.52*mass_correction) = ', mass_correction_yellow) yellowcurve = mass_correction_yellow * norm.pdf(x, mean, devst_yellow) #plt.plot(x, yellowcurve, 'y', linewidth=1) #min_ylim, max_ylim = plt.ylim() #plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) #plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) #title = "$\Delta_q:=\kappa(q) - r(q); q\le 10^7$: $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\Delta_q:=\kappa(q) - r(q); q\le 10^7$: $\sigma =$ %.3f " % (devst) plt.title(title) #plt.show() #caption = "black curve : $\mathcal{M}\cdot \mathcal{N}(x,\mu,\sigma)$" caption = "black curve : $\mathcal{N}(x,\mu,\sigma)$" #caption = caption + " - blue curve : $\mathcal{M}\cdot \mathcal{N}(x,\mu,1.13\cdot\sigma)$\par " #caption = caption + "red curve : $0.9\cdot\mathcal{M}\cdot \mathcal{N}(x,\mu,0.725\cdot\sigma)$" #caption = caption + " - yellow curve : $0.52\cdot\mathcal{M}\cdot \mathcal{N}(x,\mu,2\cdot\sigma)$" plt.xlabel(caption) #plt.ylabel(caption) plt.savefig("plots_results/histo-deltaq.png", dpi=600, bbox_inches='tight') print('********* saved in histo-deltaq.png *********') # clear the plot plt.cla() end_time = time.monotonic() elapsed_time = end_time - start_time print("Total Execution time : ", format_timespan(elapsed_time)) print('------------------------------------------') print(' ***** End PYTHON script *****')