In [2]:
import os
import io
import sys
import numpy as np
import pandas as pd
import pickle
import zipfile
import lzma
import gzip
import time
import re
import dimod
np.__version__, pd.__version__

('1.21.6', '1.3.5')

In [3]:
# Directory that is scanned for the '.txt' result files.
analysis_results_dir = 'SubmissionResults/'
# Sorted listing, so the index of a file is stable for a given directory content.
files = sorted(name for name in os.listdir(analysis_results_dir) if name.endswith('.txt'))

# Result containers; the loops below key them by the file's index in `files`.
problems, magnetizations = {}, {}
correlations, correlationsall = {}, {}
# Reference time for the progress messages.
start = time.time()
def _chain_statistics(data, max_len=20):
    """Return the 'l1' ... 'l<max_len>' statistics of a sample matrix.

    For every k in 1..max_len the product
    ``Pup(0) * Pdown(1) * ... * Pdown(k) * Pup(k+1)`` is formed, where
    ``Pup(s) = (1 + np.roll(data, s, axis=1)) / 2`` and
    ``Pdown(s) = (1 - np.roll(data, s, axis=1)) / 2``. The product is summed
    over the columns, averaged over the rows and divided by the number of
    columns. For +1/-1 entries the factors are 0/1 indicators, so the product
    marks a run of exactly k "down" entries enclosed by two "up" entries
    (cyclically, because np.roll wraps around).

    The product is accumulated incrementally in the same left-to-right order
    as a fully written-out expression, so the values are numerically identical
    while only a few temporary arrays are alive at any time.

    Parameters
    ----------
    data : numpy.ndarray, shape (num_samples, num_spins)
        Sample matrix; entries are assumed to be +1/-1 -- TODO confirm.
    max_len : int, optional
        Largest run length k to evaluate (default 20).

    Returns
    -------
    dict
        ``{'l1': value, ..., 'l<max_len>': value}`` in increasing order of k.
    """
    num_spins = data.shape[1]
    chain = (1+data)/2  # Pup(0); Pdown(1..k) factors are multiplied in below
    stats = {}
    for k in range(1, max_len+1):
        chain = chain * ((1-np.roll(data,k,axis=1))/2)
        closing_up = (1+np.roll(data,k+1,axis=1))/2
        stats[f'l{k}'] = (np.sum(chain * closing_up, axis=1).mean())/num_spins
    return stats


for i,f in enumerate(files):
    try:
        # progress message (only for i >= 1, so the division by i is safe)
        if (i < 1000 and i%100 == 1) or (i%1000 == 1):
            print(f'Analyzing i={i} / {len(files)} (elapsed time so far: {time.time()-start:.2f} s, {(time.time()-start)/i:.2f} s per data set)')
        # SECURITY: pickle.load can execute arbitrary code contained in the file;
        # only run this on result files from a trusted source.
        # The context manager makes sure the file handle is closed again.
        with lzma.open(analysis_results_dir + f,'rb') as fh:
            serialized = pickle.load(fh)
        results = dimod.SampleSet.from_serializable(serialized).to_pandas_dataframe().to_numpy()
        # NOTE: to analyze the data from disk, simply replace with
        # results = pd.read_csv(f'Results/{f}', sep='\t')
        if len(results) != 1000:  # we can also identify problems in the files with this check
            print(f'\tfile i={i}\n\t\t{f}\n\t does not have 1000 but {len(results)} entries')
            problems[i] = f
            continue
        data = results[:,:-3]  # we don't need energy and chain break fraction
        num_occurrences = results[:,-1]
        if (num_occurrences != 1).any():  # results were not aggregated, so we can identify problems e.g. like this
            print(f'\tfile i={i}\n\t\t{f}\n\twith {len(results)} entries does not have all num_occurrences == 1')
            problems[i] = f
            continue

        # compute magnetizations (per-sample sum over all columns, then mean/std over the samples)
        mag_sum = np.sum(data,axis=1)
        mag_mean = mag_sum.mean()
        mag_std = mag_sum.std()
        # sample mean of sum_j (1-s_j)(1-s_{j-1})/4, expanded into its three terms
        mag_qnn = data.shape[1]/4. + np.sum(data * np.roll(data,1,axis=1), axis=1).mean()/4. - mag_mean/2.

        stats = {'mean': mag_mean, 'std': mag_std, 'Qnn': mag_qnn}
        stats.update(_chain_statistics(data, max_len=20))  # adds 'l1' ... 'l20'
        # Store the entry only once it is complete, so a failure above can never
        # leave a partial/empty entry behind for the aggregation step.
        magnetizations[i] = stats

        # Disabled correlation analysis, kept for reference
        # (NOTE: `nos` is not assigned inside this loop and has to be set before re-enabling):
        # # compute correlations
        # correlations[i] = {}
        # correlationsall[i] = {}
        # for d in range(-nos//2+1, nos//2+1):
        #     # compute correlations with spin 0 (NOTE: is 0 really the middle spin? is it a ring? or is 0 "on the boundary"...?)
        #     corr = data[:,0] * data[:,(d + nos)%nos]
        #     correlations[i][d] = {'mean': corr.mean(), 'std': corr.std()}
        #     # compute correlations averaged over all spins (NOTE: this makes it 30 times slower so commented out, probably possible to speed this up too, see above)
        #     #corrall = (data * np.roll(data, -d, axis=1)).mean(axis=1)
        #     #correlationsall[i][d] = {'mean': corrall.mean(), 'std': corrall.std()}
    except Exception as e:
        print(f'\terror parsing i={i}\n\t\t{f}:\n\t\t{e}')
        problems[i] = f
        magnetizations.pop(i, None)  # a problematic file must not show up among the results

print(f'{len(problems)} files are problematic')

# Pattern for the result file names: the prefix 'FVD_TFIM' followed by
# '_<tag>_<value>' pairs, one fragment per tag below. Every value is captured
# greedily ('.*') into a named group; note that the tag '_as_' occurs twice
# (anneal schedule and auto scale), and that the last group also swallows
# whatever follows the final value (e.g. a file extension).
p = re.compile(
    r'FVD_TFIM'
    r'_L_(?P<nos>.*)'
    r'_J_(?P<jv>.*)'
    r'_hx_(?P<hxv>.*)'
    r'_hz_(?P<hzv>.*)'
    r'_is_(?P<init_state>.*)'
    r'_as_(?P<anneal_schedule_string>.*)'
    r'_s_(?P<sv>.*)'
    r'_irt_(?P<irt>.*)'
    r'_pt_(?P<pt>.*)'
    r'_mt_(?P<mt>.*)'
    r'_hss_(?P<h_schedule_string>.*)'
    r'_qt_(?P<qt>.*)'
    r'_nr_(?P<nr>.*)'
    r'_emb_(?P<embedding_string_short>.*)'
    r'_LQ_(?P<LQ>.*)'
    r'_PQ_(?P<embnpqs>.*)'
    r'_am_(?P<answer_mode>.*)'
    r'_as_(?P<auto_scale>.*)'
    r'_rt_(?P<readout_thermalization>.*)'
    r'_rs_(?P<reinitialize_state>.*)'
)

# Flat result tables keyed by the parameters parsed from the file names.
# fvd_mags maps (nos,jv,hxv,hzv,irt,pt,mt,qt,nr,<statistic name>) -> value.
fvd_mags = {}
fvd_corrs = {}
start = time.time()
# Statistic names in the order in which they are inserted into fvd_mags.
stat_names = ['mean', 'std', 'Qnn'] + [f'l{k}' for k in range(1, 21)]
for i in magnetizations:
    if (i < 1000 and i%100 == 1) or (i%1000 == 1):
        print(f'Adding i={i} to dicts (elapsed time so far: {time.time()-start:.2f} s)')
    if not magnetizations[i]:
        # Nothing to aggregate for an entry without statistics
        # (e.g. an empty placeholder left behind by a failed analysis).
        continue
    r = p.search(files[i])
    if r is None:
        # Fail with a message that names the file instead of an AttributeError on None.
        raise ValueError(f'file name does not match the expected FVD_TFIM pattern: {files[i]}')
    nos = int(r.group('nos'))
    # In the file names the decimal point of a number is written as '_'.
    jv, hxv, hzv, irt, pt, mt, qt = (
        float(r.group(name).replace('_','.'))
        for name in ('jv', 'hxv', 'hzv', 'irt', 'pt', 'mt', 'qt')
    )
    nr = 1000  # fixed value; not parsed from the 'nr' group of the file name
    params = (nos,jv,hxv,hzv,irt,pt,mt,qt,nr)
    for stat in stat_names:
        fvd_mags[params + (stat,)] = magnetizations[i][stat]
    # Disabled correlation aggregation, kept for reference:
    # for d in range(-nos//2+1, nos//2+1):
    #     fvd_corrs[(nos,jv,hxv,hzv,irt,pt,mt,qt,nr,d,'mean')] = correlations[i][d]['mean']
    #     fvd_corrs[(nos,jv,hxv,hzv,irt,pt,mt,qt,nr,d,'std')] = correlations[i][d]['std']

# another 2-3 GBs with a lot of redundancy, save compressed version
# NOTE: sys.getsizeof only measures the dict object itself, not the keys and
# values it refers to, and the result is discarded here because this is not
# the last expression of the cell.
sys.getsizeof(fvd_mags) / 1024**3

analysis_results_dir = 'Data/'
os.makedirs(analysis_results_dir, exist_ok=True)  # do not fail if the output directory is missing

# The dict is stored as the text of a Python dict literal, in the same layout
# that str(dict) produces ('{key: value, key: value}').
# It is written entry by entry so that the complete text never has to exist in
# memory as one string, and every value is written as a builtin float so that
# the text does not depend on how the installed NumPy version prints its scalars.
with gzip.open(analysis_results_dir + 'fvd_analyzed_example.txt.gz', 'wt', encoding='utf-8', newline='') as f:
    f.write('{')
    for entry_index, (key, value) in enumerate(fvd_mags.items()):
        if entry_index:
            f.write(', ')
        f.write(f'{key!r}: {float(value)!r}')
    f.write('}')

0 files are problematic


In [5]:
# Read the stored dict literal back from the compressed file.
analysis_results_dir = 'Data/'
archive_path = analysis_results_dir + 'fvd_analyzed_example.txt.gz'

# SECURITY NOTE(review): eval() executes whatever expression the file contains,
# so this must only be used on files that were produced by a trusted source.
with gzip.open(archive_path, "r") as f:
    fvd_mags = eval(f.read())

# Last expression of the cell: displays the complete dict.
fvd_mags

{(5564, -1.0, 0.002, -0.01, 100.0, 0.0, 0.1805, 0.0, 1000, 'mean'): 5563.972,
 (5564,
  -1.0,
  0.002,
  -0.01,
  100.0,
  0.0,
  0.1805,
  0.0,
  1000,
  'std'): 0.5720279713440594,
 (5564,
  -1.0,
  0.002,
  -0.01,
  100.0,
  0.0,
  0.1805,
  0.0,
  1000,
  'Qnn'): 0.010000000000218279,
 (5564,
  -1.0,
  0.002,
  -0.01,
  100.0,
  0.0,
  0.1805,
  0.0,
  1000,
  'l1'): 3.5945363048166787e-07,
 (5564, -1.0, 0.002, -0.01, 100.0, 0.0, 0.1805, 0.0, 1000, 'l2'): 0.0,
 (5564, -1.0, 0.002, -0.01, 100.0, 0.0, 0.1805, 0.0, 1000, 'l3'): 0.0,
 (5564,
  -1.0,
  0.002,
  -0.01,
  100.0,
  0.0,
  0.1805,
  0.0,
  1000,
  'l4'): 1.7972681524083394e-07,
 (5564, -1.0, 0.002, -0.01, 100.0, 0.0, 0.1805, 0.0, 1000, 'l5'): 0.0,
 (5564, -1.0, 0.002, -0.01, 100.0, 0.0, 0.1805, 0.0, 1000, 'l6'): 0.0,
 (5564, -1.0, 0.002, -0.01, 100.0, 0.0, 0.1805, 0.0, 1000, 'l7'): 0.0,
 (5564,
  -1.0,
  0.002,
  -0.01,
  100.0,
  0.0,
  0.1805,
  0.0,
  1000,
  'l8'): 1.7972681524083394e-07,
 (5564, -1.0, 0.002, -0.01, 100