compb-dla-data-analysis/tools/calc-fd.py

69 lines
1.6 KiB
Python

from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from glob import glob
import sys
def read_xy(path: str):
    """Load one CSV of x/y coordinates and add derived per-row columns.

    The file is parsed with ``pandas.read_csv`` using
    ``skipinitialspace=True`` and must provide ``x`` and ``y`` columns.
    Four columns are appended, in this order:

    * ``N``  -- the row's index label plus one,
    * ``r``  -- ``sqrt(x**2 + y**2)``,
    * ``cr`` -- running maximum of ``r``,
    * ``fd`` -- ``log(N) / log(cr)`` (presumably a fractal-dimension
      estimate, judging by the column name -- TODO confirm).

    Returns the augmented DataFrame.
    """
    frame = pd.read_csv(path, skipinitialspace=True)

    radius = (frame['x'] ** 2 + frame['y'] ** 2) ** 0.5

    frame['N'] = frame.index + 1
    frame['r'] = radius
    frame['cr'] = frame['r'].cummax()

    # log(cr) is zero or -inf when cr is 1 or 0, so 'fd' may contain
    # inf/NaN entries.
    log_count = np.log(frame['N'])
    log_extent = np.log(frame['cr'])
    frame['fd'] = log_count / log_extent
    return frame
def read_all(load_dir: str):
    """Read every ``*.csv`` file directly inside *load_dir*.

    Each matching path is passed to ``read_xy``; the resulting DataFrames
    are returned as a list in the order ``glob`` yields the paths.
    """
    frames = []
    for csv_path in glob(f'{load_dir}/*.csv'):
        frames.append(read_xy(csv_path))
    return frames
def stick_prob_from_load_dir(load_dir: str):
    """Return the final path component of *load_dir* parsed as a float.

    Raises ``ValueError`` if that component is not a valid float literal.
    """
    dir_name = Path(load_dir).name
    return float(dir_name)
def read_sp_dir(load_dir: str):
    """Return ``(probability, frames)`` for one directory.

    ``probability`` comes from ``stick_prob_from_load_dir`` (the directory
    name as a float) and ``frames`` from ``read_all`` (one DataFrame per
    CSV file in the directory).
    """
    probability = stick_prob_from_load_dir(load_dir)
    frames = read_all(load_dir)
    return probability, frames
def read_sp_full(probabilities_dir: str):
    """Summarise ``fd`` statistics for every sub-directory of a directory.

    Each sub-directory of *probabilities_dir* is loaded with
    ``read_sp_dir`` and reduced with ``fd_of_dfs``.

    Returns a DataFrame with columns ``p``, ``fd_mean`` and ``fd_std``,
    one row per sub-directory, sorted by ``p``.  When there are no
    sub-directories the result is an empty DataFrame with those columns.

    Raises ``ValueError`` (from ``stick_prob_from_load_dir``) if a
    sub-directory name is not a valid float.
    """
    rows = []
    for load_dir in glob(f'{probabilities_dir}/*'):
        # Only directories can hold a set of CSV files; stray files next to
        # them (notes, .DS_Store, ...) would otherwise crash float() on
        # their name or contribute a NaN row.
        if not Path(load_dir).is_dir():
            continue
        p, dfs = read_sp_dir(load_dir)
        fd_mean, fd_std = fd_of_dfs(dfs)
        rows.append((p, fd_mean, fd_std))

    # Building from a list of row tuples (rather than unpacking zip(*...))
    # keeps the empty case well-defined.
    summary = pd.DataFrame(rows, columns=['p', 'fd_mean', 'fd_std'])
    return summary.sort_values(by='p')
def convergent_tail_index(series, tol):
    """Find where *series* settles to within *tol* between neighbours.

    Let ``diffs`` be the absolute differences of consecutive elements of
    *series*.  The function returns the smallest index ``i`` such that every
    element of ``diffs[i:]`` is ``<= tol``.

    Returns ``None`` when no such index exists: *series* has fewer than two
    elements, or its final difference exceeds *tol* (or is NaN).

    The search is a single vectorised pass instead of recomputing the
    maximum of every suffix, so it is linear in the length of *series*.
    """
    diffs = np.abs(np.ediff1d(series))
    if diffs.size == 0:
        return None

    # Negate "<= tol" rather than testing "> tol" so that NaN differences
    # count as not converged, matching a max()-based suffix check.
    unsettled = np.flatnonzero(~(diffs <= tol))
    if unsettled.size == 0:
        return 0

    # The tail starts right after the last offending difference.
    start = int(unsettled[-1]) + 1
    if start >= diffs.size:
        # No convergence found
        return None
    return start
def mean_of_tail(series, tol=0.05):
    """Return the mean of the converged tail of *series*.

    The tail starts at the index reported by ``convergent_tail_index`` for
    the given *tol*.  If that function reports no convergence (``None``),
    the slice ``series[None:]`` covers the entire series, so the mean of
    all values is returned.
    """
    start = convergent_tail_index(series, tol)
    tail = series[start:]
    return np.mean(tail)
def fd_of_dfs(dfs):
    """Return ``(mean, std)`` of the tail ``fd`` value across *dfs*.

    For each DataFrame the ``fd`` column is reduced with
    ``mean_of_tail`` using a tolerance of 0.001.  Values that are not
    strictly below ``+inf`` (that is, ``+inf`` and NaN) are discarded
    before the mean and standard deviation are taken.
    """
    tail_means = np.asarray(
        [mean_of_tail(frame.fd, 0.001) for frame in dfs], dtype=float
    )
    # NaN < inf is False, so this mask drops NaN as well as +inf.
    kept = tail_means[tail_means < np.inf]
    return np.mean(kept), np.std(kept)
# Script entry: echo the command line, then print the per-probability
# summary table for the directory given as the first argument.
print(sys.argv)

# 'fd' is log(N) / log(cr); log(cr) can be zero, so silence NumPy's
# divide-by-zero warnings for the whole run.
np.seterr(divide='ignore')

# Fail with a usage message instead of a bare IndexError traceback when the
# directory argument is missing.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} PROBABILITIES_DIR')

argv_ = sys.argv[1]
print(read_sp_full(argv_))