compb-dla-data-analysis/notebooks/stick-probability.ipynb

177 lines
4.4 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import scipy\n",
"from glob import glob"
]
},
{
"cell_type": "markdown",
"source": [
"Prior code from mva:"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 105,
"outputs": [],
"source": [
"def read_xy(path: str):\n",
" df = pd.read_csv(path, skipinitialspace=True)\n",
" df['N'] = df.index + 1\n",
" df['r'] = (df.x ** 2 + df.y ** 2) ** 0.5\n",
" df['cr'] = df.r.cummax()\n",
" df['fd'] = np.log(df.N) / np.log(df.cr)\n",
"\n",
" return df\n",
"\n",
"\n",
"def read_all(load_dir: str):\n",
" paths = glob(f'{load_dir}/*.csv')\n",
" return [read_xy(path) for path in paths]\n",
"\n",
"\n",
"# Expected on-disk layout of the stick-probability (sp) data:\n",
"#   ROOT/{probability}/{RUN_ID}.csv\n",
"\n",
"def read_sp_dir(load_dir: str):\n",
" probability = float(Path(load_dir).name)\n",
" return probability, read_all(load_dir)\n",
"\n",
"\n",
"def read_sp_full(root: str):\n",
" raw_data = [read_sp_dir(load_dir) for load_dir in glob(f'{root}/*')]\n",
" fd_data = [fd_of_dfs(dfs) for (p, dfs) in raw_data]\n",
" probabilities = [p for (p, dfs) in raw_data]\n",
" fd_means, fd_stds = list(zip(*fd_data))\n",
" out = pd.DataFrame(zip(probabilities, fd_means, fd_stds), columns=['p', 'fd_mean', 'fd_std'])\n",
" out.sort_values(by='p', inplace=True)\n",
"\n",
" return out"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"def convergent_tail_index(series, tol):\n",
" diffs = np.abs(np.ediff1d(series))\n",
" for i in range(0, len(diffs)):\n",
" if np.max(diffs[i:]) <= tol:\n",
" return i\n",
"\n",
" # No convergence found\n",
" return None\n",
"\n",
"\n",
"def mean_of_tail(series, tol=0.05):\n",
"    \"\"\"Average ``series`` from its convergent tail onwards.\n",
"\n",
"    The start of the tail comes from ``convergent_tail_index(series, tol)``.\n",
"    When that returns ``None`` (no convergence), the slice ``series[None:]``\n",
"    covers the whole series, so the mean of every element is returned.\n",
"    \"\"\"\n",
"    start = convergent_tail_index(series, tol)\n",
"    tail = series[start:]\n",
"    return np.mean(tail)\n",
"\n",
"\n",
"def fd_of_dfs(dfs):\n",
" fds = [mean_of_tail(df.fd, 0.001) for df in dfs]\n",
" fds_clean = [f for f in fds if f < np.inf]\n",
" return np.mean(fds_clean), np.std(fds_clean)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"c_sticking_probability = read_sp_full(\"../data/stick-probability\")"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"rust_sticking_probability = read_sp_full(\"../data/rust-stick-probability\")"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Overlay fd_mean against p for both data sets; labels and a legend make the\n",
"# two curves distinguishable when the figure is read on its own.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(c_sticking_probability.p, c_sticking_probability.fd_mean, label='C')\n",
"ax.plot(rust_sticking_probability.p, rust_sticking_probability.fd_mean, label='Rust')\n",
"ax.set(xlabel='p', ylabel='fd_mean', title='fd_mean against p')\n",
"ax.legend();"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Same comparison as points with fd_std as the vertical error bar.\n",
"fig, ax = plt.subplots()\n",
"ax.errorbar(c_sticking_probability.p, c_sticking_probability.fd_mean,\n",
"            yerr=c_sticking_probability.fd_std, fmt='o', label='C')\n",
"ax.errorbar(rust_sticking_probability.p, rust_sticking_probability.fd_mean,\n",
"            yerr=rust_sticking_probability.fd_std, fmt='o', label='Rust')\n",
"ax.set(xlabel='p', ylabel='fd_mean', title='fd_mean against p (error bars: fd_std)')\n",
"ax.legend();"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"Hmm, even with a different RNG I still get a dip around 0.6; I think there's something hiding there."
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}