From e348362b574fbe4851eefddd7820a7875044b8c3 Mon Sep 17 00:00:00 2001 From: mariag <33653146+MariaGorodetski@users.noreply.github.com> Date: Mon, 13 May 2024 08:43:18 +0300 Subject: [PATCH] Align sex age (#28) * align sex_age with all dataframes in a dataset * add new bulk data mapping to tre config * nbdev_rxport * use __concat__ to merge the self.dfs tables * suffixes --------- Co-authored-by: Maria Gorodetski --- nbs/05_pheno_loader.ipynb | 45 ++++++++++++++------ pheno_utils/config_setup/config_tre.json | 2 + pheno_utils/config_setup/config_tre_mbz.json | 2 + pheno_utils/pheno_loader.py | 40 ++++++++++++----- 4 files changed, 67 insertions(+), 22 deletions(-) diff --git a/nbs/05_pheno_loader.ipynb b/nbs/05_pheno_loader.ipynb index 62a218a..f8452db 100644 --- a/nbs/05_pheno_loader.ipynb +++ b/nbs/05_pheno_loader.ipynb @@ -80,6 +80,7 @@ "metadata": {}, "outputs": [], "source": [ + "\n", "#| export\n", "\n", "class PhenoLoader:\n", @@ -517,14 +518,19 @@ " continue\n", " \n", " if table_name == 'age_sex':\n", - " keep_undefined = True\n", + " # The 'age_sex' table does not contain 'undefined', so the merge will not cause a Cartesian product\n", + " keep_undefined = True \n", + " # Left join to keep only rows with real data points\n", + " how = 'left'\n", " else: \n", " keep_undefined = keep_undefined_research_stage\n", + " how = 'outer'\n", " \n", " data = self.__concat__(\n", " data, \n", " df_fields, \n", - " keep_undefined\n", + " keep_undefined, \n", + " how\n", " )\n", " renamed_cols += duplicated_fields\n", " \n", @@ -577,13 +583,13 @@ " return False\n", " \n", " @staticmethod\n", - " def join_and_filter_undefined_research_stage(df1, df2):\n", + " def join_and_filter_undefined_research_stage(df1, df2, how='outer', lsuffix='', rsuffix=''):\n", " df1_defined = df1[df1.index.get_level_values('research_stage') != 'undefined']\n", " df2_defined = df2[df2.index.get_level_values('research_stage') != 'undefined']\n", "\n", - " return df1_defined.join(df2_defined, how='outer')\n", + " return df1_defined.join(df2_defined, how=how, lsuffix=lsuffix, rsuffix=rsuffix)\n", "\n", - " def __concat__(self, df1, df2, keep_undefined_research_stage=False):\n", + " def __concat__(self, df1, df2, keep_undefined_research_stage=False, how='outer', lsuffix='', rsuffix=''):\n", "\n", " if df1.empty:\n", " return df2\n", @@ -594,18 +600,33 @@ " self.is_value_in_index(df2, 'undefined', 'research_stage') and not keep_undefined_research_stage:\n", " \n", " warnings.warn('filtering \"undefined\" research_stage..')\n", - " df = self.join_and_filter_undefined_research_stage(df1, df2)\n", + " df = self.join_and_filter_undefined_research_stage(df1, df2, how, lsuffix, rsuffix)\n", " return df\n", " \n", - " return df1.join(df2, how='outer')\n", - " \n", + " return df1.join(df2, how=how, lsuffix=lsuffix, rsuffix=rsuffix)\n", + " \n", + " def merge_all_tables(self) -> pd.DataFrame:\n", + " # merge all tables in self.dfs dictionary\n", + " align_df = None\n", + " for name, df in self.dfs.items():\n", + " if align_df is None:\n", + " align_df = df\n", + " else:\n", + " # Join the table with an 'undefined' research_stage to keep the maximum number of data points\n", + " align_df = self.__concat__(align_df, df, keep_undefined_research_stage=True, how='outer', lsuffix='', rsuffix= name) \n", + " return align_df\n", + "\n", " def __load_age_sex__(self) -> None:\n", " \"\"\"\n", " Add sex and compute age from birth date.\n", " \"\"\"\n", " age_path = os.path.join(self.__get_dataset_path__(self.age_sex_dataset), 'events.parquet')\n", - " align_df = self.dfs[list(self.dfs)[0]]\n", - "\n", + " \n", + " if len(self.dfs) > 1: \n", + " align_df = self.merge_all_tables()\n", + " else: \n", + " align_df = self.dfs[list(self.dfs)[0]]\n", + " \n", " if ('research_stage' in align_df.columns) or ('research_stage' in align_df.index.names):\n", " try:\n", " age_df = pd.read_parquet(age_path)\n", @@ -1848,7 +1869,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeEAAAHiCAYAAADf3nSgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAATlUlEQVR4nO3cS691B13H8d/aa+29z3muvUDRlipFuQdIVRIlkjDDmDhw4BvwVTjwJTgycWQckpgYJ8pIowkwImIUjAQULBVaS7H2afs857Jvy0ETh30W8d/+uXw+453fXnudtfZ3r8kZ5nmeAwC841bdBwAAP6tEGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8chuHtPI6fGuOP4Xk6+qdo8BNrWtV9p5yKvgtOvlIWWfIPKT0JA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTqfsAfhxsxrrfIrvjqWyryt2zmj/zrc1YspMkV1Xnaa6ZSZK5aOt0qjuo60PNeRpKVt60Khobq4aSHIrO+dm67hrfFR3TG9eHkp1KU9Hfrurv9pPMkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgydR/A/8f5VPMb4vJwKtlJkrNxKNn53K/9cslOktzYbkt2ppqPliQ5H8eSnavdvmQnSXbHmutgd30o2UmSw2ku2RnHut/bZZdBzUdLkkxF3wWb7bpkJ0nmueYDvnL/omQnSb70jedLdi52NffKpvC6rLp/32mehAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zohcNQ8obbqa7714dTyc7Tj90s2UmSz37imZKdx2/fKNlJkvuXh5Kdy/2+ZCdJ1qua6+B0qrkGkuTquubz1dwpb1qtatZWRfdvkiz8ynioofBMrddjzVDheaqaOj/f1gwluXdxVbLzxX/+dsnOC/cuS3aSZDPWfKfsjnXfKUvuFU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zkhWfTWPKG18dTyU6SPPnIjZKd3/nNj5fsJMl2PZXs3Lt/VbKTJMdTzW+tTeFPttN8qBqq2UkyzzXX5nFf9NmSTGPNfbeZ6v54h6p7eKg7pnEcSnaOx7rr6bjsq/WhxqLv3iSZpprvp/uXNd9Pf/PVb5bsJMkrD/YlO9Oq5lpKkv2Ce8WTMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0GeZ5nhe9cBje7mP5kf3+554t2Xn33VslO0ly72JfsnM81Z3vB7tDyc75pu432/l6LNm5vrwu2UmSy8urkp3KW2UqGltvppKdJMmib4yHq/xOOS37GnuoOYXHVLRT+bdbr2u2tkXH9OIrr5fsJMmf//3XSnbGVd01cDg+/CrwJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/u6vf7hs6+knHi/ZuffgQclOkpxvak7p5XEo2UmScVWztV3X/WarmjptxpqhJIdTzdZmVfjbtugyOB5ONUNJNut1yc6cuWQnSaZV0d9uuynZSZL98ViyM8+F52mquTbnoebCfO97HivZSZLPfOKZkp0vf/25kp2lPAkDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQJNp6Qvvni9+6Vt69sPPlOwkyXa9LdmZ1puSnSR5/eK6ZGd/OJXsJMmdbc3nm3Is2UmS/e6qZGce5pKdJNlsaq7xcRhKdpJkGmt+Jw81t0qSZLMuOk9T4TPAMJbMHOe662k+1OwMQ915GlY112bVdbmaav5uSfLRX3qyZOfLX3+uZGcpT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFn/rI+0re8PYjd0t2kmR3OJbsDDUzSZL5+lSysxoOJTtJMhz3JTtThpKdJNncOC/Zubq6LNlJkow1v0mHVd1v2/k0l+wMQ81OkhxTc40f9jU7STKta875KXXnKVPNMY3TumQnScZVzT1cdY2vVmPJTpI8/uitkp3f+NjTJTtLeRIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCbT0hd+4Kl3lbzhPAwlO0myG9clO8dx8Wl4qO2tsWTncHVdspMk+4t9yc7qVLOTJIdD0dBqWzSUZKq5nq4evFaykySnzCU7U91tl2lVNDbW3CtJMh9PJTunwu+nYVXzjFPzyd60Kvt8NTurqmspybiquZ7e99S7S3aW8iQMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaTEtfePfWeckbjvOxZCdJ7t6+XbKzHet+ixwuXi/Zeeyjj5XsJMnz/zmW7HzvP35QspMkx/lUNHSo2UmyO16W7Fzt9iU7SZK5ZmZTcwkkSU7rmrFxVXffDUUnquiqfNNcc0zTUHieis75ajWU7FQdT5JstuuSnUceuVWys5QnYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoMi194fFU84YPLq5rhpI88Z7HSnaeeu97SnaSZLV/tGTnqQ++u2QnSe7vas75C8+9ULKTJPO4+NJ7S4f9rmQnSa6u9yU7b1weSnaSZDjV3Hg3b6xLdpJkHmqOaT7Wnacb2ZTsnIaSmSTJsWhsWNU9K63WZyU74ziW7FSe7+t9zf273dZcS0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8cV0PJG67Guu5f7OaSnfuXp5KdJDk/25bsvPjyg5KdJLl+7bpk59Gb5yU7SXLv+liyMxReT+NqLNm5urwo2UmS6wc118H5+pGSnSTJtPhr4y0Nhc8Ax9OhZOdQ91WQw7Hm+2k91VyXSTKer0t2NmPNNXB9qDlHSTJtaj7brRs1rVvKkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANJmWvnC7GUve8LFH75bsJMnpVLPzX999sWYoydnNbcnOnfO630frU83WuDkr2UmSx7Mv2XkwlMwkSfZFH+/xR27UDCXJnZrr6cbZ4lv9oeainXGsu8arjmlYFV5Qh5ovqPl0LNlJkvlQc98dVjV/u9Vcdw3c2G5Kdi4ui8KykCdhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQZFr6wrP1WPKGN7eL3/Khbt88L9m5dadmJ0leee1Byc75uC7ZSZLd7bslO9tXXi/ZSZJpqPn9tz7fluwkyfpsV7Jzq/CYLnf7mqHjsWYnSTKXrBxONTtJcij6eHPhY8m4rvmuOw5DyU6SvL47lexMx5p7ZT3WdCVJzvaHkp0HF9clO0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJtPSF17cu1/yho/+wlCykyTXh33JzvGq7rfI7c1YsrM/HEt2kmSz2pTsPPHYoyU7SXLv3r2SnfWp7jzdHGv+dpmKdpIMp7lk57iqu8ZPc80xDfOpZCdJcqjZOhYe0mpdcx1st+uSnSQZq67xovN08+ZZzVCSaaxpy6uv17RuKU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgybT0hV/73kslb/jZZz9SspMkw35XsrO7nkt2kuT2nVslOy/84NWSnSTZX51Kdo7Hut9s87gu2dkOJTNJkv1+X7IzFR7TZqwZ26fuoPZzzdZYeEzjaizZOdZ9FWRfNVZz+yZJxnXNPbw535TsDEPdNXA4Hkp2nn/hhyU7S3kSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAm09IX/tVXvl3yhp/+2AdKdpLkMx/8xZKd43FXspMkm3Eu2XnysZslO0nyjX99vmTntYwlO0lyvl6X7Mw1pztJsplqxnaHY8lOkgzHmq3b203JTpJcnWqOabevO0+noWio8Ho6nGrGhqrPlmR7XnPfbTeL0/GWztd11+XF5b5k5+/+4d9KdpbyJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/vt3Xirb+pVn3l8zNA01O0l2L98r2VlPdX+azXos2ZkudyU7SXL71s2Snf2p7nfkrRtnJTs37tZ8tiS5vv+gZGe+uirZSZKh6DI429Zcl0lyWNXcwxfXx5KdJJnWNce0Guu+C9ZzzdZ6rLlXzs/q7pV//Jdvlm29kzwJA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTafErh5o3/LMv/lPNUJKPf+j9JTu/9elPluwkyf56V7Lzysv3SnaS5O6dx2uG1lc1O0nO5pqdm7e2NUNJpmks2dkfDiU7SXK5qvl8481TyU6SnM81W/NQdBEkGYea54nrU90x7YumVmPNdZkkq1XN1nralOx867svlewkyV/87VdKdtZ1p3sRT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFdzfrkjd87XpfspMkf/T5vyzZ+eh775bsJMnHn3m6ZOfeWDKTJDleXZfs/PytbclOklzvDiU7F5c1ny1JdplLdk7HU8lOkgxFxzRNdRfUdh5KdurOUjKONc8Td84WfyU+1IPrmmt8LjrfSXLjZs09/J3v/3fJzp98/q9LdiqtVoVfvkve7x19NwDg/4gwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmgzzPM+LXjgMJW/4xM1tyU6SvPzgumTnk4/XfLYk+YPf++2SnSff9WjJTpK8enUo2blzti7ZSZJhPZbsXB2OJTtJsj8tuhUeanWsOd9JcnGsOaaz7VSykyRzao5pv+yrZ5HDquYePptqrsskOcynkp0HV/uSnST57gv/U7Lzp1/4UsnO91/dlewkye2zTcnOG1d1x7Qkr56EAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAk2Ge53nRC4fh7T6WH9mTd85Kdl58/apkp9If/vavlm196kPvK9mZtuuSnSRZjTXX0zTU/Y68Op1KdjaFt8r9q33N0KruoG6eb0p2dln01bPIZqy5Dk7HmmsgSV59cFGy85Vvfb9kJ0n++AtfLduqcPus7jvljap7pdCSvHoSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmwzzP86IXDsPbfSxtfu7Wpmzrpfu7sq0qzz55t2Rn3NSdp8Oyy+6hNqu663J3OJbsrKepZCdJqj7e5dW+ZijJdlPz+Yax7hlgM9acqIVfh4u8+MPXSnaeu3dVslPp9lnNNfDG1aFk58fVkuvJkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBnmeZ4XvXAY3u5j+alwe13zu+biuOjPssjxVLcFvMMKv3vPx5qty8OpZOen3ZK8ehIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNpqUvnOf57TwOAPiZ40kYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJr8L8H89Up0ZsB2AAAAAElFTkSuQmCC", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeEAAAHiCAYAAADf3nSgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAATlUlEQVR4nO3cS691B13H8d/aa+29z3muvUDRlipFuQdIVRIlkjDDmDhw4BvwVTjwJTgycWQckpgYJ8pIowkwImIUjAQULBVaS7H2afs857Jvy0ETh30W8d/+uXw+453fXnudtfZ3r8kZ5nmeAwC841bdBwAAP6tEGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8chuHtPI6fGuOP4Xk6+qdo8BNrWtV9p5yKvgtOvlIWWfIPKT0JA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTqfsAfhxsxrrfIrvjqWyryt2zmj/zrc1YspMkV1Xnaa6ZSZK5aOt0qjuo60PNeRpKVt60Khobq4aSHIrO+dm67hrfFR3TG9eHkp1KU9Hfrurv9pPMkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgydR/A/8f5VPMb4vJwKtlJkrNxKNn53K/9cslOktzYbkt2ppqPliQ5H8eSnavdvmQnSXbHmutgd30o2UmSw2ku2RnHut/bZZdBzUdLkkxF3wWb7bpkJ0nmueYDvnL/omQnSb70jedLdi52NffKpvC6rLp/32mehAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zohcNQ8obbqa7714dTyc7Tj90s2UmSz37imZKdx2/fKNlJkvuXh5Kdy/2+ZCdJ1qua6+B0qrkGkuTquubz1dwpb1qtatZWRfdvkiz8ynioofBMrddjzVDheaqaOj/f1gwluXdxVbLzxX/+dsnOC/cuS3aSZDPWfKfsjnXfKUvuFU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zkhWfTWPKG18dTyU6SPPnIjZKd3/nNj5fsJMl2PZXs3Lt/VbKTJMdTzW+tTeFPttN8qBqq2UkyzzXX5nFf9NmSTGPNfbeZ6v54h6p7eKg7pnEcSnaOx7rr6bjsq/WhxqLv3iSZpprvp/uXNd9Pf/PVb5bsJMkrD/YlO9Oq5lpKkv2Ce8WTMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0GeZ5nhe9cBje7mP5kf3+554t2Xn33VslO0ly72JfsnM81Z3vB7tDyc75pu432/l6LNm5vrwu2UmSy8urkp3KW2UqGltvppKdJMmib4yHq/xOOS37GnuoOYXHVLRT+bdbr2u2tkXH9OIrr5fsJMmf//3XSnbGVd01cDg+/CrwJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/u6vf7hs6+knHi/ZuffgQclOkpxvak7p5XEo2UmScVWztV3X/WarmjptxpqhJIdTzdZmVfjbtugyOB5ONUNJNut1yc6cuWQnSaZV0d9uuynZSZL98ViyM8+F52mquTbnoebCfO97HivZSZLPfOKZkp0vf/25kp2lPAkDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQJNp6Qvvni9+6Vt69sPPlOwkyXa9LdmZ1puSnSR5/eK6ZGd/OJXsJMmdbc3nm3Is2UmS/e6qZGce5pKdJNlsaq7xcRhKdpJkGmt+Jw81t0qSZLMuOk9T4TPAMJbMHOe662k+1OwMQ915GlY112bVdbmaav5uSfLRX3qyZOfLX3+uZGcpT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFn/rI+0re8PYjd0t2kmR3OJbsDDUzSZL5+lSysxoOJTtJMhz3JTtThpKdJNncOC/Zubq6LNlJkow1v0mHVd1v2/k0l+wMQ81OkhxTc40f9jU7STKta875KXXnKVPNMY3TumQnScZVzT1cdY2vVmPJTpI8/uitkp3f+NjTJTtLeRIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCbT0hd+4Kl3lbzhPAwlO0myG9clO8dx8Wl4qO2tsWTncHVdspMk+4t9yc7qVLOTJIdD0dBqWzSUZKq5nq4evFaykySnzCU7U91tl2lVNDbW3CtJMh9PJTunwu+nYVXzjFPzyd60Kvt8NTurqmspybiquZ7e99S7S3aW8iQMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaTEtfePfWeckbjvOxZCdJ7t6+XbKzHet+ixwuXi/Zeeyjj5XsJMnz/zmW7HzvP35QspMkx/lUNHSo2UmyO16W7Fzt9iU7SZK5ZmZTcwkkSU7rmrFxVXffDUUnquiqfNNcc0zTUHieis75ajWU7FQdT5JstuuSnUceuVWys5QnYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoMi194fFU84YPLq5rhpI88Z7HSnaeeu97SnaSZLV/tGTnqQ++u2QnSe7vas75C8+9ULKTJPO4+NJ7S4f9rmQnSa6u9yU7b1weSnaSZDjV3Hg3b6xLdpJkHmqOaT7Wnacb2ZTsnIaSmSTJsWhsWNU9K63WZyU74ziW7FSe7+t9zf273dZcS0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8cV0PJG67Guu5f7OaSnfuXp5KdJDk/25bsvPjyg5KdJLl+7bpk59Gb5yU7SXLv+liyMxReT+NqLNm5urwo2UmS6wc118H5+pGSnSTJtPhr4y0Nhc8Ax9OhZOdQ91WQw7Hm+2k91VyXSTKer0t2NmPNNXB9qDlHSTJtaj7brRs1rVvKkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANJmWvnC7GUve8LFH75bsJMnpVLPzX999sWYoydnNbcnOnfO630frU83WuDkr2UmSx7Mv2XkwlMwkSfZFH+/xR27UDCXJnZrr6cbZ4lv9oeainXGsu8arjmlYFV5Qh5ovqPl0LNlJkvlQc98dVjV/u9Vcdw3c2G5Kdi4ui8KykCdhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQZFr6wrP1WPKGN7eL3/Khbt88L9m5dadmJ0leee1Byc75uC7ZSZLd7bslO9tXXi/ZSZJpqPn9tz7fluwkyfpsV7Jzq/CYLnf7mqHjsWYnSTKXrBxONTtJcij6eHPhY8m4rvmuOw5DyU6SvL47lexMx5p7ZT3WdCVJzvaHkp0HF9clO0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJtPSF17cu1/yho/+wlCykyTXh33JzvGq7rfI7c1YsrM/HEt2kmSz2pTsPPHYoyU7SXLv3r2SnfWp7jzdHGv+dpmKdpIMp7lk57iqu8ZPc80xDfOpZCdJcqjZOhYe0mpdcx1st+uSnSQZq67xovN08+ZZzVCSaaxpy6uv17RuKU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgybT0hV/73kslb/jZZz9SspMkw35XsrO7nkt2kuT2nVslOy/84NWSnSTZX51Kdo7Hut9s87gu2dkOJTNJkv1+X7IzFR7TZqwZ26fuoPZzzdZYeEzjaizZOdZ9FWRfNVZz+yZJxnXNPbw535TsDEPdNXA4Hkp2nn/hhyU7S3kSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAm09IX/tVXvl3yhp/+2AdKdpLkMx/8xZKd43FXspMkm3Eu2XnysZslO0nyjX99vmTntYwlO0lyvl6X7Mw1pztJsplqxnaHY8lOkgzHmq3b203JTpJcnWqOabevO0+noWio8Ho6nGrGhqrPlmR7XnPfbTeL0/GWztd11+XF5b5k5+/+4d9KdpbyJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/vt3Xirb+pVn3l8zNA01O0l2L98r2VlPdX+azXos2ZkudyU7SXL71s2Snf2p7nfkrRtnJTs37tZ8tiS5vv+gZGe+uirZSZKh6DI429Zcl0lyWNXcwxfXx5KdJJnWNce0Guu+C9ZzzdZ6rLlXzs/q7pV//Jdvlm29kzwJA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTafErh5o3/LMv/lPNUJKPf+j9JTu/9elPluwkyf56V7Lzysv3SnaS5O6dx2uG1lc1O0nO5pqdm7e2NUNJpmks2dkfDiU7SXK5qvl8481TyU6SnM81W/NQdBEkGYea54nrU90x7YumVmPNdZkkq1XN1nralOx867svlewkyV/87VdKdtZ1p3sRT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFdzfrkjd87XpfspMkf/T5vyzZ+eh775bsJMnHn3m6ZOfeWDKTJDleXZfs/PytbclOklzvDiU7F5c1ny1JdplLdk7HU8lOkgxFxzRNdRfUdh5KdurOUjKONc8Td84WfyU+1IPrmmt8LjrfSXLjZs09/J3v/3fJzp98/q9LdiqtVoVfvkve7x19NwDg/4gwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmgzzPM+LXjgMJW/4xM1tyU6SvPzgumTnk4/XfLYk+YPf++2SnSff9WjJTpK8enUo2blzti7ZSZJhPZbsXB2OJTtJsj8tuhUeanWsOd9JcnGsOaaz7VSykyRzao5pv+yrZ5HDquYePptqrsskOcynkp0HV/uSnST57gv/U7Lzp1/4UsnO91/dlewkye2zTcnOG1d1x7Qkr56EAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAk2Ge53nRC4fh7T6WH9mTd85Kdl58/apkp9If/vavlm196kPvK9mZtuuSnSRZjTXX0zTU/Y68Op1KdjaFt8r9q33N0KruoG6eb0p2dln01bPIZqy5Dk7HmmsgSV59cFGy85Vvfb9kJ0n++AtfLduqcPus7jvljap7pdCSvHoSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmwzzP86IXDsPbfSxtfu7Wpmzrpfu7sq0qzz55t2Rn3NSdp8Oyy+6hNqu663J3OJbsrKepZCdJqj7e5dW+ZijJdlPz+Yax7hlgM9acqIVfh4u8+MPXSnaeu3dVslPp9lnNNfDG1aFk58fVkuvJkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBnmeZ4XvXAY3u5j+alwe13zu+biuOjPssjxVLcFvMMKv3vPx5qty8OpZOen3ZK8ehIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNpqUvnOf57TwOAPiZ40kYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJr8L8H89Up0ZsB2AAAAAElFTkSuQmCC", "text/plain": [ "
" ] @@ -1858,7 +1879,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeEAAAHiCAYAAADf3nSgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAATlUlEQVR4nO3cS691B13H8d/aa+29z3muvUDRlipFuQdIVRIlkjDDmDhw4BvwVTjwJTgycWQckpgYJ8pIowkwImIUjAQULBVaS7H2afs857Jvy0ETh30W8d/+uXw+453fXnudtfZ3r8kZ5nmeAwC841bdBwAAP6tEGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8chuHtPI6fGuOP4Xk6+qdo8BNrWtV9p5yKvgtOvlIWWfIPKT0JA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTqfsAfhxsxrrfIrvjqWyryt2zmj/zrc1YspMkV1Xnaa6ZSZK5aOt0qjuo60PNeRpKVt60Khobq4aSHIrO+dm67hrfFR3TG9eHkp1KU9Hfrurv9pPMkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgydR/A/8f5VPMb4vJwKtlJkrNxKNn53K/9cslOktzYbkt2ppqPliQ5H8eSnavdvmQnSXbHmutgd30o2UmSw2ku2RnHut/bZZdBzUdLkkxF3wWb7bpkJ0nmueYDvnL/omQnSb70jedLdi52NffKpvC6rLp/32mehAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zohcNQ8obbqa7714dTyc7Tj90s2UmSz37imZKdx2/fKNlJkvuXh5Kdy/2+ZCdJ1qua6+B0qrkGkuTquubz1dwpb1qtatZWRfdvkiz8ynioofBMrddjzVDheaqaOj/f1gwluXdxVbLzxX/+dsnOC/cuS3aSZDPWfKfsjnXfKUvuFU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zkhWfTWPKG18dTyU6SPPnIjZKd3/nNj5fsJMl2PZXs3Lt/VbKTJMdTzW+tTeFPttN8qBqq2UkyzzXX5nFf9NmSTGPNfbeZ6v54h6p7eKg7pnEcSnaOx7rr6bjsq/WhxqLv3iSZpprvp/uXNd9Pf/PVb5bsJMkrD/YlO9Oq5lpKkv2Ce8WTMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0GeZ5nhe9cBje7mP5kf3+554t2Xn33VslO0ly72JfsnM81Z3vB7tDyc75pu432/l6LNm5vrwu2UmSy8urkp3KW2UqGltvppKdJMmib4yHq/xOOS37GnuoOYXHVLRT+bdbr2u2tkXH9OIrr5fsJMmf//3XSnbGVd01cDg+/CrwJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/u6vf7hs6+knHi/ZuffgQclOkpxvak7p5XEo2UmScVWztV3X/WarmjptxpqhJIdTzdZmVfjbtugyOB5ONUNJNut1yc6cuWQnSaZV0d9uuynZSZL98ViyM8+F52mquTbnoebCfO97HivZSZLPfOKZkp0vf/25kp2lPAkDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQJNp6Qvvni9+6Vt69sPPlOwkyXa9LdmZ1puSnSR5/eK6ZGd/OJXsJMmdbc3nm3Is2UmS/e6qZGce5pKdJNlsaq7xcRhKdpJkGmt+Jw81t0qSZLMuOk9T4TPAMJbMHOe662k+1OwMQ915GlY112bVdbmaav5uSfLRX3qyZOfLX3+uZGcpT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFn/rI+0re8PYjd0t2kmR3OJbsDDUzSZL5+lSysxoOJTtJMhz3JTtThpKdJNncOC/Zubq6LNlJkow1v0mHVd1v2/k0l+wMQ81OkhxTc40f9jU7STKta875KXXnKVPNMY3TumQnScZVzT1cdY2vVmPJTpI8/uitkp3f+NjTJTtLeRIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCbT0hd+4Kl3lbzhPAwlO0myG9clO8dx8Wl4qO2tsWTncHVdspMk+4t9yc7qVLOTJIdD0dBqWzSUZKq5nq4evFaykySnzCU7U91tl2lVNDbW3CtJMh9PJTunwu+nYVXzjFPzyd60Kvt8NTurqmspybiquZ7e99S7S3aW8iQMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaTEtfePfWeckbjvOxZCdJ7t6+XbKzHet+ixwuXi/Zeeyjj5XsJMnz/zmW7HzvP35QspMkx/lUNHSo2UmyO16W7Fzt9iU7SZK5ZmZTcwkkSU7rmrFxVXffDUUnquiqfNNcc0zTUHieis75ajWU7FQdT5JstuuSnUceuVWys5QnYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoMi194fFU84YPLq5rhpI88Z7HSnaeeu97SnaSZLV/tGTnqQ++u2QnSe7vas75C8+9ULKTJPO4+NJ7S4f9rmQnSa6u9yU7b1weSnaSZDjV3Hg3b6xLdpJkHmqOaT7Wnacb2ZTsnIaSmSTJsWhsWNU9K63WZyU74ziW7FSe7+t9zf273dZcS0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8cV0PJG67Guu5f7OaSnfuXp5KdJDk/25bsvPjyg5KdJLl+7bpk59Gb5yU7SXLv+liyMxReT+NqLNm5urwo2UmS6wc118H5+pGSnSTJtPhr4y0Nhc8Ax9OhZOdQ91WQw7Hm+2k91VyXSTKer0t2NmPNNXB9qDlHSTJtaj7brRs1rVvKkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANJmWvnC7GUve8LFH75bsJMnpVLPzX999sWYoydnNbcnOnfO630frU83WuDkr2UmSx7Mv2XkwlMwkSfZFH+/xR27UDCXJnZrr6cbZ4lv9oeainXGsu8arjmlYFV5Qh5ovqPl0LNlJkvlQc98dVjV/u9Vcdw3c2G5Kdi4ui8KykCdhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQZFr6wrP1WPKGN7eL3/Khbt88L9m5dadmJ0leee1Byc75uC7ZSZLd7bslO9tXXi/ZSZJpqPn9tz7fluwkyfpsV7Jzq/CYLnf7mqHjsWYnSTKXrBxONTtJcij6eHPhY8m4rvmuOw5DyU6SvL47lexMx5p7ZT3WdCVJzvaHkp0HF9clO0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJtPSF17cu1/yho/+wlCykyTXh33JzvGq7rfI7c1YsrM/HEt2kmSz2pTsPPHYoyU7SXLv3r2SnfWp7jzdHGv+dpmKdpIMp7lk57iqu8ZPc80xDfOpZCdJcqjZOhYe0mpdcx1st+uSnSQZq67xovN08+ZZzVCSaaxpy6uv17RuKU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgybT0hV/73kslb/jZZz9SspMkw35XsrO7nkt2kuT2nVslOy/84NWSnSTZX51Kdo7Hut9s87gu2dkOJTNJkv1+X7IzFR7TZqwZ26fuoPZzzdZYeEzjaizZOdZ9FWRfNVZz+yZJxnXNPbw535TsDEPdNXA4Hkp2nn/hhyU7S3kSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAm09IX/tVXvl3yhp/+2AdKdpLkMx/8xZKd43FXspMkm3Eu2XnysZslO0nyjX99vmTntYwlO0lyvl6X7Mw1pztJsplqxnaHY8lOkgzHmq3b203JTpJcnWqOabevO0+noWio8Ho6nGrGhqrPlmR7XnPfbTeL0/GWztd11+XF5b5k5+/+4d9KdpbyJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/vt3Xirb+pVn3l8zNA01O0l2L98r2VlPdX+azXos2ZkudyU7SXL71s2Snf2p7nfkrRtnJTs37tZ8tiS5vv+gZGe+uirZSZKh6DI429Zcl0lyWNXcwxfXx5KdJJnWNce0Guu+C9ZzzdZ6rLlXzs/q7pV//Jdvlm29kzwJA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTafErh5o3/LMv/lPNUJKPf+j9JTu/9elPluwkyf56V7Lzysv3SnaS5O6dx2uG1lc1O0nO5pqdm7e2NUNJpmks2dkfDiU7SXK5qvl8481TyU6SnM81W/NQdBEkGYea54nrU90x7YumVmPNdZkkq1XN1nralOx867svlewkyV/87VdKdtZ1p3sRT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFdzfrkjd87XpfspMkf/T5vyzZ+eh775bsJMnHn3m6ZOfeWDKTJDleXZfs/PytbclOklzvDiU7F5c1ny1JdplLdk7HU8lOkgxFxzRNdRfUdh5KdurOUjKONc8Td84WfyU+1IPrmmt8LjrfSXLjZs09/J3v/3fJzp98/q9LdiqtVoVfvkve7x19NwDg/4gwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmgzzPM+LXjgMJW/4xM1tyU6SvPzgumTnk4/XfLYk+YPf++2SnSff9WjJTpK8enUo2blzti7ZSZJhPZbsXB2OJTtJsj8tuhUeanWsOd9JcnGsOaaz7VSykyRzao5pv+yrZ5HDquYePptqrsskOcynkp0HV/uSnST57gv/U7Lzp1/4UsnO91/dlewkye2zTcnOG1d1x7Qkr56EAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAk2Ge53nRC4fh7T6WH9mTd85Kdl58/apkp9If/vavlm196kPvK9mZtuuSnSRZjTXX0zTU/Y68Op1KdjaFt8r9q33N0KruoG6eb0p2dln01bPIZqy5Dk7HmmsgSV59cFGy85Vvfb9kJ0n++AtfLduqcPus7jvljap7pdCSvHoSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmwzzP86IXDsPbfSxtfu7Wpmzrpfu7sq0qzz55t2Rn3NSdp8Oyy+6hNqu663J3OJbsrKepZCdJqj7e5dW+ZijJdlPz+Yax7hlgM9acqIVfh4u8+MPXSnaeu3dVslPp9lnNNfDG1aFk58fVkuvJkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBnmeZ4XvXAY3u5j+alwe13zu+biuOjPssjxVLcFvMMKv3vPx5qty8OpZOen3ZK8ehIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNpqUvnOf57TwOAPiZ40kYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJr8L8H89Up0ZsB2AAAAAElFTkSuQmCC", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeEAAAHiCAYAAADf3nSgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAATlUlEQVR4nO3cS691B13H8d/aa+29z3muvUDRlipFuQdIVRIlkjDDmDhw4BvwVTjwJTgycWQckpgYJ8pIowkwImIUjAQULBVaS7H2afs857Jvy0ETh30W8d/+uXw+453fXnudtfZ3r8kZ5nmeAwC841bdBwAAP6tEGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8chuHtPI6fGuOP4Xk6+qdo8BNrWtV9p5yKvgtOvlIWWfIPKT0JA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTqfsAfhxsxrrfIrvjqWyryt2zmj/zrc1YspMkV1Xnaa6ZSZK5aOt0qjuo60PNeRpKVt60Khobq4aSHIrO+dm67hrfFR3TG9eHkp1KU9Hfrurv9pPMkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgydR/A/8f5VPMb4vJwKtlJkrNxKNn53K/9cslOktzYbkt2ppqPliQ5H8eSnavdvmQnSXbHmutgd30o2UmSw2ku2RnHut/bZZdBzUdLkkxF3wWb7bpkJ0nmueYDvnL/omQnSb70jedLdi52NffKpvC6rLp/32mehAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zohcNQ8obbqa7714dTyc7Tj90s2UmSz37imZKdx2/fKNlJkvuXh5Kdy/2+ZCdJ1qua6+B0qrkGkuTquubz1dwpb1qtatZWRfdvkiz8ynioofBMrddjzVDheaqaOj/f1gwluXdxVbLzxX/+dsnOC/cuS3aSZDPWfKfsjnXfKUvuFU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgyTDP87zkhWfTWPKG18dTyU6SPPnIjZKd3/nNj5fsJMl2PZXs3Lt/VbKTJMdTzW+tTeFPttN8qBqq2UkyzzXX5nFf9NmSTGPNfbeZ6v54h6p7eKg7pnEcSnaOx7rr6bjsq/WhxqLv3iSZpprvp/uXNd9Pf/PVb5bsJMkrD/YlO9Oq5lpKkv2Ce8WTMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0GeZ5nhe9cBje7mP5kf3+554t2Xn33VslO0ly72JfsnM81Z3vB7tDyc75pu432/l6LNm5vrwu2UmSy8urkp3KW2UqGltvppKdJMmib4yHq/xOOS37GnuoOYXHVLRT+bdbr2u2tkXH9OIrr5fsJMmf//3XSnbGVd01cDg+/CrwJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/u6vf7hs6+knHi/ZuffgQclOkpxvak7p5XEo2UmScVWztV3X/WarmjptxpqhJIdTzdZmVfjbtugyOB5ONUNJNut1yc6cuWQnSaZV0d9uuynZSZL98ViyM8+F52mquTbnoebCfO97HivZSZLPfOKZkp0vf/25kp2lPAkDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQJNp6Qvvni9+6Vt69sPPlOwkyXa9LdmZ1puSnSR5/eK6ZGd/OJXsJMmdbc3nm3Is2UmS/e6qZGce5pKdJNlsaq7xcRhKdpJkGmt+Jw81t0qSZLMuOk9T4TPAMJbMHOe662k+1OwMQ915GlY112bVdbmaav5uSfLRX3qyZOfLX3+uZGcpT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFn/rI+0re8PYjd0t2kmR3OJbsDDUzSZL5+lSysxoOJTtJMhz3JTtThpKdJNncOC/Zubq6LNlJkow1v0mHVd1v2/k0l+wMQ81OkhxTc40f9jU7STKta875KXXnKVPNMY3TumQnScZVzT1cdY2vVmPJTpI8/uitkp3f+NjTJTtLeRIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCbT0hd+4Kl3lbzhPAwlO0myG9clO8dx8Wl4qO2tsWTncHVdspMk+4t9yc7qVLOTJIdD0dBqWzSUZKq5nq4evFaykySnzCU7U91tl2lVNDbW3CtJMh9PJTunwu+nYVXzjFPzyd60Kvt8NTurqmspybiquZ7e99S7S3aW8iQMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaTEtfePfWeckbjvOxZCdJ7t6+XbKzHet+ixwuXi/Zeeyjj5XsJMnz/zmW7HzvP35QspMkx/lUNHSo2UmyO16W7Fzt9iU7SZK5ZmZTcwkkSU7rmrFxVXffDUUnquiqfNNcc0zTUHieis75ajWU7FQdT5JstuuSnUceuVWys5QnYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoMi194fFU84YPLq5rhpI88Z7HSnaeeu97SnaSZLV/tGTnqQ++u2QnSe7vas75C8+9ULKTJPO4+NJ7S4f9rmQnSa6u9yU7b1weSnaSZDjV3Hg3b6xLdpJkHmqOaT7Wnacb2ZTsnIaSmSTJsWhsWNU9K63WZyU74ziW7FSe7+t9zf273dZcS0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE2mpS8cV0PJG67Guu5f7OaSnfuXp5KdJDk/25bsvPjyg5KdJLl+7bpk59Gb5yU7SXLv+liyMxReT+NqLNm5urwo2UmS6wc118H5+pGSnSTJtPhr4y0Nhc8Ax9OhZOdQ91WQw7Hm+2k91VyXSTKer0t2NmPNNXB9qDlHSTJtaj7brRs1rVvKkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANJmWvnC7GUve8LFH75bsJMnpVLPzX999sWYoydnNbcnOnfO630frU83WuDkr2UmSx7Mv2XkwlMwkSfZFH+/xR27UDCXJnZrr6cbZ4lv9oeainXGsu8arjmlYFV5Qh5ovqPl0LNlJkvlQc98dVjV/u9Vcdw3c2G5Kdi4ui8KykCdhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQZFr6wrP1WPKGN7eL3/Khbt88L9m5dadmJ0leee1Byc75uC7ZSZLd7bslO9tXXi/ZSZJpqPn9tz7fluwkyfpsV7Jzq/CYLnf7mqHjsWYnSTKXrBxONTtJcij6eHPhY8m4rvmuOw5DyU6SvL47lexMx5p7ZT3WdCVJzvaHkp0HF9clO0t5EgaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJtPSF17cu1/yho/+wlCykyTXh33JzvGq7rfI7c1YsrM/HEt2kmSz2pTsPPHYoyU7SXLv3r2SnfWp7jzdHGv+dpmKdpIMp7lk57iqu8ZPc80xDfOpZCdJcqjZOhYe0mpdcx1st+uSnSQZq67xovN08+ZZzVCSaaxpy6uv17RuKU/CANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgybT0hV/73kslb/jZZz9SspMkw35XsrO7nkt2kuT2nVslOy/84NWSnSTZX51Kdo7Hut9s87gu2dkOJTNJkv1+X7IzFR7TZqwZ26fuoPZzzdZYeEzjaizZOdZ9FWRfNVZz+yZJxnXNPbw535TsDEPdNXA4Hkp2nn/hhyU7S3kSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAm09IX/tVXvl3yhp/+2AdKdpLkMx/8xZKd43FXspMkm3Eu2XnysZslO0nyjX99vmTntYwlO0lyvl6X7Mw1pztJsplqxnaHY8lOkgzHmq3b203JTpJcnWqOabevO0+noWio8Ho6nGrGhqrPlmR7XnPfbTeL0/GWztd11+XF5b5k5+/+4d9KdpbyJAwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJpM7/Qb/vt3Xirb+pVn3l8zNA01O0l2L98r2VlPdX+azXos2ZkudyU7SXL71s2Snf2p7nfkrRtnJTs37tZ8tiS5vv+gZGe+uirZSZKh6DI429Zcl0lyWNXcwxfXx5KdJJnWNce0Guu+C9ZzzdZ6rLlXzs/q7pV//Jdvlm29kzwJA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0CTafErh5o3/LMv/lPNUJKPf+j9JTu/9elPluwkyf56V7Lzysv3SnaS5O6dx2uG1lc1O0nO5pqdm7e2NUNJpmks2dkfDiU7SXK5qvl8481TyU6SnM81W/NQdBEkGYea54nrU90x7YumVmPNdZkkq1XN1nralOx867svlewkyV/87VdKdtZ1p3sRT8IA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaDJtPSFdzfrkjd87XpfspMkf/T5vyzZ+eh775bsJMnHn3m6ZOfeWDKTJDleXZfs/PytbclOklzvDiU7F5c1ny1JdplLdk7HU8lOkgxFxzRNdRfUdh5KdurOUjKONc8Td84WfyU+1IPrmmt8LjrfSXLjZs09/J3v/3fJzp98/q9LdiqtVoVfvkve7x19NwDg/4gwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmgzzPM+LXjgMJW/4xM1tyU6SvPzgumTnk4/XfLYk+YPf++2SnSff9WjJTpK8enUo2blzti7ZSZJhPZbsXB2OJTtJsj8tuhUeanWsOd9JcnGsOaaz7VSykyRzao5pv+yrZ5HDquYePptqrsskOcynkp0HV/uSnST57gv/U7Lzp1/4UsnO91/dlewkye2zTcnOG1d1x7Qkr56EAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAk2Ge53nRC4fh7T6WH9mTd85Kdl58/apkp9If/vavlm196kPvK9mZtuuSnSRZjTXX0zTU/Y68Op1KdjaFt8r9q33N0KruoG6eb0p2dln01bPIZqy5Dk7HmmsgSV59cFGy85Vvfb9kJ0n++AtfLduqcPus7jvljap7pdCSvHoSBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmwzzP86IXDsPbfSxtfu7Wpmzrpfu7sq0qzz55t2Rn3NSdp8Oyy+6hNqu663J3OJbsrKepZCdJqj7e5dW+ZijJdlPz+Yax7hlgM9acqIVfh4u8+MPXSnaeu3dVslPp9lnNNfDG1aFk58fVkuvJkzAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBFhAGgiwgDQRIQBoIkIA0ATEQaAJiIMAE1EGACaiDAANBnmeZ4XvXAY3u5j+alwe13zu+biuOjPssjxVLcFvMMKv3vPx5qty8OpZOen3ZK8ehIGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNRBgAmogwADQRYQBoIsIA0ESEAaCJCANAExEGgCYiDABNpqUvnOf57TwOAPiZ40kYAJqIMAA0EWEAaCLCANBEhAGgiQgDQBMRBoAmIgwATUQYAJr8L8H89Up0ZsB2AAAAAElFTkSuQmCC", "text/plain": [ "
" ] diff --git a/pheno_utils/config_setup/config_tre.json b/pheno_utils/config_setup/config_tre.json index acd8ece..8639129 100644 --- a/pheno_utils/config_setup/config_tre.json +++ b/pheno_utils/config_setup/config_tre.json @@ -8,6 +8,8 @@ "s3://.*/fundus/10k/": "/home/ec2-user/studies/fundus_bulk/", "s3://.*/human_genetics/10k/": "/home/ec2-user/studies/human_genetics_bulk/", "s3://.*/gut_microbiome/10k/": "/home/ec2-user/studies/gut_microbiome_bulk/", + "s3://.*/oral_microbiome/10k/": "/home/ec2-user/studies/oral_microbiome_bulk/", + "s3://.*/rna_seq/10k/": "/home/ec2-user/studies/rna_seq_bulk/", "\\./": "/home/ec2-user/studies/hpp_datasets/{dataset}/" } } \ No newline at end of file diff --git a/pheno_utils/config_setup/config_tre_mbz.json b/pheno_utils/config_setup/config_tre_mbz.json index ce967d9..f7c51e7 100644 --- a/pheno_utils/config_setup/config_tre_mbz.json +++ b/pheno_utils/config_setup/config_tre_mbz.json @@ -7,6 +7,8 @@ "s3://.*/fundus/10k/": "/home/ec2-user/studies/fundus_bulk/", "s3://.*/human_genetics/10k/": "/home/ec2-user/studies/human_genetics_bulk/", "s3://.*/gut_microbiome/10k/": "/home/ec2-user/studies/gut_microbiome_bulk/", + "s3://.*/oral_microbiome/10k/": "/home/ec2-user/studies/oral_microbiome_bulk/", + "s3://.*/rna_seq/10k/": "/home/ec2-user/studies/rna_seq_bulk/", "\\./": "/home/ec2-user/studies/{dataset}/" } } \ No newline at end of file diff --git a/pheno_utils/pheno_loader.py b/pheno_utils/pheno_loader.py index 7548979..c603337 100644 --- a/pheno_utils/pheno_loader.py +++ b/pheno_utils/pheno_loader.py @@ -467,14 +467,19 @@ def get(self, fields: Union[str,List[str]], flexible: bool=None, not_bulk_field= continue if table_name == 'age_sex': - keep_undefined = True + # The 'age_sex' table does not contain 'undefined', so the merge will not cause a Cartesian product + keep_undefined = True + # Left join to keep only rows with real data points + how = 'left' else: keep_undefined = keep_undefined_research_stage + how = 'outer' data = self.__concat__( data, df_fields, - keep_undefined + keep_undefined, + how ) renamed_cols += duplicated_fields @@ -527,13 +532,13 @@ def is_value_in_index(self, df, value, index_name): return False @staticmethod - def join_and_filter_undefined_research_stage(df1, df2): + def join_and_filter_undefined_research_stage(df1, df2, how='outer', lsuffix='', rsuffix=''): df1_defined = df1[df1.index.get_level_values('research_stage') != 'undefined'] df2_defined = df2[df2.index.get_level_values('research_stage') != 'undefined'] - return df1_defined.join(df2_defined, how='outer') + return df1_defined.join(df2_defined, how=how, lsuffix=lsuffix, rsuffix=rsuffix) - def __concat__(self, df1, df2, keep_undefined_research_stage=False): + def __concat__(self, df1, df2, keep_undefined_research_stage=False, how='outer', lsuffix='', rsuffix=''): if df1.empty: return df2 @@ -544,18 +549,33 @@ def __concat__(self, df1, df2, keep_undefined_research_stage=False): self.is_value_in_index(df2, 'undefined', 'research_stage') and not keep_undefined_research_stage: warnings.warn('filtering "undefined" research_stage..') - df = self.join_and_filter_undefined_research_stage(df1, df2) + df = self.join_and_filter_undefined_research_stage(df1, df2, how, lsuffix, rsuffix) return df - return df1.join(df2, how='outer') - + return df1.join(df2, how=how, lsuffix=lsuffix, rsuffix=rsuffix) + + def merge_all_tables(self) -> pd.DataFrame: + # merge all tables in self.dfs dictionary + align_df = None + for name, df in self.dfs.items(): + if align_df is None: + align_df = df + else: + # Join the table with an 'undefined' research_stage to keep the maximum number of data points + align_df = self.__concat__(align_df, df, keep_undefined_research_stage=True, how='outer', lsuffix='', rsuffix= name) + return align_df + def __load_age_sex__(self) -> None: """ Add sex and compute age from birth date. """ age_path = os.path.join(self.__get_dataset_path__(self.age_sex_dataset), 'events.parquet') - align_df = self.dfs[list(self.dfs)[0]] - + + if len(self.dfs) > 1: + align_df = self.merge_all_tables() + else: + align_df = self.dfs[list(self.dfs)[0]] + if ('research_stage' in align_df.columns) or ('research_stage' in align_df.index.names): try: age_df = pd.read_parquet(age_path)