summaryrefslogtreecommitdiff
path: root/notebooks/disease_extraction_comparison.ipynb
diff options
context:
space:
mode:
Diffstat (limited to 'notebooks/disease_extraction_comparison.ipynb')
-rw-r--r--notebooks/disease_extraction_comparison.ipynb750
1 files changed, 750 insertions, 0 deletions
diff --git a/notebooks/disease_extraction_comparison.ipynb b/notebooks/disease_extraction_comparison.ipynb
new file mode 100644
index 0000000..dcf68d7
--- /dev/null
+++ b/notebooks/disease_extraction_comparison.ipynb
@@ -0,0 +1,750 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "860ebc4a-63e5-462d-b6ab-9bae23d10afb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# This notebook lives in notebooks/; step up to the repository root so that\n",
+ "# repo-relative paths such as data/... resolve. The guard makes the cell\n",
+ "# idempotent: re-running it no longer climbs one directory higher each time.\n",
+ "if os.path.basename(os.getcwd()) == 'notebooks':\n",
+ "    os.chdir('..')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "851d771c-15b4-4168-acf5-86bdd15d9610",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ }
+ ],
+ "source": [
+ "from kg_rag.utility import *\n",
+ "from tqdm import tqdm\n",
+ "import pandas as pd\n",
+ "import spacy\n",
+ "import scispacy\n",
+ "from scispacy.linking import EntityLinker\n",
+ "from transformers import pipeline\n",
+ "from transformers import AutoModelForTokenClassification\n",
+ "from IPython.display import clear_output"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242aeb6-99f7-496a-8fd8-1f0d964a2556",
+ "metadata": {},
+ "source": [
+ "## List the NER methods to compare"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "45fdafb8-65cc-44dd-b22d-f17e5e807b49",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NER method identifiers to benchmark; each one must match a branch of entity_extraction().\n",
+ "method_list = [\n",
+ "    'gpt',\n",
+ "    'biomed-ner-all',\n",
+ "    'scispacy',\n",
+ "]\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ddc073a0-2508-410e-8e39-4bd94020bf8a",
+ "metadata": {},
+ "source": [
+ "## Load spaCy and BERT-based models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "07a1ccc1-826a-4986-b7a7-f7bf26dd1d8c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# scispaCy pipeline with a UMLS entity linker attached (abbreviation resolution enabled)\n",
+ "nlp = spacy.load(\"en_core_sci_sm\")\n",
+ "nlp.add_pipe(\n",
+ "    \"scispacy_linker\",\n",
+ "    config={\"resolve_abbreviations\": True, \"linker_name\": \"umls\"},\n",
+ ")\n",
+ "\n",
+ "# Hugging Face token-classification checkpoint; tokenizer and model share the\n",
+ "# same id, revision and cache directory. The model weights are loaded as float16.\n",
+ "# NOTE(review): AutoTokenizer, torch and config_data are not imported in this\n",
+ "# notebook - presumably they arrive via `from kg_rag.utility import *`; confirm.\n",
+ "biomed_ner_all_checkpoint = \"d4data/biomedical-ner-all\"\n",
+ "biomed_ner_all_cache_dir = config_data['LLM_CACHE_DIR']\n",
+ "biomed_ner_all_tokenizer = AutoTokenizer.from_pretrained(\n",
+ "    biomed_ner_all_checkpoint,\n",
+ "    revision=\"main\",\n",
+ "    cache_dir=biomed_ner_all_cache_dir,\n",
+ ")\n",
+ "biomed_ner_all_model = AutoModelForTokenClassification.from_pretrained(\n",
+ "    biomed_ner_all_checkpoint,\n",
+ "    torch_dtype=torch.float16,\n",
+ "    revision=\"main\",\n",
+ "    cache_dir=biomed_ner_all_cache_dir,\n",
+ ")\n",
+ "# Wipe whatever the loaders printed into the cell output\n",
+ "clear_output()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4cf3589a-6dec-41e5-9f43-703b0171e79c",
+ "metadata": {},
+ "source": [
+ "## Load evaluation dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "108796a6-5887-464b-8394-04e04b017d0b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Evaluation set; later cells read its 'text' and 'node_hits' columns.\n",
+ "# The relative path resolves against the working directory set in the first cell.\n",
+ "data = pd.read_csv(filepath_or_buffer='data/dataset_for_entity_retrieval_accuracy_analysis.csv')\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed5fca24-2b5d-4696-bcd4-ba911dce6624",
+ "metadata": {},
+ "source": [
+ "## Custom functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "6e262f71-eac1-4894-8390-f1f4c2e8f84f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def entity_extraction(text, method):\n",
+ "    \"\"\"Run one disease-NER method on ``text`` and time the call.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Input sentence to extract disease mentions from.\n",
+ "        method: One of 'gpt', 'scispacy' or 'biomed-ner-all'.\n",
+ "\n",
+ "    Returns:\n",
+ "        Tuple ``(entity, run_time)``: whatever the selected extractor returned\n",
+ "        and the wall-clock seconds the call took.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: If ``method`` is not a supported identifier (previously this\n",
+ "            surfaced as an UnboundLocalError at the return statement).\n",
+ "    \"\"\"\n",
+ "    # Built at call time because the extractors are defined further down this cell.\n",
+ "    extractors = {\n",
+ "        'gpt': disease_entity_extractor_compare_version,\n",
+ "        'scispacy': disease_entity_extractor_scispacy,\n",
+ "        'biomed-ner-all': disease_entity_extractor_biomed_ner,\n",
+ "    }\n",
+ "    if method not in extractors:\n",
+ "        raise ValueError(f'Unknown NER method {method!r}; expected one of {sorted(extractors)}')\n",
+ "    start_time = time.time()\n",
+ "    entity = extractors[method](text)\n",
+ "    run_time = time.time() - start_time\n",
+ "    return entity, run_time\n",
+ "\n",
+ "def get_GPT_response_compare_version(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature=0):\n",
+ "    \"\"\"Forward all arguments, unchanged and in order, to fetch_GPT_response.\n",
+ "\n",
+ "    NOTE(review): fetch_GPT_response is not defined in this notebook; presumably\n",
+ "    it comes from `from kg_rag.utility import *` - confirm.\n",
+ "    \"\"\"\n",
+ "    response = fetch_GPT_response(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature)\n",
+ "    return response\n",
+ " \n",
+ "def disease_entity_extractor_compare_version(text):\n",
+ "    \"\"\"Ask the GPT chat model for the disease mentions in ``text``.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Sentence to extract disease mentions from.\n",
+ "\n",
+ "    Returns:\n",
+ "        The value stored under the \"Diseases\" key of the model's JSON reply,\n",
+ "        or None when the reply cannot be parsed or has no such key.\n",
+ "    \"\"\"\n",
+ "    chat_model_id, chat_deployment_id = get_gpt35()\n",
+ "    system_prompt = system_prompts[\"DISEASE_ENTITY_EXTRACTION\"]\n",
+ "    # The extraction prompt is sent twice: as the system prompt and as the prefix of the instruction.\n",
+ "    prompt_updated = system_prompt + \"\\n\" + \"Sentence : \" + text\n",
+ "    resp = get_GPT_response_compare_version(prompt_updated, system_prompt, chat_model_id, chat_deployment_id, temperature=0)\n",
+ "    try:\n",
+ "        return json.loads(resp)[\"Diseases\"]\n",
+ "    except (ValueError, KeyError, TypeError):\n",
+ "        # Non-JSON reply (json.JSONDecodeError is a ValueError), missing key, or a\n",
+ "        # reply of the wrong type. Kept narrow on purpose: a bare except would also\n",
+ "        # swallow KeyboardInterrupt while the long GPT loop is running.\n",
+ "        return None\n",
+ "\n",
+ "def disease_entity_extractor_scispacy(text):\n",
+ "    \"\"\"Return the surface text of scispaCy entities linked to a disease-type UMLS concept.\n",
+ "\n",
+ "    Only the first linker candidate of each entity is inspected; entities with\n",
+ "    no candidate are skipped.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Sentence to run through the scispaCy pipeline.\n",
+ "\n",
+ "    Returns:\n",
+ "        List of entity strings, in document order.\n",
+ "    \"\"\"\n",
+ "    # UMLS semantic type ids accepted as diseases.\n",
+ "    # NOTE(review): presumably T047 = disease or syndrome, T191 = neoplastic process; confirm.\n",
+ "    disease_semantic_types = {\"T047\", \"T191\"}\n",
+ "    # Loop-invariant: resolve the linker's CUI table once instead of once per entity.\n",
+ "    cui_to_entity = nlp.get_pipe(\"scispacy_linker\").kb.cui_to_entity\n",
+ "    entity = []\n",
+ "    for ent in nlp(text).ents:\n",
+ "        if not ent._.kb_ents:\n",
+ "            continue\n",
+ "        # First candidate only (presumably the best-scoring one - verify against scispacy docs).\n",
+ "        umls_cui = ent._.kb_ents[0][0]\n",
+ "        if any(t in disease_semantic_types for t in cui_to_entity[umls_cui].types):\n",
+ "            entity.append(ent.text)\n",
+ "    return entity\n",
+ "\n",
+ "def disease_entity_extractor_biomed_ner(text):\n",
+ "    \"\"\"Return the words the biomedical NER model tags as a disease or a symptom.\n",
+ "\n",
+ "    Keeps aggregated entities whose group is 'Disease_disorder' or 'Sign_symptom'\n",
+ "    and drops empty words.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Sentence to run through the token-classification pipeline.\n",
+ "\n",
+ "    Returns:\n",
+ "        List of entity strings, in the order the pipeline produced them.\n",
+ "    \"\"\"\n",
+ "    # Build the pipeline once and cache it on the function object. Rebuilding it\n",
+ "    # per call added constant setup cost to every timed extraction. Re-running\n",
+ "    # this cell redefines the function and therefore resets the cache.\n",
+ "    pipe = getattr(disease_entity_extractor_biomed_ner, '_pipe', None)\n",
+ "    if pipe is None:\n",
+ "        # NOTE(review): device=0 selects the first GPU, so a CUDA device is required.\n",
+ "        pipe = pipeline(\"ner\", model=biomed_ner_all_model, tokenizer=biomed_ner_all_tokenizer, aggregation_strategy=\"simple\", device=0)\n",
+ "        disease_entity_extractor_biomed_ner._pipe = pipe\n",
+ "    wanted_groups = {'Disease_disorder', 'Sign_symptom'}\n",
+ "    return [hit['word'] for hit in pipe(text) if hit['entity_group'] in wanted_groups and hit['word']]\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "65c46409-f3dd-45e6-9ea8-da84cd8db212",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Processing method : gpt, 1/3\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "322it [03:07, 1.71it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Processing method : biomed-ner-all, 2/3\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "322it [00:05, 63.45it/s] \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Processing method : scispacy, 3/3\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "322it [00:04, 72.89it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 11.3 s, sys: 1.71 s, total: 13 s\n",
+ "Wall time: 3min 17s\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "# Run every NER method over every evaluation sentence and collect one record per\n",
+ "# (sentence, method) pair: input text, gold label, extracted entities, seconds taken.\n",
+ "comparison_out = []\n",
+ "for method_number, method in enumerate(method_list, start=1):\n",
+ "    print(f'Processing method : {method}, {method_number}/{len(method_list)}')\n",
+ "    # iterrows() is a generator with no length; pass total so tqdm can show a bar and an ETA.\n",
+ "    for _, row in tqdm(data.iterrows(), total=len(data)):\n",
+ "        entity, run_time = entity_extraction(row['text'], method)\n",
+ "        comparison_out.append((row['text'], row['node_hits'], entity, run_time, method))\n",
+ "\n",
+ "comparison_out_df = pd.DataFrame(comparison_out, columns=['input_text', 'node_hits', 'entity_extracted', 'run_time_per_text', 'ner_method'])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "3fbfbfed-3dd6-4e86-8fac-e0ee40d2c363",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>input_text</th>\n",
+ " <th>node_hits</th>\n",
+ " <th>entity_extracted</th>\n",
+ " <th>run_time_per_text</th>\n",
+ " <th>ner_method</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>LIRAGLUTIDE TREATS OBESITY</td>\n",
+ " <td>obesity</td>\n",
+ " <td>[obesity]</td>\n",
+ " <td>2.214761</td>\n",
+ " <td>gpt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>disease ontology identifier for central diabet...</td>\n",
+ " <td>central diabetes insipidus</td>\n",
+ " <td>[central diabetes insipidus]</td>\n",
+ " <td>0.549497</td>\n",
+ " <td>gpt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>Xeroderma pigmentosum, group G is not associat...</td>\n",
+ " <td>xeroderma pigmentosum</td>\n",
+ " <td>[Xeroderma pigmentosum]</td>\n",
+ " <td>0.926769</td>\n",
+ " <td>gpt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>cherubism is not a autosomal dominant disease</td>\n",
+ " <td>cherubism</td>\n",
+ " <td>[cherubism, autosomal dominant disease]</td>\n",
+ " <td>0.675068</td>\n",
+ " <td>gpt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>MASA SYNDROME (DISORDER) IS NOT ASSOCIATED WIT...</td>\n",
+ " <td>MASA syndrome</td>\n",
+ " <td>[MASA SYNDROME]</td>\n",
+ " <td>0.465556</td>\n",
+ " <td>gpt</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>961</th>\n",
+ " <td>antineoplastic agents treats osteosarcoma</td>\n",
+ " <td>osteosarcoma</td>\n",
+ " <td>[osteosarcoma]</td>\n",
+ " <td>0.012946</td>\n",
+ " <td>scispacy</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>962</th>\n",
+ " <td>timothy syndrome associates gene cacna1c</td>\n",
+ " <td>Timothy syndrome</td>\n",
+ " <td>[syndrome]</td>\n",
+ " <td>0.011308</td>\n",
+ " <td>scispacy</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>963</th>\n",
+ " <td>piebaldism is a autosomal dominant disease</td>\n",
+ " <td>piebaldism</td>\n",
+ " <td>[autosomal dominant disease]</td>\n",
+ " <td>0.012271</td>\n",
+ " <td>scispacy</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>964</th>\n",
+ " <td>Disease ontology identifier for Loeys-Dietz sy...</td>\n",
+ " <td>Loeys-Dietz syndrome</td>\n",
+ " <td>[Loeys-Dietz syndrome]</td>\n",
+ " <td>0.012468</td>\n",
+ " <td>scispacy</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>965</th>\n",
+ " <td>NOONAN SYNDROME ASSOCIATES GENE PTPN11</td>\n",
+ " <td>Noonan syndrome</td>\n",
+ " <td>[NOONAN SYNDROME]</td>\n",
+ " <td>0.010858</td>\n",
+ " <td>scispacy</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>966 rows × 5 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " input_text \\\n",
+ "0 LIRAGLUTIDE TREATS OBESITY \n",
+ "1 disease ontology identifier for central diabet... \n",
+ "2 Xeroderma pigmentosum, group G is not associat... \n",
+ "3 cherubism is not a autosomal dominant disease \n",
+ "4 MASA SYNDROME (DISORDER) IS NOT ASSOCIATED WIT... \n",
+ ".. ... \n",
+ "961 antineoplastic agents treats osteosarcoma \n",
+ "962 timothy syndrome associates gene cacna1c \n",
+ "963 piebaldism is a autosomal dominant disease \n",
+ "964 Disease ontology identifier for Loeys-Dietz sy... \n",
+ "965 NOONAN SYNDROME ASSOCIATES GENE PTPN11 \n",
+ "\n",
+ " node_hits entity_extracted \\\n",
+ "0 obesity [obesity] \n",
+ "1 central diabetes insipidus [central diabetes insipidus] \n",
+ "2 xeroderma pigmentosum [Xeroderma pigmentosum] \n",
+ "3 cherubism [cherubism, autosomal dominant disease] \n",
+ "4 MASA syndrome [MASA SYNDROME] \n",
+ ".. ... ... \n",
+ "961 osteosarcoma [osteosarcoma] \n",
+ "962 Timothy syndrome [syndrome] \n",
+ "963 piebaldism [autosomal dominant disease] \n",
+ "964 Loeys-Dietz syndrome [Loeys-Dietz syndrome] \n",
+ "965 Noonan syndrome [NOONAN SYNDROME] \n",
+ "\n",
+ " run_time_per_text ner_method \n",
+ "0 2.214761 gpt \n",
+ "1 0.549497 gpt \n",
+ "2 0.926769 gpt \n",
+ "3 0.675068 gpt \n",
+ "4 0.465556 gpt \n",
+ ".. ... ... \n",
+ "961 0.012946 scispacy \n",
+ "962 0.011308 scispacy \n",
+ "963 0.012271 scispacy \n",
+ "964 0.012468 scispacy \n",
+ "965 0.010858 scispacy \n",
+ "\n",
+ "[966 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Long-format results: one row per (input sentence, NER method)\n",
+ "comparison_out_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ebb106e7-9f63-475c-acec-61dffbda4f98",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# One frame per method. .copy() makes each an independent frame, so the\n",
+ "# precision/recall columns added in the next cell are not written to a slice of\n",
+ "# comparison_out_df (the pattern pandas flags with SettingWithCopyWarning).\n",
+ "comparison_out_df_gpt = comparison_out_df[comparison_out_df.ner_method=='gpt'].copy()\n",
+ "comparison_out_df_biomed_ner_all = comparison_out_df[comparison_out_df.ner_method=='biomed-ner-all'].copy()\n",
+ "comparison_out_df_scispacy = comparison_out_df[comparison_out_df.ner_method=='scispacy'].copy()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "b6524af1-912a-44e0-8687-3e9ff65d14e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def calculate_precision_recall(row):\n",
+ "    \"\"\"Score one row's extracted entities against its single gold label.\n",
+ "\n",
+ "    Matching is exact and case-insensitive. ``row['node_hits']`` is treated as one\n",
+ "    gold entity (it is not split). A missing extraction (None, which the GPT\n",
+ "    extractor returns for an unparsable reply) counts as no predictions instead\n",
+ "    of raising a TypeError.\n",
+ "\n",
+ "    Args:\n",
+ "        row: Mapping with 'node_hits' (str) and 'entity_extracted' (list of str or None).\n",
+ "\n",
+ "    Returns:\n",
+ "        pd.Series with 'precision' and 'recall'; each is 0 when its denominator is 0.\n",
+ "    \"\"\"\n",
+ "    extracted = row['entity_extracted']\n",
+ "    if extracted is None:\n",
+ "        extracted = []\n",
+ "\n",
+ "    # Lower-case both sides so the comparison ignores case\n",
+ "    true_entities = {row['node_hits'].lower()}\n",
+ "    predicted_entities = {entity.lower() for entity in extracted}\n",
+ "\n",
+ "    true_positives = len(true_entities & predicted_entities)\n",
+ "    false_positives = len(predicted_entities - true_entities)\n",
+ "    false_negatives = len(true_entities - predicted_entities)\n",
+ "\n",
+ "    predicted_total = true_positives + false_positives\n",
+ "    gold_total = true_positives + false_negatives\n",
+ "    precision = true_positives / predicted_total if predicted_total > 0 else 0\n",
+ "    recall = true_positives / gold_total if gold_total > 0 else 0\n",
+ "\n",
+ "    return pd.Series({'precision': precision, 'recall': recall})\n",
+ "\n",
+ "# Attach per-row precision/recall columns to each method's frame\n",
+ "for method_df in (comparison_out_df_gpt, comparison_out_df_biomed_ner_all, comparison_out_df_scispacy):\n",
+ "    method_df[['precision', 'recall']] = method_df.apply(calculate_precision_recall, axis=1)\n",
+ "\n",
+ "# Clears anything the assignments above printed into the cell output\n",
+ "clear_output()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "09112cfd-43a3-4bdd-8128-e872f5ede03a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.9549689440993789\n",
+ "0.9968944099378882\n"
+ ]
+ }
+ ],
+ "source": [
+ "# GPT extractor: mean per-sentence precision, then mean per-sentence recall\n",
+ "print(comparison_out_df_gpt['precision'].mean())\n",
+ "print(comparison_out_df_gpt['recall'].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "3814b05f-8708-428c-8c37-27160feb3ed7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.23680124223602483\n",
+ "0.2795031055900621\n"
+ ]
+ }
+ ],
+ "source": [
+ "# biomed-ner-all extractor: mean per-sentence precision, then mean per-sentence recall\n",
+ "print(comparison_out_df_biomed_ner_all['precision'].mean())\n",
+ "print(comparison_out_df_biomed_ner_all['recall'].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "9496c72c-2976-4bdd-bde5-6b8612461853",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.5910973084886129\n",
+ "0.6428571428571429\n"
+ ]
+ }
+ ],
+ "source": [
+ "# scispaCy extractor: mean per-sentence precision, then mean per-sentence recall\n",
+ "print(comparison_out_df_scispacy['precision'].mean())\n",
+ "print(comparison_out_df_scispacy['recall'].mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "075fa7ce-e463-459c-88e6-00d3db62682f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAEiCAYAAADd4SrgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6w0lEQVR4nO3deVhU9f4H8PewoyBiCqgXI0PNpQBRcUGvJq5dvaalqbnmkuUWaoqZ5M8U65ZiuWDX7ep1KzU1UUwxS9TUVHDLLRfQAMEFBGUQ+Pz+4HJ0YlBAZuYw8349D8/DfOec4TMDbz5nzpzzPRoRERAREZHqWJm6ACIiItKPTZqIiEil2KSJiIhUik2aiIhIpdikiYiIVIpNmoiISKXYpImIiFSKTZqIiEil2KSJiIhUik2aiIhIpUzapH/55Rd069YNNWrUgEajwZYtW566zr59+9C4cWPY29vD29sbK1euNHidRPRkzDKRYZi0SWdmZsLHxwcLFy4s1vJXrlzBa6+9hnbt2iE2Nhbjx4/HsGHDsGvXLgNXSkRPwiwTGYZGLRfY0Gg0+P7779GjR48il5k8eTIiIyNx+vRpZeytt97C3bt3ERUVZYQqiehpmGWislOuPpM+dOgQgoKCdMY6deqEQ4cOmagiIioNZpmoeGxMXUBJJCUlwd3dXWfM3d0d6enpePDgARwdHQuto9VqodVqldsiguzsbFStWhUajcbgNRNRYcwyUfGUqyZdGmFhYZgxY0ah8bS0NFSqVMkEFRFRaZQmy15TIguN3d67FPeObinxz3du2gNVXh2mM3Z1zmslfhyikihXu7s9PDyQnJysM5acnIxKlSrp3fIGgJCQEKSlpSlfCQkJxiiViJ6AWSYqnnL1TrpFixbYsWOHztju3bvRokWLItext7eHvb29oUsjohIwZZYrNeuJig3alng9a6cqz/yziUrKpE06IyMDly5dUm5fuXIFsbGxqFKlCmrVqoWQkBDcuHEDq1atAgC8++67WLBgAT788EMMHToUe/fuxbfffovIyMK7tIjIeMpTlm2cqsCGDZfKCZPu7v7tt9/g5+cHPz8/AEBwcDD8/Pwwffp0AEBiYiLi4+OV5V944QVERkZi9+7d8PHxwZdffomlS5eiU6dOJqmfiPIxy0SGoZrzpI0lPT0dLi4uJT5wLDExEYmJiSX+edWrV0f16tVLvB4RPVlxsqzvwLGyxAPHyNDK1WfSprRkyRK9R5Y+TWhoKD755JOyL4iIiMwem3QxjRw5Et27d9cZe/DgAQIDAwEAMTExeo9K5btoIiIqLTZpPfTtIsvJuI3cjNs6Y/IwW/m+16KD0NjaFVrPWs9BKtxFRkRExcEmXUwZsTuRdmBdkfcnr/1Q77hLq76oHNjfUGUREZEZY5MuJiffLnD0Dijxejy3koiISotNuph4biURERlbuZoWlIiIyJKwSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREamUyZv0woUL4eXlBQcHBwQEBODIkSNPXD48PBz16tWDo6MjPD098cEHHyArK8tI1RJRUZhlorJn0ia9YcMGBAcHIzQ0FMePH4ePjw86deqEmzdv6l1+7dq1mDJlCkJDQ/H7779j2bJl2LBhA6ZOnWrkyonoccwykWGYtEnPnTsXw4cPx5AhQ9CgQQNERESgQoUKWL58ud7lDx48iFatWqFfv37w8vJCx44d0bdv36dusRORYTHLRIZhsiadnZ2NY8eOISgo6FExVlYICgrCoUOH9K7TsmVLHDt2TAny5cuXsWPHDnTt2tUoNRNRYcwykeHYmOoHp6amIjc3F+7u7jrj7u7uOHfunN51+vXrh9TUVAQG
BkJEkJOTg3ffffeJu8i0Wi20Wq1yOz09vWyeABEBYJaJDMnkB46VxL59+zB79mwsWrQIx48fx+bNmxEZGYmZM2cWuU5YWBhcXFyUL09PTyNWTET6MMtExWOyd9JVq1aFtbU1kpOTdcaTk5Ph4eGhd52PP/4YAwYMwLBhwwAAL7/8MjIzMzFixAh89NFHsLIqvM0REhKC4OBg5XZ6ejrDTVSGmGUiwzHZO2k7Ozv4+/sjOjpaGcvLy0N0dDRatGihd5379+8XCq+1tTUAQET0rmNvb49KlSrpfBFR2WGWiQzHZO+kASA4OBiDBg1CkyZN0KxZM4SHhyMzMxNDhgwBAAwcOBA1a9ZEWFgYAKBbt26YO3cu/Pz8EBAQgEuXLuHjjz9Gt27dlIATkfExy0SGYdIm3adPH6SkpGD69OlISkqCr68voqKilANQ4uPjdba2p02bBo1Gg2nTpuHGjRuoVq0aunXrhlmzZpnqKRARmGUiQ9FIUfuWzFR6ejpcXFyQlpZW5O4yrymRBq3h6pzXDPr4RJaAWSZLUK6O7iYiIrIkbNJEREQqxSZNRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRSbNJEREQqxSZNRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRSbNJEREQqxSZNRESkUmzSREREKsUmTUREpFI2pi6AiIhILRITE5GYmFji9apXr47q1auXeT1s0kRERP+zZMkSzJgxo8TrhYaG4pNPPinzetikiYiI/mfkyJHo3r27ztiDBw8QGBgIAIiJiYGjo2Oh9QzxLhpgkyYiIlLo222dmZmpfO/r64uKFSsarR4eOEZERKRSpXonnZubi5UrVyI6Oho3b95EXl6ezv179+4tk+KIiIgsWama9Lhx47By5Uq89tpraNSoETQaTVnXRUREZFBeUyKLtVxedpbyff2Po2Bl51Cs9a7Oea1UdT2uVE16/fr1+Pbbb9G1a9dnLoCIiIj0K9Vn0nZ2dvD29i7rWoiIiOgxpWrSEyZMwPz58yEiz1zAwoUL4eXlBQcHBwQEBODIkSNPXP7u3bt4//33Ub16ddjb26Nu3brYsWPHM9dB9KwSExNx/PjxEn+VZuIENWKWicpeqXZ3x8TE4KeffsLOnTvRsGFD2Nra6ty/efPmYj3Ohg0bEBwcjIiICAQEBCA8PBydOnXC+fPn4ebmVmj57OxsdOjQAW5ubti4cSNq1qyJa9euoXLlyqV5GkRlSm2TIBgTs0zmIifjNnIzbuuMycNs5fvs5MvQ2NoVWs/aqQpsnKqUeT2latKVK1fG66+//sw/fO7cuRg+fDiGDBkCAIiIiEBkZCSWL1+OKVOmFFp++fLluH37Ng4ePKhsGHh5eT1zHURlQW2TIBgTs0zmIiN2J9IOrCvy/uS1H+odd2nVF5UD+5d5PaVq0itWrHjmH5ydnY1jx44hJCREGbOyskJQUBAOHTqkd51t27ahRYsWeP/997F161ZUq1YN/fr1w+TJk2Ftbf3MNRE9C7VNgmAszDKZEyffLnD0DijxetYGeBcNPOOMYykpKTh//jwAoF69eqhWrVqx101NTUVubi7c3d11xt3d3XHu3Dm961y+fBl79+5F//79sWPHDly6dAnvvfceHj58iNDQUL3raLVaaLVa5XZ6enqxaySip2OWyZzYGGi3dWmV6sCxzMxMDB06FNWrV0ebNm3Qpk0b1KhRA++88w7u379f1jUq8vLy4Obmhm+++Qb+/v7o06cPPvroI0RERBS5TlhYGFxcXJQvT09Pg9VHRMXDLBMVT6neSQcHB+Pnn3/GDz/8gFatWgHI/7xt7NixmDBhAhYvXvzUx6hatSqsra2RnJysM56cnAwPDw+961SvXh22trY6u8Pq16+PpKQkZGdnw86u8If5ISEhCA4OVm6np6cz3PTMDDkJQllMgPAkJ0+eLPayr7zyylOXYZaJDKdUTXrTpk3YuHEj2rZtq4x1
7doVjo6O6N27d7GatJ2dHfz9/REdHY0ePXoAyN+6jo6OxujRo/Wu06pVK6xduxZ5eXmwssrfCXDhwgVUr15db6gBwN7eHvb29iV7gkRmzNfXFxqNpshTKAvu02g0yM3NferjMctEhlOqJn3//v1Cnz8BgJubW4l2dwcHB2PQoEFo0qQJmjVrhvDwcGRmZipHiA4cOBA1a9ZEWFgYAGDUqFFYsGABxo0bhzFjxuDixYuYPXs2xo4dW5qnQWSRrly5UuaPySwTGUapmnSLFi0QGhqKVatWwcEhf/fdgwcPMGPGDLRo0aLYj9OnTx+kpKRg+vTpSEpKgq+vL6KiopQNgPj4eGUrGwA8PT2xa9cufPDBB3jllVdQs2ZNjBs3DpMnTy7N0yCySM8//3yZPyazTGQYpWrS8+fPR6dOnfC3v/0NPj4+AIC4uDg4ODhg165dJXqs0aNHF7lLbN++fYXGWrRogV9//bXENRMZmtomQSjKtm3bir3sX8/7fhJmmajslapJN2rUCBcvXsSaNWuUUyz69u2L/v37652sgcgSqG0ShKIUfG78NMX9TJqIDKfU50lXqFABw4cPL8taiMo1tU2CUJS/Xv+diNSr2E1627Zt6NKlC2xtbZ+6u6wku8iIzIXaJkEgovKv2E26R48eSEpKgpub2xN3l3EXGVH5kpmZiZ9//hnx8fHIzs7WuY9HWxOZVrGb9OO7yLi7jMg8nDhxAl27dsX9+/eRmZmJKlWqIDU1FRUqVICbmxubNJGJlWpaUH3u3r1bVg9FREbywQcfoFu3brhz5w4cHR3x66+/4tq1a/D398cXX3xh6vKILF6pmvRnn32GDRs2KLfffPNNVKlSBTVr1kRcXFyZFUdEhhUbG4sJEybAysoK1tbW0Gq18PT0xOeff46pU6eaujwii1eqJh0REaHMmbt7927s2bMHUVFR6NKlCyZNmlSmBRKR4dja2iqTjLi5uSE+Ph4A4OLigoSEBFOWRkQo5SlYSUlJSpPevn07evfujY4dO8LLywsBASU/BYWITMPPzw9Hjx5FnTp18Pe//x3Tp09HamoqVq9ejUaNGpm6PCKLV6p30q6urspWdlRUFIKCggAAIsIju4nKkdmzZ6N69eoAgFmzZsHV1RWjRo1CSkoKlixZYuLqiKhU76R79uyJfv36oU6dOrh16xa6dOkCIP9IUW9v7zItkIgMp0mTJsr3bm5uiIqKMmE1RPRXpWrS8+bNg5eXFxISEvD555/DyckJAJCYmIj33nuvTAskIsO5cuUKcnJyUKdOHZ3xixcvwtbWFl5eXqYpjIgAlLJJ29raYuLEiYXGP/jgg2cuiIiMZ/DgwRg6dGihJn348GEsXbpU74UxiMh4OC0okQU7ceIEWrVqVWi8efPmRV7RioiMh9OCElkwjUaDe/fuFRpPS0tjjolUoNhHd+fl5cHNzU35vqgvBpuo/GjTpg3CwsJ0cpubm4uwsDAEBgaasDIiAp7hUpVEVP599tlnaNOmDerVq4fWrVsDAPbv34/09HTs3bvXxNURUanOkx47diy++uqrQuMLFizA+PHjn7UmIjKSBg0a4OTJk+jduzdu3ryJe/fuYeDAgTh37hwnMyFSgVK9k960aZPeg8datmyJOXPmIDw8/FnrIiIjqVGjBmbPnm3qMohIj1K9k7516xZcXFwKjVeqVAmpqanPXBQRGc/+/fvx9ttvo2XLlrhx4wYAYPXq1YiJiTFxZURUqibt7e2td2ainTt3onbt2s9cFBEZx6ZNm9CpUyc4Ojri+PHj0Gq1APKP7ua7ayLTK9Xu7uDgYIwePRopKSl49dVXAQDR0dH48ssvuaubqBz59NNPERERgYEDB2L9+vXKeKtWrfDpp5+asDIiAkrZpIcOHQqtVotZs2Zh5syZAAAvLy8sXrwYAwcOLNMCichwzp8/jzZt2hQad3Fxwd27d41fEBHpKPUpWKNGjVKuluPo6KjM301E5YeHhwcuXbpU
aI7umJgYfnRFpAKl+kwaAHJycrBnzx5s3rwZIgIA+PPPP5GRkVFmxRGRYQ0fPhzjxo3D4cOHodFo8Oeff2LNmjWYMGECRo0aZeryiCxeqd5JX7t2DZ07d0Z8fDy0Wi06dOgAZ2dnfPbZZ9BqtYiIiCjrOonIAKZMmYK8vDy0b98e9+/fR5s2bWBvb49JkyZh2LBhpi6PyOKV6p30uHHj0KRJE9y5cweOjo7K+Ouvv47o6OgSP97ChQvh5eUFBwcHBAQE4MiRI8Vab/369dBoNE+cS5yIiqbRaPDRRx/h9u3bOH36NH799VekpKTAxcUFL7zwQokeizkmKnulatL79+/HtGnTYGdnpzPu5eWlnGdZXBs2bEBwcDBCQ0Nx/Phx+Pj4oFOnTrh58+YT17t69SomTpyoTGVIRMWn1WoREhKCJk2aoFWrVtixYwcaNGiAM2fOoF69epg/f36JLj3LHBMZRqmadFEX0rh+/TqcnZ1L9Fhz587F8OHDMWTIEDRo0AARERGoUKECli9fXuQ6ubm56N+/P2bMmMGDW4hKYfr06Vi8eDG8vLxw5coVvPnmmxgxYgTmzZuHL7/8EleuXMHkyZOL/XjMMZFhlKpJd+zYUed8aI1Gg4yMDISGhqJr167Ffpzs7GwcO3YMQUFBjwqyskJQUBAOHTpU5Hr/93//Bzc3N7zzzjulKZ/I4n333XdYtWoVNm7ciB9//BG5ubnIyclBXFwc3nrrLVhbWxf7sZhjIsMp1YFjX3zxBTp37owGDRogKysL/fr1w8WLF1G1alWsW7eu2I+TmpqK3NxcuLu764y7u7vj3LlzeteJiYnBsmXLEBsbW6yfodVqlVmUACA9Pb3Y9RGZq+vXr8Pf3x8A0KhRI9jb2+ODDz6ARqMp8WMZI8cAs0yWqVRN2tPTE3FxcdiwYQPi4uKQkZGBd955B/3799c5kKys3bt3DwMGDMC///1vVK1atVjrhIWFYcaMGQariag8ys3N1TmmxMbGxmhzHZQmxwCzTJapxE364cOHeOmll7B9+3b0798f/fv3L/UPr1q1KqytrZGcnKwznpycDA8Pj0LL//HHH7h69Sq6deumjOXl5QHI/ydz/vx5vPjiizrrhISEIDg4WLmdnp4OT0/PUtdMZA5EBIMHD4a9vT0AICsrC++++y4qVqyos9zmzZuf+ljGyDHALJNlKnGTtrW1RVZWVpn8cDs7O/j7+yM6Olo5/SIvLw/R0dEYPXp0oeVfeuklnDp1Smds2rRpuHfvHubPn683sPb29so/IiLKN2jQIJ3bb7/9dqkfyxg5Bphlskyl2t39/vvv47PPPsPSpUthY1PqmUUB5F+sY9CgQWjSpAmaNWuG8PBwZGZmYsiQIQCAgQMHombNmggLC4ODg0OhC9FXrlwZAHiBeqISWLFiRZk+HnNMZBil6rBHjx5FdHQ0fvzxR7z88sul2kVWoE+fPkhJScH06dORlJQEX19fREVFKQehxMfHw8qq1LOXEpERMMdEhlGqJl25cmX06tWrzIoYPXq03t1iALBv374nrrty5coyq4OISo85Jip7JWrSeXl5+Ne//oULFy4gOzsbr776Kj755BODHtFNRERkqUq0/2nWrFmYOnUqnJycULNmTXz11Vd4//33DVUbERGRRStRk161ahUWLVqEXbt2YcuWLfjhhx+wZs0a5fQJIiIiKjslatLx8fE6034GBQUp16AlIiKislWiJp2TkwMHBwedMVtbWzx8+LBMiyIiIqISHjj211mKAP0zFZXkFCwiIiLSr0RN+q+zFAHPNlMRERERFa1ETbqsZykiIiKionEKICIiIpVikyYiIlKpZ7s6BhERlWuJiYlITEws8XrVq1dH9erVDVARPY5NmojIgi1ZsgQzZswo8XqhoaH45JNPyr4g0sEmTURkwUaOHInu3bvrjD148ACBgYEAgJiYGL3XZ+C7aONgkyYismD6dltnZmYq3/v6+ha6HDEZDw8c
IyIiUik2aSIiIpXi7m4iIgvhNSWyWMvlZWcp39f/OApWdg5PWPqRq3NeK1VdVDS+kyYiIlIpNmkiIiKV4u5uKhVOgEBEZHhs0lQqnACByDzkZNxGbsZtnTF5mK18n518GRpbu0LrWTtVgY1TFYPXZ+nYpKlUOAECkXnIiN2JtAPrirw/ee2HesddWvVF5cD+hiqL/odNmkqFEyAQmQcn3y5w9A4o8XrWfBdtFGzSREQWzIa7rVWNTZqeiudWEhGZBk/BIiIiUilVNOmFCxfCy8sLDg4OCAgIwJEjR4pc9t///jdat24NV1dXuLq6Iigo6InLE5FxMMdEZc/kTXrDhg0IDg5GaGgojh8/Dh8fH3Tq1Ak3b97Uu/y+ffvQt29f/PTTTzh06BA8PT3RsWNH3Lhxw8iVW7acjNvQJl3S+cpOvqzcn518udD92qRLyPnLqR5kHphjIsMw+WfSc+fOxfDhwzFkyBAAQEREBCIjI7F8+XJMmTKl0PJr1qzRub106VJs2rQJ0dHRGDhwoFFqJp62QbqYYyLDMGmTzs7OxrFjxxASEqKMWVlZISgoCIcOHSrWY9y/fx8PHz5ElSo8OtGYeNoGFWCOiQzHpE06NTUVubm5cHd31xl3d3fHuXPnivUYkydPRo0aNRAUFKT3fq1WC61Wq9xOT08vfcGk4GkbVMAYOQaYZbJMJv9M+lnMmTMH69evx/fffw8HB/2n+4SFhcHFxUX58vT0NHKVRPQkxckxwCyTZTJpk65atSqsra2RnJysM56cnAwPD48nrvvFF19gzpw5+PHHH/HKK68UuVxISAjS0tKUr4SEhDKpnYjyGSPHALNMlsmkTdrOzg7+/v6Ijo5WxvLy8hAdHY0WLVoUud7nn3+OmTNnIioqCk2aNHniz7C3t0elSpV0voio7BgjxwCzTJbJ5Ed3BwcHY9CgQWjSpAmaNWuG8PBwZGZmKkeJDhw4EDVr1kRYWBgA4LPPPsP06dOxdu1aeHl5ISkpCQDg5OQEJycnkz0PIkvGHBMZhsmbdJ8+fZCSkoLp06cjKSkJvr6+iIqKUg5CiY+Ph5XVozf8ixcvRnZ2Nt544w2dx+ElEIlMhzkmMgyTN2kAGD16NEaPHq33vn379uncvnr1quELIqISY46Jyl65PrqbiIjInLFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqZQqmvTChQvh5eUFBwcHBAQE4MiRI09c/rvvvsNLL70EBwcHvPzyy9ixY4eRKiWiojDHRGXP5E16w4YNCA4ORmhoKI4fPw4fHx906tQJN2/e1Lv8wYMH0bdvX7zzzjs4ceIEevTogR49euD06dNGrpyICjDHRIZh8iY9d+5cDB8+HEOGDEGDBg0QERGBChUqYPny5XqXnz9/Pjp37oxJkyahfv36mDlzJho3bowFCxYYuXIiKsAcExmGSZt0dnY2jh07hqCgIGXMysoKQUFBOHTokN51Dh06pLM8AHTq1KnI5YnIsJhjIsOxMeUPT01NRW5uLtzd3XXG3d3dce7cOb3rJCUl6V0+KSlJ7/JarRZarVa5nZaWBgBIT08vsq487f1i1V9aT/rZasTXozBDvibFfT2cnZ2h0WgMVkdxGSPHALNcFvh66FLD6/G0HJu0SRtDWFgYZsyYUWjc09PTBNXkcwk32Y9WJb4euor7eqSlpaFSpUoGrUVNmGX14+uhqzivx9NybNImXbVqVVhbWyM5OVlnPDk5GR4eHnrX8fDwKNHyISEhCA4OVm7n5eXh9u3beO6558rkXUh6ejo8PT2RkJBgUf8wi8LXozBDvSbO
zs5l9ljPwhg5BphlY+ProctUOTZpk7azs4O/vz+io6PRo0cPAPnBi46OxujRo/Wu06JFC0RHR2P8+PHK2O7du9GiRQu9y9vb28Pe3l5nrHLlymVRvo5KlSrxD/kxfD0KM9fXxBg5BphlU+Hrocvor4eY2Pr168Xe3l5WrlwpZ8+elREjRkjlypUlKSlJREQGDBggU6ZMUZY/cOCA2NjYyBdffCG///67hIaGiq2trZw6dcok9aelpQkASUtLM8nPVxu+HoVZwmtS3nMsYhm/p5Lg66HLVK+HyT+T7tOnD1JSUjB9+nQkJSXB19cXUVFRykEl8fHxsLJ6dBB6y5YtsXbtWkybNg1Tp05FnTp1sGXLFjRq1MhUT4HI4jHHRAZi1E0CM5SVlSWhoaGSlZVl6lJUga9HYXxNygf+nnTx9dBlqtdDIyJi6g0FIiIiKszkM44RERGRfmzSREREKsUmTUREpFJs0kRERCrFJk1PpO+4Qh5rmI+vA5UXzHHR1P468OhuPVJTU+Ho6IiKFSuauhSTysvLg5WVFe7cuYNbt27Bzs4Of/vb32BlZaXcR6RmzDJzXN7xt/MXx48fR61atRAbG2vqUkyqILynT5/GP/7xD3To0AEdO3bE9OnT8fDhQwb7f+bPn4+JEyeaugzSg1lmjotLzTnmb+gxcXFxaNu2LUaOHIlWrVoVut9SdjoUBDsuLg4tW7ZE06ZNERERgYYNG2LBggXYunWrqUtUhezsbPz55584c+YMHjx4YDF/H+UBs8wcF5fqc2zUqVNU7OTJk+Ls7CwffvihiIjk5ubK77//Lvv375eLFy8qy+Xl5ZmqRKM6d+6cVKxYUaZOnaqMnT17VjQajXz88cc6y+bk5Bi7PNU4ePCgODg4yNatW01dCv0Ps/wIc1w8as4x30kDePjwIcaMGYOsrCx89tlnEBF0794dAwYMQJs2bfDGG28ol8gri0viqVlubi5yc3Pxr3/9C/b29mjYsKFy36ZNmwAASUlJWLRoEfbu3QutVgtra2tTlWs0eXl5esdbtGiBYcOGYfHixbh9+7aRq6K/YpbzMcf6lcccm/wCG2pga2uLBQsWoFu3bujQoQNyc3Ph4OCAL7/8Evb29oiJicG8efNQuXJlTJ8+3dTlGkTBrrGHDx/CwcEBM2bMwL179xAREQFHR0ecP38eX375JcaPH4/GjRtj0aJFEBEkJCTg73//OwYPHowOHTqY+mkYTMFndzNnzoSNjQ1at26NwMBAAECHDh0QGRmJxMREVKlShQfjmJClZ5k5frJymWPTvpFXl7Nnz8rzzz8vPj4+cuPGDWU8PT1dRo0aJUFBQXLv3j0TVmgYubm5IiJy4sQJadq0qfLcExMTpWfPnlKnTh2pUKGC7Nq1S1lHq9VKWlqazJw5U3r16iW///67SWo3tAMHDsjChQvl66+/ltTUVJkxY4Y0btxY6tatKwMGDJAjR46IiEjPnj3lH//4h4mrpQKWmGXmuGjlOcds0n9x6dIl2bx5s2RnZ4vIo8+tpk+fLn5+fmZ3RZiCYMfGxoqjo6Nyzd+C8cTEROndu7f4+PjIqlWrlPUe//zK3F6TAitWrBBvb28ZPHiwLFu2THlNEhISZPfu3dKsWTNp1qyZtG7dWiZNmiS+vr5y8uRJEbGMzzvVzpKyzBwXrbznmE1aD32/mGHDhsmQIUOUwJuDx7e8HR0ddQ4uERG5c+eOiDzaEm/durUsX75cuf/hw4dGq9XY1q5dKxUqVJDvvvtOtFqt3mWys7MlJiZGRowYIW5ubqLRaOSTTz4xcqX0JJaQZea4aOaQYzbpp0hOTpapU6dK1apV5cyZM6Yup8ydPXtW7OzsJCwsTGc8PDxcgoODJTMzU0QeBbxdu3ayaNEiU5RqNDdu3JCWLVvK7NmzdcYf/4df8I+xQFxcnISGhkqdOnXk7NmzRqmTSsacs8wcF2YuObaoJp2cnCy//PKLHDx4UK5fv/7U5ffu3StD
hw6VGjVqyIkTJwxfoJFlZGRIz549pWLFijqvR1hYmFSsWFF++uknEXn0h5yUlCTt27eXrl27yt27d01RslEcP35c3N3d5ZdfftF7f0HI79+/rzN+6tQpqVOnjkRFRRm8RkvHLD/CHOtnLjm2mCZ98uRJqV+/vjRq1Eg8PDykf//+OgeUiBTeqrp48aKsWrVK/vjjD2OWalB/fY4bNmyQLl26SGBgoGRkZMiCBQukSpUqOgeXPO7GjRuSkJBgjFJNZtu2bVKlShW5cuWKiOg/fzQpKUnCw8Pl3r17Oq+pr6+vzJkzx1ilWiRmmTkuDnPJsUU06TNnzkjVqlVl8uTJkpCQICtWrJAaNWooBwf81eLFi5UjP/8aBnNw7do12bJli3J7y5Yt0qFDB6lVq5Y4OTkpRzo+vlto1qxZcvjwYaPXagqnT58WjUYj8+bNU8b++tnm119/LSNGjND5+9i4caNUqVLFbI+QVQNm+RHm+MnMJcdm36Tv3Lkj7du3l9GjR+uMd+zYUdavXy+7d++W06dPi0j+L/DIkSPi6ekpPXr0kLy8PFUc3VeWsrKy5N1335UXX3xRvvvuO2V8y5YtEhQUJA0bNpT4+HgRebTl+cknn4hGozG73YT65OXlSUZGhrz55pvi7u4u33//vTJeICsrS3r16iUfffSRzrqnTp0ym3dqasQsP8IcP5k55djsm/TNmzdl+fLlEhsbq4x9+umnotFoxM/PT/z8/MTNzU12794tIvm/uHXr1qnql1TWDh06JIMGDZImTZrIhg0blPGCLfHAwEC5dOmSiIhMmzZNHBwc5NixY6Yq16CKencVGRkpPj4+4uXlJf/9739FJP+zv9jYWOnUqZM0btxYOSrWnP75qxmzrIs5fsScc2z2TVpE5Pbt28r369evFxsbG9m8ebOkp6fLH3/8IW+88Yb07NlT0tPTTVilYRT1x3v48GF5++23iwx4+/btZeTIkeLo6Ci//fabsco1qsdfm61bt8qKFStkwYIFytgPP/wgbdu2FY1GI6+88op4eXlJQECAtGnTRjl9x5LnOzYFS80yc1w0c8+xWV9PWkQKzc+bm5uLkydPws/PTxl75513EB8fj927dxu7RIMqmNbu6tWruHHjBqpXr47atWsr9x85cgTz58/H+fPnMWnSJPTp0wcAsG3bNnz66ac4e/YsfvnlFzRu3NhUT8FgHv/bmDJlCtatWwcPDw/cuXMHlStXxrfffgsvLy8kJCQgNjYWv/zyC5ycnODv748uXbrA2toaOTk5sLHhzLrGYMlZZo6LZhE5NuUWgiGcP39e1q5dqzNWsBvjr1ujBZ9TDRs2TCZMmCA5OTmq3eVRWjdu3BCNRiMajUaqVq0qQ4cOldmzZ0tSUpLk5eVJfHy8vP3229KiRQud123Hjh1y7do1E1ZuHHPnzpXq1asr7zJWr16t7D69cOFCkeupecvbXDDLjzDHT2bOOVbB7OFlJzY2Fq+88orOVUxyc3Oh0WiQnp6OnJwcneVzcnIQGhqKyMhIDBs2DNbW1mZ3ZRxXV1e0bdsWbm5u6NGjB+7evYtNmzbB19cXzZo1Q1RUFLy9vdGgQQOEh4dj8+bNAIAuXbqgVq1aJq7esG7evImLFy9i3rx58Pf3x9atW/H+++/j888/h4igX79+uHDhAoDC1x+2hCsGmRKzrIs5LprZ59i02whlJy4uTipUqKBcQ1bk0XR3V69elebNm8vOnTuV+3bt2iXvvfeeuLm5yfHjx41erzEUbCVmZmZKu3btpGPHjvLdd99Jbm6u7Ny5U0JDQ8XHx0fq1q0rNjY2otFopGnTppKRkWFW70Ke5IcffpDExEQ5fvy41K5dW/ksa+nSpaLRaKRWrVrKUbJkHMyyLub46cw5x2bRpM+ePStVq1aVAQMGiIjurrA//vhDPD09ZcSIETp/sNu2bZNJkyap5lw4QykIeEZGhrRr104aN24s27dvV/7pJSUlSXx8vMyZM0dG
jBihnMJibp52jmxERIQEBQVJSkqKiOQflDRq1Ch59913y8UuMXPBLOvHHOezxByX+wPH4uLi0LJlS7i6ukKr1WLnzp1o0qQJcnNzYW1tjaFDhyIrKwtr1qwptPtLq9XC3t7eRJUbhhRxgI21tTUyMzPxz3/+E2lpaQgJCUG3bt1ga2tbaDlz8/hrsmrVKpw6dQo1atRAo0aNlGvnTpw4Ed9++y3OnTuH7OxsDBo0CE2aNMHHH38MwHxfGzVhlh9hjguz2BybcgvhWR0/flwqVKggU6dOlYyMDOndu7dUqlRJjh49qrOcJezymT17ts6Vbf7q8S3xV199VZo2bSpbt2416yvgiOj+7qdMmSLOzs4SFBQk/v7+UqVKFZk1a5aIiPz555/i6ekpVatWFW9vb2nUqJHZXCWpPGCW8zHH+llyjsttk87JyZHAwECZNGmSMlZwzdTHw23uoRbJv9Ta2LFjRaPRyLp164pc7vGAd+zYUerUqSPbt283VplG9/jv/vDhw9K1a1c5cOCAiOSHOTw8XGxsbOSLL74QkfyLNnz55ZfyzTffKP/0zP2fnxowy/mYY/0sPcflsklnZGRITk6OzqT6Bb/I5ORk6dOnjzg7O+udu9Zcpaeny9SpU8XKykrWrFlT5HIFAb93757885//lMuXLxurRJP5z3/+I127dpXAwEBJS0tTxjMyMuTTTz+V+vXry7lz5wqtV14/wypPmGVdzHHRLDXH5a5JnzlzRjp27Cje3t7i7+8vS5YsKXSpscfDbe5b4Y8fSHHlyhUZN26cWFlZ6Uy8/1dqnwbvWZ08eVIOHjyo3F68eLHUrl1bnJyc5Ndff9VZdv/+/VK5cuVC42R4zPIjzHFhzHG+ctWkY2NjpVKlSjJo0CCZO3euNG3aVNzc3JTp8B7/Q09OTpb+/fuLRqMxy9My/ur777+XJk2aSK9evcTa2lqsrKyeuMvMXK1Zs0b8/Pxk7NixOkf7bty4UerVqye9e/fWmb84ISFBateuLT/++KMpyrVYzLJ+zHE+5viRctOkz5w5I87OzjJlyhSd8Vq1askbb7yhd53ExEQZOnSo3l0g5uTYsWPi4OAgS5YskeTkZDl27JiMGjXK4gK+bNkycXJykoULFyoXFnj8Xcby5cvFz89P2rVrJ2vWrJHIyEjp2rWrNGrUqNzvEitPmGX9mON8zLGuctGk8/LypFevXmJvby8//fST5ObmKrt6hg4dKv/85z8L7SYrYG7XkNVn48aN4uPjI5mZmcpYamqqjBgxQqysrGTbtm0mrM44jhw5Is8//7ysX7++0H13795Vvv/vf/8rtWvXFhsbG+nevbtMmjSpXEyyby6Y5aIxx8yxPuWiSYvkX/2mbdu20qpVK9m6dauIiKSkpIiDg4N89dVXJq7OtLZu3SrW1tbKJfkKtjpjYmKU+X4fv+asOSn4B7969WoJCAjQOaBk586dMn78ePH19ZXXX39dOThp7dq14uPjI2PHjpWTJ0/qPA4ZHrOsH3PMHOuj6rm7r1+/jjVr1mDRokVwdHTExo0bISKYN28eVqxYAV9fXwwbNgxjxowBUHheVnOk7zm2bt0azZs3R1hYGK5du6ac8F+zZk307t0boaGhaNiwobFLNbi7d+8qV6/RarXIyspS5ugdN24cZs2ahaNHj6J9+/Y4deoUevToAQDo27cvxowZg5iYGHz99dc4ceKEuq+CYwaYZV3M8SPM8VOYcgvhSU6fPi0+Pj7y9ttvy4cffqjs6rpz5460a9dONBqNdO3a1Wx3cejz+JZ1eHi4fPjhh7Jr1y7Jzc2V//znP9K8eXMZPHiwnDlzRpKTk2Xq1KnSsmVLs7u2rojIqFGjpEePHsrt2NhYadCggTRo0EA8PDzk+eefl2XLlkliYqKIiBw8eFDs7Oxk3759yjqrV6+W2rVry5gxY0Sr1Rr9OVgKZlkXc/wIc/x0qmzSp0+fFldXV5k2bZrObo/NmzfLwYMH5f79
+9KhQwdp3ry5REZGKqE311MRHrdp0yblqNhXX31VGjduLEOGDBERka+//lrat28vGo1GGjRoIK6urnLixAnTFmwgsbGxyj/1e/fuiUj+RAcrVqyQefPmSVpams7fw65du8TX11cuXbqkM75u3TqLOMfUVJhl/ZjjfMzx06muSd+6dUvatGkjo0eP1hmfM2eOaDQaadOmjRw4cEAyMjKkbdu2EhgYKJs3bzb7UIvkX1+3du3aEhERISIily5dkooVK8rEiROVZe7fvy+7d++WPXv2lNurvpTEypUrxc3NTdnS1ufBgwfSvXt3ef3115UmYO4HIakBs6wfc1wYc1w01TXps2fPyosvvih79+5VfgGLFy8WW1tbWbhwoXTo0EE6duwoBw8elMzMTHn55Zelc+fOkpGRYeLKy07B8y7Y7Vdw++eff5aXX35ZREQuX74stWrVkuHDhyvrHTlyxCwPnHjcX0O5f/9+CQgIkEaNGklycrKIiM6WeUxMjHTu3FlnDl9LCLYaWHqWmeOiMcfFp7omvXr1arG2ttbZmk5ISJBffvlFREROnTol7du3Fz8/P7l586bcunVLrly5YqJqy17B8z5//rwEBwfr7ML5+eefpUOHDnLx4kXlkn0F/wB+/fVXmTBhgnJkqDl6PJT79u2TCxcuiEj+c2/durXUr19fCfj9+/dl+PDhEhQUJK+//rpZzOFb3lhylpnjojHHJaO6Jr1//36xt7eXTZs2iYjuZ1MFv9xvvvlGmjZtKgkJCSap0dDu3LkjL730kjg4OMi7776rnNCfmJgorq6uotFoZMyYMTrrjB8/Xl599VW5deuWKUo2uMf/DiZPniwvvfSSrFixQu7duyd5eXly4MABad26tTRo0EAJ+MmTJ2XPnj3K340lBVsNLD3LzHFhzHHJqa5JJyQkiJubm3Tv3l2uXr2qd5kJEybIm2++aZZHO4rkhzsgIECqVasmPXr0kGHDhsnFixdFRGT37t3i6uoqI0aMkFOnTslvv/0mEyZMEBcXFzl16pSJKze8zz//XNzc3OTnn38utFv0wIED0qZNG2nYsKHOBRtELGfXmJpYepaZ46Ixx8WnuiYtkj/zjp2dnQwYMEDOnDmjjKelpcmkSZPE1dVVTp8+bcIKDafgj3Dv3r3SrFkzGT58uLRq1UqGDx+u7DLbtGmTVKtWTTw9PaVevXri7+9vtkd/ZmVlKd+np6dL27ZtJTw8XGeZx0/ZOXz4sNStW1f69esnIuZ/lLDaWWqWmWNdzHHpaUTUN2tAbm4uli5ditGjR8Pb2xstW7aEra0tbty4gd9++w07duyAn5+fqcssE3l5ebCyskJubi6sra2VSQ6uXLmCkJAQDB48GLdu3UJ4eDgaN26MqVOnwsvLCykpKbhy5QqcnZ3h5uaG5557zsTPpOz9+OOPiIuLw9///nc0a9YMd+7cga+vL8LCwtCvXz/lNQOArKwsXL9+Hd7e3jh79izq1aun3EemYylZZo6Lxhw/G1XOOGZtbY2RI0ciJiYGDRo0wLFjx3DmzBk0atQI+/fvN4tQA/mzDllZWeHixYuYPHky9u/fD41GA41Gg9q1a+OFF17AhAkT8NZbb2HkyJGIi4vD7Nmz8ccff6BatWpo1qwZ6tevb5bBXrFiBYYOHYorV67Ayir/z9TV1RXPPfcctmzZAiD/7yQvLw8AcP78efz3v/9FSkoKGjRoAGtra+Tm5pqqfPofS8gyc1w05rgMmPJtfHGY++xDqampUrduXdFoNFKhQgWZOHGifP311yKSv4vojTfeUA68mTdvngQGBspbb70l165dM2XZBrVu3TqpUKGCbNiwQZkAo2B31/r16+XFF1+UsWPHKuMPHjyQzp07yz/+8Q+L3i2mduacZea4MOa4bKh+otOCrS8gf4u1YD5bc+Hg4IDXXnsNp06dgpWVFfLy8rBp0yasWrUKw4cPx+3bt7Fnzx707NkT48ePR3Z2Nvbs2QNbW1tT
l24QKSkpWLJkCT7//HP07t1bGc/MzMQff/yBihUr4r333sOKFSvg5+eHWrVqITk5Gffv38exY8eg0WjM8u/EHJhzlpljXcxx2VHlZ9KWouCP8N69e5g5cyZOnjyJl19+GTNmzMAXX3yB69evY9myZRARnDt3DnXr1gUA3LlzB66uriau3jBSUlLQtm1bzJo1S5lIf/Hixdi7dy82bdoEb29vODs745tvvsGaNWuQnZ0Nd3d3hISEwMbGBjk5OeY5yT6pFnNcGHNcdvgqqICzszM++ugjzJ49G9HR0ahcuTKmT58OEUHHjh1hZWWFunXrKgenmGuwC6SnpyMyMhKVKlXCokWLcOHCBQQGBiIqKgppaWkICQnB/v37MXfuXJ31cnNzGWwyGeZYF3NcNvhKmMDjW94VK1ZEZmYmXFxcEBISAmtra2zZsgWZmZmYMWMG3njjDWW9x3cXmqtq1aph5cqV6NWrF/bu3QtnZ2eEh4fDx8cHzz33HO7cuYPZs2fj1q1bhda19KNAybiY46Ixx2WHTdrICoK9fft2fPXVV7h16xaee+45jB49Gt27d0dISAhEBHv37oVGo8GMGTMsbquyffv2uHjxIjIyMvDCCy8Uur9SpUrw9PQ0QWVE+Zjjp2OOy4b5b9KpjEajwY4dO9CrVy+0bt0a3bt3h6enJ3r06IEFCxbA2dkZkydPRlBQEDZu3IjZs2ebumSTqFatWqFgp6SkYMCAAdBqtXjnnXdMVBkRc1xczPGzs6xNOxPQarWwt7dXbmdnZ2Pp0qV477338PHHHwPInwihYcOGGDt2LF588UV06dIFkyZNgr29PQYMGGCq0lUjNTUVS5cuRUxMDG7evIkDBw4o509y1xgZA3P87Jjj0uE7aQOaOnUqhg8fjscPoM/OzsaFCxfg4uICAMp948aNQ9++fbF8+XLls61p06bBy8vLFKWryvXr13HgwAF4e3vj4MGDsLW1RU5ODoNNRsEclw3muHT4TtqAevbsCTs7O2g0GmVr0cnJCW3atMGePXswYsQI1KhRA0D+wSTVqlVDXFwcKlasCAA8R/B/fH19sXr1ari4uCivpaV9vkemwxyXDea4dPhO2oCaNGmCV155BXv27EGfPn1w9+5dAPkHVOTk5GDu3LlISkpSQqzVauHm5oasrCzw9HVdlStXViY44JY3GRNzXHaY45LjZkwZKjj/8a+cnJywZcsW2NjYKKclXL58GZs2bcJPP/2E1q1bIzk5Gdu3b8fBgwfh4OBggurLB74rIUNjjg2POS4+zjhWRgqCHR8fj19//RVJSUkYMWKEEtSjR4+ic+fOaNu2LdavXw9bW1ts374d+/btw2+//YbatWsjODgYjRo1MvEzIbJczDGpDZt0GSgI9smTJ9GjRw+4urri8uXL8PDwwPHjx+Ho6AgAOHz4MLp06YJ27dphzZo1SvCzs7NhY2NjEZMcEKkVc0xqxL+mZ1QQ7Li4ODRv3hz9+vXDjh07cPToUWRkZGD79u3KsgEBAdixYwd++uknDB48GHfu3AEA2NnZMdhEJsQck1rxM+lnZGVlhUuXLqF58+aYOHEiZs6cCQBwd3fH888/j7i4OERGRqJTp05o2bIlmjdvjp07d6JFixZwdHTE8uXL+fkMkYkxx6RWbNLPKC8vD8uXL4ezs7PORdvnzJmDQ4cOoVatWrh8+TLWrVuH9957DzNnzkRAQACOHj0KJycnBptIBZhjUis26WdkZWWF0aNH4/79+1i/fj0cHByQnp6OuXPnKlveGo0GY8aMwbJlyzBu3Dg4OTnB39/f1KUT0f8wx6RW/AClDNSoUQNTpkxB06ZNER4ejqlTp2L9+vXo3LkzsrKyAABdunRBtWrVoNVqTVwtEenDHJMa8Z10GfHw8MC0adNgZWUFe3t7nDhxAq+++qpyROiPP/6IatWqwc3NzcSVElFRmGNSGzbpMuTu7o6QkBDk5eXhu+++Q05ODiZPnoxPP/0Uy5Yt
w4EDB8z+Qu9E5R1zTGrC86QNICkpCbNmzUJcXBy0Wi1OnjyJmJgYfn5FVI4wx6QG/EzaADw8PPDRRx/B29sbt2/fxqFDhxhsonKGOSY14DtpA0pJSUFeXh7c3d1NXQoRlRJzTKbEJk1ERKRS3N1NRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRSbNJEREQqxSZNRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRS/w/YHtTpevyjUAAAAABJRU5ErkJggg==",
+ "text/plain": [
+ "<Figure size 500x300 with 2 Axes>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "\n",
+ "# Per-method evaluation frames, assumed to be defined by earlier cells; each\n",
+ "# must provide 'precision' and 'recall' columns.\n",
+ "dfs = [comparison_out_df_gpt, comparison_out_df_biomed_ner_all, comparison_out_df_scispacy]\n",
+ "labels = ['GPT-3.5', 'BioMed NER', 'SciSpaCy']\n",
+ "\n",
+ "\n",
+ "def sem(data):\n",
+ "    \"\"\"Return the standard error of the mean of `data`, ignoring NaN entries.\n",
+ "\n",
+ "    NaNs are dropped before both the standard deviation and the sample count\n",
+ "    are computed, matching the NaN-skipping behaviour of pandas' `.mean()`.\n",
+ "    Returns NaN when fewer than two valid values remain.\n",
+ "    \"\"\"\n",
+ "    values = np.asarray(data, dtype=float)\n",
+ "    values = values[~np.isnan(values)]\n",
+ "    if values.size < 2:\n",
+ "        return float('nan')\n",
+ "    return values.std(ddof=1) / np.sqrt(values.size)\n",
+ "\n",
+ "\n",
+ "def remove_spines(ax):\n",
+ "    \"\"\"Hide the top and right spines of `ax`.\"\"\"\n",
+ "    for side in ('top', 'right'):\n",
+ "        ax.spines[side].set_visible(False)\n",
+ "\n",
+ "\n",
+ "def plot_metric_bars(ax, positions, means, sems, tick_labels, ylabel, bar_width, title=None):\n",
+ "    \"\"\"Draw one bar per method on `ax` with `sems` as error bars.\n",
+ "\n",
+ "    The y-axis is fixed to [0, 1] and the top/right spines are hidden; a\n",
+ "    title is set only when `title` is given.\n",
+ "    \"\"\"\n",
+ "    ax.bar(positions, means, bar_width, yerr=sems, capsize=5)\n",
+ "    ax.set_ylabel(ylabel)\n",
+ "    if title is not None:\n",
+ "        ax.set_title(title)\n",
+ "    ax.set_xticks(positions)\n",
+ "    ax.set_xticklabels(tick_labels, rotation=45, ha='right')\n",
+ "    ax.set_ylim(0, 1)\n",
+ "    remove_spines(ax)\n",
+ "\n",
+ "\n",
+ "# Mean and SEM of each metric, one entry per method (same order as `labels`)\n",
+ "precision_means = [df['precision'].mean() for df in dfs]\n",
+ "precision_sems = [sem(df['precision']) for df in dfs]\n",
+ "recall_means = [df['recall'].mean() for df in dfs]\n",
+ "recall_sems = [sem(df['recall']) for df in dfs]\n",
+ "\n",
+ "# One panel per metric, sharing bar positions and width\n",
+ "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(5, 3))\n",
+ "x = np.arange(len(labels))\n",
+ "width = 0.35\n",
+ "\n",
+ "plot_metric_bars(ax1, x, precision_means, precision_sems, labels, 'Precision', width)\n",
+ "plot_metric_bars(ax2, x, recall_means, recall_sems, labels, 'Recall', width)\n",
+ "\n",
+ "fig.tight_layout()\n",
+ "\n",
+ "# Save before showing so the written file does not depend on how the active\n",
+ "# backend treats the figure after show().\n",
+ "fig_path = 'data/results/figures'\n",
+ "os.makedirs(fig_path, exist_ok=True)\n",
+ "fig.savefig(os.path.join(fig_path, 'ner_extraction_comparison.tiff'), format='tiff', bbox_inches='tight')\n",
+ "\n",
+ "plt.show()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "9af751da-eee0-4d03-9bba-137baf429eae",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[0.9549689440993789, 0.23680124223602483, 0.5910973084886129] [0.008286258373808576, 0.022466879308773186, 0.025950253677613028]\n",
+ "[0.9968944099378882, 0.2795031055900621, 0.6428571428571429] [0.003105590062111801, 0.025047065948613282, 0.02674395944460631]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Report the (means, SEMs) pair for precision, then for recall.\n",
+ "for metric_means, metric_sems in ((precision_means, precision_sems), (recall_means, recall_sems)):\n",
+ "    print(metric_means, metric_sems)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "5f2faf89-cdc6-492c-9372-8f1ff6233dd5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.5822619658819637"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Average of the run_time_per_text column for the GPT results\n",
+ "comparison_out_df_gpt['run_time_per_text'].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "c3c44796-55f0-4027-8651-f53fdce6629c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.015508739844612453"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Average of the run_time_per_text column for the biomed-ner-all results\n",
+ "comparison_out_df_biomed_ner_all['run_time_per_text'].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "302251b3-4748-4cd2-950a-d4e25ffec4bf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.013423655344092327"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Average of the run_time_per_text column for the scispacy results\n",
+ "comparison_out_df_scispacy['run_time_per_text'].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "362efc28-28c0-46ad-95b2-3d78ca7a6540",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# # Print all labels\n",
+ "# # print(model.config.id2label)\n",
+ "\n",
+ "# # Or, if you want a list of just the label names\n",
+ "# label_names = list(model.config.id2label.values())\n",
+ "\n",
+ "# set(map(lambda x:x.split('-')[-1], label_names))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "64c523d7-3ed8-4e4a-a1a0-089bd84dd554",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# method = method_list[0]\n",
+ "# text = data.iloc[25].text\n",
+ "# entity, run_time = entity_extraction(text, method)\n",
+ "# print(text)\n",
+ "# print(entity, run_time, method)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a60d8c0-fd66-4700-911d-a3e8ac51115e",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}