From 73c194f304f827b55081b15524479f82a1b7d94c Mon Sep 17 00:00:00 2001 From: maszhongming Date: Tue, 16 Sep 2025 15:15:29 -0500 Subject: Initial commit --- notebooks/disease_extraction_comparison.ipynb | 750 ++++++++++++++++++++++++++ 1 file changed, 750 insertions(+) create mode 100644 notebooks/disease_extraction_comparison.ipynb (limited to 'notebooks/disease_extraction_comparison.ipynb') diff --git a/notebooks/disease_extraction_comparison.ipynb b/notebooks/disease_extraction_comparison.ipynb new file mode 100644 index 0000000..dcf68d7 --- /dev/null +++ b/notebooks/disease_extraction_comparison.ipynb @@ -0,0 +1,750 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "860ebc4a-63e5-462d-b6ab-9bae23d10afb", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.chdir('..')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "851d771c-15b4-4168-acf5-86bdd15d9610", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/anaconda3/envs/kg_rag_test_2/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from kg_rag.utility import *\n", + "from tqdm import tqdm\n", + "import pandas as pd\n", + "import spacy\n", + "import scispacy\n", + "from scispacy.linking import EntityLinker\n", + "from transformers import pipeline\n", + "from transformers import AutoModelForTokenClassification\n", + "from IPython.display import clear_output" + ] + }, + { + "cell_type": "markdown", + "id": "f242aeb6-99f7-496a-8fd8-1f0d964a2556", + "metadata": {}, + "source": [ + "## List the NER methods to compare" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "45fdafb8-65cc-44dd-b22d-f17e5e807b49", + "metadata": {}, + "outputs": [], + "source": [ + "method_list = ['gpt', 'biomed-ner-all', 'scispacy']\n" + ] + }, + { + "cell_type": "markdown", + "id": "ddc073a0-2508-410e-8e39-4bd94020bf8a", + "metadata": {}, + "source": [ + "## Load spacy and bert based models" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "07a1ccc1-826a-4986-b7a7-f7bf26dd1d8c", + "metadata": {}, + "outputs": [], + "source": [ + "nlp = spacy.load(\"en_core_sci_sm\") \n", + "nlp.add_pipe(\"scispacy_linker\", config={\"resolve_abbreviations\": True, \"linker_name\": \"umls\"})\n", + "\n", + "\n", + "biomed_ner_all_tokenizer = AutoTokenizer.from_pretrained(\"d4data/biomedical-ner-all\",\n", + " revision=\"main\",\n", + " cache_dir=config_data['LLM_CACHE_DIR'])\n", + "biomed_ner_all_model = AutoModelForTokenClassification.from_pretrained(\"d4data/biomedical-ner-all\", \n", + " torch_dtype=torch.float16,\n", + " revision=\"main\",\n", + " cache_dir=config_data['LLM_CACHE_DIR']\n", + " )\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "id": "4cf3589a-6dec-41e5-9f43-703b0171e79c", + "metadata": {}, + "source": [ + "## Load evaluation dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "108796a6-5887-464b-8394-04e04b017d0b", + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_csv('data/dataset_for_entity_retrieval_accuracy_analysis.csv')\n" + ] + }, + { + "cell_type": "markdown", + "id": "ed5fca24-2b5d-4696-bcd4-ba911dce6624", + "metadata": {}, + "source": [ + "## Custom functions" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6e262f71-eac1-4894-8390-f1f4c2e8f84f", + "metadata": {}, + "outputs": [], + "source": [ + "def entity_extraction(text, method):\n", + " if method == 'gpt':\n", + " start_time = time.time()\n", + " entity = disease_entity_extractor_compare_version(text)\n", + " run_time = time.time()-start_time\n", + " elif method == 'scispacy':\n", + " start_time = time.time()\n", + " entity = disease_entity_extractor_scispacy(text)\n", + " run_time = time.time()-start_time\n", + " elif method == 'biomed-ner-all':\n", + " start_time = time.time()\n", + " entity = disease_entity_extractor_biomed_ner(text)\n", + " run_time = time.time()-start_time\n", + " return entity, run_time\n", + "\n", + "def get_GPT_response_compare_version(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature=0):\n", + " return fetch_GPT_response(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature)\n", + " \n", + "def disease_entity_extractor_compare_version(text):\n", + " chat_model_id, chat_deployment_id = get_gpt35()\n", + " prompt_updated = system_prompts[\"DISEASE_ENTITY_EXTRACTION\"] + \"\\n\" + \"Sentence : \" + text\n", + " resp = get_GPT_response_compare_version(prompt_updated, system_prompts[\"DISEASE_ENTITY_EXTRACTION\"], chat_model_id, chat_deployment_id, temperature=0)\n", + " try:\n", + " entity_dict = json.loads(resp)\n", + " return entity_dict[\"Diseases\"]\n", + " except:\n", + " return None\n", + "\n", + "def disease_entity_extractor_scispacy(text):\n", + " doc = nlp(text)\n", + " disease_semantic_types = {\"T047\", \"T191\"} \n", + " entity = []\n", + " for ent in doc.ents:\n", + " if ent._.kb_ents:\n", + " umls_cui = ent._.kb_ents[0][0]\n", + " umls_entity = nlp.get_pipe(\"scispacy_linker\").kb.cui_to_entity[umls_cui]\n", + " if any(t in disease_semantic_types for t in umls_entity.types):\n", + " entity.append(ent.text)\n", + " return entity\n", + "\n", + "def disease_entity_extractor_biomed_ner(text):\n", + " pipe = pipeline(\"ner\", model=biomed_ner_all_model, tokenizer=biomed_ner_all_tokenizer, aggregation_strategy=\"simple\", device=0)\n", + " out = pipe(text)\n", + " return list(filter(None, map(lambda x:x['word'] if x['entity_group']=='Disease_disorder' or x['entity_group']=='Sign_symptom' else None, out)))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "65c46409-f3dd-45e6-9ea8-da84cd8db212", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing method : gpt, 1/3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "322it [03:07, 1.71it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing method : biomed-ner-all, 2/3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "322it [00:05, 63.45it/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing method : scispacy, 3/3\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "322it [00:04, 72.89it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 11.3 s, sys: 1.71 s, total: 13 s\n", + "Wall time: 3min 17s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "%%time\n", + "comparison_out = []\n", + "for method_index, method in enumerate(method_list):\n", + " print(f'Processing method : {method}, {method_index+1}/{len(method_list)}')\n", + " for row_index, row in tqdm(data.iterrows()):\n", + " entity, run_time = entity_extraction(row['text'], method)\n", + " comparison_out.append((row['text'], row['node_hits'], entity, run_time, method))\n", + "\n", + "comparison_out_df = pd.DataFrame(comparison_out, columns=['input_text', 'node_hits', 'entity_extracted', 'run_time_per_text', 'ner_method'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3fbfbfed-3dd6-4e86-8fac-e0ee40d2c363", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
input_textnode_hitsentity_extractedrun_time_per_textner_method
0LIRAGLUTIDE TREATS OBESITYobesity[obesity]2.214761gpt
1disease ontology identifier for central diabet...central diabetes insipidus[central diabetes insipidus]0.549497gpt
2Xeroderma pigmentosum, group G is not associat...xeroderma pigmentosum[Xeroderma pigmentosum]0.926769gpt
3cherubism is not a autosomal dominant diseasecherubism[cherubism, autosomal dominant disease]0.675068gpt
4MASA SYNDROME (DISORDER) IS NOT ASSOCIATED WIT...MASA syndrome[MASA SYNDROME]0.465556gpt
..................
961antineoplastic agents treats osteosarcomaosteosarcoma[osteosarcoma]0.012946scispacy
962timothy syndrome associates gene cacna1cTimothy syndrome[syndrome]0.011308scispacy
963piebaldism is a autosomal dominant diseasepiebaldism[autosomal dominant disease]0.012271scispacy
964Disease ontology identifier for Loeys-Dietz sy...Loeys-Dietz syndrome[Loeys-Dietz syndrome]0.012468scispacy
965NOONAN SYNDROME ASSOCIATES GENE PTPN11Noonan syndrome[NOONAN SYNDROME]0.010858scispacy
\n", + "

966 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " input_text \\\n", + "0 LIRAGLUTIDE TREATS OBESITY \n", + "1 disease ontology identifier for central diabet... \n", + "2 Xeroderma pigmentosum, group G is not associat... \n", + "3 cherubism is not a autosomal dominant disease \n", + "4 MASA SYNDROME (DISORDER) IS NOT ASSOCIATED WIT... \n", + ".. ... \n", + "961 antineoplastic agents treats osteosarcoma \n", + "962 timothy syndrome associates gene cacna1c \n", + "963 piebaldism is a autosomal dominant disease \n", + "964 Disease ontology identifier for Loeys-Dietz sy... \n", + "965 NOONAN SYNDROME ASSOCIATES GENE PTPN11 \n", + "\n", + " node_hits entity_extracted \\\n", + "0 obesity [obesity] \n", + "1 central diabetes insipidus [central diabetes insipidus] \n", + "2 xeroderma pigmentosum [Xeroderma pigmentosum] \n", + "3 cherubism [cherubism, autosomal dominant disease] \n", + "4 MASA syndrome [MASA SYNDROME] \n", + ".. ... ... \n", + "961 osteosarcoma [osteosarcoma] \n", + "962 Timothy syndrome [syndrome] \n", + "963 piebaldism [autosomal dominant disease] \n", + "964 Loeys-Dietz syndrome [Loeys-Dietz syndrome] \n", + "965 Noonan syndrome [NOONAN SYNDROME] \n", + "\n", + " run_time_per_text ner_method \n", + "0 2.214761 gpt \n", + "1 0.549497 gpt \n", + "2 0.926769 gpt \n", + "3 0.675068 gpt \n", + "4 0.465556 gpt \n", + ".. ... ... \n", + "961 0.012946 scispacy \n", + "962 0.011308 scispacy \n", + "963 0.012271 scispacy \n", + "964 0.012468 scispacy \n", + "965 0.010858 scispacy \n", + "\n", + "[966 rows x 5 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comparison_out_df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ebb106e7-9f63-475c-acec-61dffbda4f98", + "metadata": {}, + "outputs": [], + "source": [ + "comparison_out_df_gpt = comparison_out_df[comparison_out_df.ner_method=='gpt']\n", + "comparison_out_df_biomed_ner_all = comparison_out_df[comparison_out_df.ner_method=='biomed-ner-all']\n", + "comparison_out_df_scispacy = comparison_out_df[comparison_out_df.ner_method=='scispacy']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b6524af1-912a-44e0-8687-3e9ff65d14e3", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def calculate_precision_recall(row):\n", + " # Convert to lowercase and split node_hits into a list\n", + " true_entities = set([row['node_hits'].lower()])\n", + " \n", + " # Convert extracted_entity list to lowercase\n", + " predicted_entities = set([entity.lower() for entity in row['entity_extracted']])\n", + " \n", + " # Calculate true positives, false positives, and false negatives\n", + " true_positives = len(true_entities.intersection(predicted_entities))\n", + " false_positives = len(predicted_entities - true_entities)\n", + " false_negatives = len(true_entities - predicted_entities)\n", + " \n", + " # Calculate precision and recall\n", + " precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0\n", + " recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0\n", + " \n", + " return pd.Series({'precision': precision, 'recall': recall})\n", + "\n", + "comparison_out_df_gpt[['precision', 'recall']] = comparison_out_df_gpt.apply(calculate_precision_recall, axis=1)\n", + "comparison_out_df_biomed_ner_all[['precision', 'recall']] = comparison_out_df_biomed_ner_all.apply(calculate_precision_recall, axis=1)\n", + "comparison_out_df_scispacy[['precision', 'recall']] = comparison_out_df_scispacy.apply(calculate_precision_recall, axis=1)\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "09112cfd-43a3-4bdd-8128-e872f5ede03a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9549689440993789\n", + "0.9968944099378882\n" + ] + } + ], + "source": [ + "print(comparison_out_df_gpt.precision.mean())\n", + "print(comparison_out_df_gpt.recall.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3814b05f-8708-428c-8c37-27160feb3ed7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.23680124223602483\n", + "0.2795031055900621\n" + ] + } + ], + "source": [ + "print(comparison_out_df_biomed_ner_all.precision.mean())\n", + "print(comparison_out_df_biomed_ner_all.recall.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9496c72c-2976-4bdd-bde5-6b8612461853", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.5910973084886129\n", + "0.6428571428571429\n" + ] + } + ], + "source": [ + "print(comparison_out_df_scispacy.precision.mean())\n", + "print(comparison_out_df_scispacy.recall.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "075fa7ce-e463-459c-88e6-00d3db62682f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAEiCAYAAADd4SrgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6w0lEQVR4nO3deVhU9f4H8PewoyBiCqgXI0PNpQBRcUGvJq5dvaalqbnmkuUWaoqZ5M8U65ZiuWDX7ep1KzU1UUwxS9TUVHDLLRfQAMEFBGUQ+Pz+4HJ0YlBAZuYw8349D8/DfOec4TMDbz5nzpzzPRoRERAREZHqWJm6ACIiItKPTZqIiEil2KSJiIhUik2aiIhIpdikiYiIVIpNmoiISKXYpImIiFSKTZqIiEil2KSJiIhUik2aiIhIpUzapH/55Rd069YNNWrUgEajwZYtW566zr59+9C4cWPY29vD29sbK1euNHidRPRkzDKRYZi0SWdmZsLHxwcLFy4s1vJXrlzBa6+9hnbt2iE2Nhbjx4/HsGHDsGvXLgNXSkRPwiwTGYZGLRfY0Gg0+P7779GjR48il5k8eTIiIyNx+vRpZeytt97C3bt3ERUVZYQqiehpmGWislOuPpM+dOgQgoKCdMY6deqEQ4cOmagiIioNZpmoeGxMXUBJJCUlwd3dXWfM3d0d6enpePDgARwdHQuto9VqodVqldsiguzsbFStWhUajcbgNRNRYcwyUfGUqyZdGmFhYZgxY0ah8bS0NFSqVMkEFRFRaZQmy15TIguN3d67FPeObinxz3du2gNVXh2mM3Z1zmslfhyikihXu7s9PDyQnJysM5acnIxKlSrp3fIGgJCQEKSlpSlfCQkJxiiViJ6AWSYqnnL1TrpFixbYsWOHztju3bvRokWLItext7eHvb29oUsjohIwZZYrNeuJig3alng9a6cqz/yziUrKpE06IyMDly5dUm5fuXIFsbGxqFKlCmrVqoWQkBDcuHEDq1atAgC8++67WLBgAT788EMMHToUe/fuxbfffovIyMK7tIjIeMpTlm2cqsCGDZfKCZPu7v7tt9/g5+cHPz8/AEBwcDD8/Pwwffp0AEBiYiLi4+OV5V944QVERkZi9+7d8PHxwZdffomlS5eiU6dOJqmfiPIxy0SGoZrzpI0lPT0dLi4uJT5wLDExEYmJiSX+edWrV0f16tVLvB4RPVlxsqzvwLGyxAPHyNDK1WfSprRkyRK9R5Y+TWhoKD755JOyL4iIiMwem3QxjRw5Et27d9cZe/DgAQIDAwEAMTExeo9K5btoIiIqLTZpPfTtIsvJuI3cjNs6Y/IwW/m+16KD0NjaFVrPWs9BKtxFRkRExcEmXUwZsTuRdmBdkfcnr/1Q77hLq76oHNjfUGUREZEZY5MuJiffLnD0Dijxejy3koiISotNuph4biURERlbuZoWlIiIyJKwSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREamUyZv0woUL4eXlBQcHBwQEBODIkSNPXD48PBz16tWDo6MjPD098cEHHyArK8tI1RJRUZhlorJn0ia9YcMGBAcHIzQ0FMePH4ePjw86deqEmzdv6l1+7dq1mDJlCkJDQ/H7779j2bJl2LBhA6ZOnWrkyonoccwykWGYtEnPnTsXw4cPx5AhQ9CgQQNERESgQoUKWL58ud7lDx48iFatWqFfv37w8vJCx44d0bdv36dusRORYTHLRIZhsiadnZ2NY8eOISgo6FExVlYICgrCoUOH9K7TsmVLHDt2TAny5cuXsWPHDnTt2tUoNRNRYcwykeHYmOoHp6amIjc3F+7u7jrj7u7uOHfunN51+vXrh9TUVAQGBkJEkJOTg3ffffeJu8i0Wi20Wq1yOz09vWyeABEBYJaJDMnkB46VxL59+zB79mwsWrQIx48fx+bNmxEZGYmZM2cWuU5YWBhcXFyUL09PTyNWTET6MMtExWOyd9JVq1aFtbU1kpOTdcaTk5Ph4eGhd52PP/4YAwYMwLBhwwAAL7/8MjIzMzFixAh89NFHsLIqvM0REhKC4OBg5XZ6ejrDTVSGmGUiwzHZO2k7Ozv4+/sjOjpaGcvLy0N0dDRatGihd5379+8XCq+1tTUAQET0rmNvb49KlSrpfBFR2WGWiQzHZO+kASA4OBiDBg1CkyZN0KxZM4SHhyMzMxNDhgwBAAwcOBA1a9ZEWFgYAKBbt26YO3cu/Pz8EBAQgEuXLuHjjz9Gt27dlIATkfExy0SGYdIm3adPH6SkpGD69OlISkqCr68voqKilANQ4uPjdba2p02bBo1Gg2nTpuHGjRuoVq0aunXrhlmzZpnqKRARmGUiQ9FIUfuWzFR6ejpcXFyQlpZW5O4yrymRBq3h6pzXDPr4RJaAWSZLUK6O7iYiIrIkbNJEREQqxSZNRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRSbNJEREQqxSZNRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRSbNJEREQqxSZNRESkUmzSREREKsUmTUREpFI2pi6AiIhILRITE5GYmFji9apXr47q1auXeT1s0kRERP+zZMkSzJgxo8TrhYaG4pNPPinzetikiYiI/mfkyJHo3r27ztiDBw8QGBgIAIiJiYGjo2Oh9QzxLhpgkyYiIlLo222dmZmpfO/r64uKFSsarR4eOEZERKRSpXonnZubi5UrVyI6Oho3b95EXl6ezv179+4tk+KIiIgsWama9Lhx47By5Uq89tpraNSoETQaTVnXRUREZFBeUyKLtVxedpbyff2Po2Bl51Cs9a7Oea1UdT2uVE16/fr1+Pbbb9G1a9dnLoCIiIj0K9Vn0nZ2dvD29i7rWoiIiOgxpWrSEyZMwPz58yEiz1zAwoUL4eXlBQcHBwQEBODIkSNPXP7u3bt4//33Ub16ddjb26Nu3brYsWPHM9dB9KwSExNx/PjxEn+VZuIENWKWicpeqXZ3x8TE4KeffsLOnTvRsGFD2Nra6ty/efPmYj3Ohg0bEBwcjIiICAQEBCA8PBydOnXC+fPn4ebmVmj57OxsdOjQAW5ubti4cSNq1qyJa9euoXLlyqV5GkRlSm2TIBgTs0zmIifjNnIzbuuMycNs5fvs5MvQ2NoVWs/aqQpsnKqUeT2latKVK1fG66+//sw/fO7cuRg+fDiGDBkCAIiIiEBkZCSWL1+OKVOmFFp++fLluH37Ng4ePKhsGHh5eT1zHURlQW2TIBgTs0zmIiN2J9IOrCvy/uS1H+odd2nVF5UD+5d5PaVq0itWrHjmH5ydnY1jx44hJCREGbOyskJQUBAOHTqkd51t27ahRYsWeP/997F161ZUq1YN/fr1w+TJk2Ftbf3MNRE9C7VNgmAszDKZEyffLnD0DijxetYGeBcNPOOMYykpKTh//jwAoF69eqhWrVqx101NTUVubi7c3d11xt3d3XHu3Dm961y+fBl79+5F//79sWPHDly6dAnvvfceHj58iNDQUL3raLVaaLVa5XZ6enqxaySip2OWyZzYGGi3dWmV6sCxzMxMDB06FNWrV0ebNm3Qpk0b1KhRA++88w7u379f1jUq8vLy4Obmhm+++Qb+/v7o06cPPvroI0RERBS5TlhYGFxcXJQvT09Pg9VHRMXDLBMVT6neSQcHB+Pnn3/GDz/8gFatWgHI/7xt7NixmDBhAhYvXvzUx6hatSqsra2RnJysM56cnAwPDw+961SvXh22trY6u8Pq16+PpKQkZGdnw86u8If5ISEhCA4OVm6np6cz3PTMDDkJQllMgPAkJ0+eLPayr7zyylOXYZaJDKdUTXrTpk3YuHEj2rZtq4x17doVjo6O6N27d7GatJ2dHfz9/REdHY0ePXoAyN+6jo6OxujRo/Wu06pVK6xduxZ5eXmwssrfCXDhwgVUr15db6gBwN7eHvb29iV7gkRmzNfXFxqNpshTKAvu02g0yM3NferjMctEhlOqJn3//v1Cnz8BgJubW4l2dwcHB2PQoEFo0qQJmjVrhvDwcGRmZipHiA4cOBA1a9ZEWFgYAGDUqFFYsGABxo0bhzFjxuDixYuYPXs2xo4dW5qnQWSRrly5UuaPySwTGUapmnSLFi0QGhqKVatWwcEhf/fdgwcPMGPGDLRo0aLYj9OnTx+kpKRg+vTpSEpKgq+vL6KiopQNgPj4eGUrGwA8PT2xa9cufPDBB3jllVdQs2ZNjBs3DpMnTy7N0yCySM8//3yZPyazTGQYpWrS8+fPR6dOnfC3v/0NPj4+AIC4uDg4ODhg165dJXqs0aNHF7lLbN++fYXGWrRogV9//bXENRMZmtomQSjKtm3bir3sX8/7fhJmmajslapJN2rUCBcvXsSaNWuUUyz69u2L/v37652sgcgSqG0ShKIUfG78NMX9TJqIDKfU50lXqFABw4cPL8taiMo1tU2CUJS/Xv+diNSr2E1627Zt6NKlC2xtbZ+6u6wku8iIzIXaJkEgovKv2E26R48eSEpKgpub2xN3l3EXGVH5kpmZiZ9//hnx8fHIzs7WuY9HWxOZVrGb9OO7yLi7jMg8nDhxAl27dsX9+/eRmZmJKlWqIDU1FRUqVICbmxubNJGJlWpaUH3u3r1bVg9FREbywQcfoFu3brhz5w4cHR3x66+/4tq1a/D398cXX3xh6vKILF6pmvRnn32GDRs2KLfffPNNVKlSBTVr1kRcXFyZFUdEhhUbG4sJEybAysoK1tbW0Gq18PT0xOeff46pU6eaujwii1eqJh0REaHMmbt7927s2bMHUVFR6NKlCyZNmlSmBRKR4dja2iqTjLi5uSE+Ph4A4OLigoSEBFOWRkQo5SlYSUlJSpPevn07evfujY4dO8LLywsBASU/BYWITMPPzw9Hjx5FnTp18Pe//x3Tp09HamoqVq9ejUaNGpm6PCKLV6p30q6urspWdlRUFIKCggAAIsIju4nKkdmzZ6N69eoAgFmzZsHV1RWjRo1CSkoKlixZYuLqiKhU76R79uyJfv36oU6dOrh16xa6dOkCIP9IUW9v7zItkIgMp0mTJsr3bm5uiIqKMmE1RPRXpWrS8+bNg5eXFxISEvD555/DyckJAJCYmIj33nuvTAskIsO5cuUKcnJyUKdOHZ3xixcvwtbWFl5eXqYpjIgAlLJJ29raYuLEiYXGP/jgg2cuiIiMZ/DgwRg6dGihJn348GEsXbpU74UxiMh4OC0okQU7ceIEWrVqVWi8efPmRV7RioiMh9OCElkwjUaDe/fuFRpPS0tjjolUoNhHd+fl5cHNzU35vqgvBpuo/GjTpg3CwsJ0cpubm4uwsDAEBgaasDIiAp7hUpVEVP599tlnaNOmDerVq4fWrVsDAPbv34/09HTs3bvXxNURUanOkx47diy++uqrQuMLFizA+PHjn7UmIjKSBg0a4OTJk+jduzdu3ryJe/fuYeDAgTh37hwnMyFSgVK9k960aZPeg8datmyJOXPmIDw8/FnrIiIjqVGjBmbPnm3qMohIj1K9k7516xZcXFwKjVeqVAmpqanPXBQRGc/+/fvx9ttvo2XLlrhx4wYAYPXq1YiJiTFxZURUqibt7e2td2ainTt3onbt2s9cFBEZx6ZNm9CpUyc4Ojri+PHj0Gq1APKP7ua7ayLTK9Xu7uDgYIwePRopKSl49dVXAQDR0dH48ssvuaubqBz59NNPERERgYEDB2L9+vXKeKtWrfDpp5+asDIiAkrZpIcOHQqtVotZs2Zh5syZAAAvLy8sXrwYAwcOLNMCichwzp8/jzZt2hQad3Fxwd27d41fEBHpKPUpWKNGjVKuluPo6KjM301E5YeHhwcuXbpUaI7umJgYfnRFpAKl+kwaAHJycrBnzx5s3rwZIgIA+PPPP5GRkVFmxRGRYQ0fPhzjxo3D4cOHodFo8Oeff2LNmjWYMGECRo0aZeryiCxeqd5JX7t2DZ07d0Z8fDy0Wi06dOgAZ2dnfPbZZ9BqtYiIiCjrOonIAKZMmYK8vDy0b98e9+/fR5s2bWBvb49JkyZh2LBhpi6PyOKV6p30uHHj0KRJE9y5cweOjo7K+Ouvv47o6OgSP97ChQvh5eUFBwcHBAQE4MiRI8Vab/369dBoNE+cS5yIiqbRaPDRRx/h9u3bOH36NH799VekpKTAxcUFL7zwQokeizkmKnulatL79+/HtGnTYGdnpzPu5eWlnGdZXBs2bEBwcDBCQ0Nx/Phx+Pj4oFOnTrh58+YT17t69SomTpyoTGVIRMWn1WoREhKCJk2aoFWrVtixYwcaNGiAM2fOoF69epg/f36JLj3LHBMZRqmadFEX0rh+/TqcnZ1L9Fhz587F8OHDMWTIEDRo0AARERGoUKECli9fXuQ6ubm56N+/P2bMmMGDW4hKYfr06Vi8eDG8vLxw5coVvPnmmxgxYgTmzZuHL7/8EleuXMHkyZOL/XjMMZFhlKpJd+zYUed8aI1Gg4yMDISGhqJr167Ffpzs7GwcO3YMQUFBjwqyskJQUBAOHTpU5Hr/93//Bzc3N7zzzjulKZ/I4n333XdYtWoVNm7ciB9//BG5ubnIyclBXFwc3nrrLVhbWxf7sZhjIsMp1YFjX3zxBTp37owGDRogKysL/fr1w8WLF1G1alWsW7eu2I+TmpqK3NxcuLu764y7u7vj3LlzeteJiYnBsmXLEBsbW6yfodVqlVmUACA9Pb3Y9RGZq+vXr8Pf3x8A0KhRI9jb2+ODDz6ARqMp8WMZI8cAs0yWqVRN2tPTE3FxcdiwYQPi4uKQkZGBd955B/3799c5kKys3bt3DwMGDMC///1vVK1atVjrhIWFYcaMGQariag8ys3N1TmmxMbGxmhzHZQmxwCzTJapxE364cOHeOmll7B9+3b0798f/fv3L/UPr1q1KqytrZGcnKwznpycDA8Pj0LL//HHH7h69Sq6deumjOXl5QHI/ydz/vx5vPjiizrrhISEIDg4WLmdnp4OT0/PUtdMZA5EBIMHD4a9vT0AICsrC++++y4qVqyos9zmzZuf+ljGyDHALJNlKnGTtrW1RVZWVpn8cDs7O/j7+yM6Olo5/SIvLw/R0dEYPXp0oeVfeuklnDp1Smds2rRpuHfvHubPn683sPb29so/IiLKN2jQIJ3bb7/9dqkfyxg5Bphlskyl2t39/vvv47PPPsPSpUthY1PqmUUB5F+sY9CgQWjSpAmaNWuG8PBwZGZmYsiQIQCAgQMHombNmggLC4ODg0OhC9FXrlwZAHiBeqISWLFiRZk+HnNMZBil6rBHjx5FdHQ0fvzxR7z88sul2kVWoE+fPkhJScH06dORlJQEX19fREVFKQehxMfHw8qq1LOXEpERMMdEhlGqJl25cmX06tWrzIoYPXq03t1iALBv374nrrty5coyq4OISo85Jip7JWrSeXl5+Ne//oULFy4gOzsbr776Kj755BODHtFNRERkqUq0/2nWrFmYOnUqnJycULNmTXz11Vd4//33DVUbERGRRStRk161ahUWLVqEXbt2YcuWLfjhhx+wZs0a5fQJIiIiKjslatLx8fE6034GBQUp16AlIiKislWiJp2TkwMHBwedMVtbWzx8+LBMiyIiIqISHjj211mKAP0zFZXkFCwiIiLSr0RN+q+zFAHPNlMRERERFa1ETbqsZykiIiKionEKICIiIpVikyYiIlKpZ7s6BhERlWuJiYlITEws8XrVq1dH9erVDVARPY5NmojIgi1ZsgQzZswo8XqhoaH45JNPyr4g0sEmTURkwUaOHInu3bvrjD148ACBgYEAgJiYGL3XZ+C7aONgkyYismD6dltnZmYq3/v6+ha6HDEZDw8cIyIiUik2aSIiIpXi7m4iIgvhNSWyWMvlZWcp39f/OApWdg5PWPqRq3NeK1VdVDS+kyYiIlIpNmkiIiKV4u5uKhVOgEBEZHhs0lQqnACByDzkZNxGbsZtnTF5mK18n518GRpbu0LrWTtVgY1TFYPXZ+nYpKlUOAECkXnIiN2JtAPrirw/ee2HesddWvVF5cD+hiqL/odNmkqFEyAQmQcn3y5w9A4o8XrWfBdtFGzSREQWzIa7rVWNTZqeiudWEhGZBk/BIiIiUilVNOmFCxfCy8sLDg4OCAgIwJEjR4pc9t///jdat24NV1dXuLq6Iigo6InLE5FxMMdEZc/kTXrDhg0IDg5GaGgojh8/Dh8fH3Tq1Ak3b97Uu/y+ffvQt29f/PTTTzh06BA8PT3RsWNH3Lhxw8iVW7acjNvQJl3S+cpOvqzcn518udD92qRLyPnLqR5kHphjIsMw+WfSc+fOxfDhwzFkyBAAQEREBCIjI7F8+XJMmTKl0PJr1qzRub106VJs2rQJ0dHRGDhwoFFqJp62QbqYYyLDMGmTzs7OxrFjxxASEqKMWVlZISgoCIcOHSrWY9y/fx8PHz5ElSo8OtGYeNoGFWCOiQzHpE06NTUVubm5cHd31xl3d3fHuXPnivUYkydPRo0aNRAUFKT3fq1WC61Wq9xOT08vfcGk4GkbVMAYOQaYZbJMJv9M+lnMmTMH69evx/fffw8HB/2n+4SFhcHFxUX58vT0NHKVRPQkxckxwCyTZTJpk65atSqsra2RnJysM56cnAwPD48nrvvFF19gzpw5+PHHH/HKK68UuVxISAjS0tKUr4SEhDKpnYjyGSPHALNMlsmkTdrOzg7+/v6Ijo5WxvLy8hAdHY0WLVoUud7nn3+OmTNnIioqCk2aNHniz7C3t0elSpV0voio7BgjxwCzTJbJ5Ed3BwcHY9CgQWjSpAmaNWuG8PBwZGZmKkeJDhw4EDVr1kRYWBgA4LPPPsP06dOxdu1aeHl5ISkpCQDg5OQEJycnkz0PIkvGHBMZhsmbdJ8+fZCSkoLp06cjKSkJvr6+iIqKUg5CiY+Ph5XVozf8ixcvRnZ2Nt544w2dx+ElEIlMhzkmMgyTN2kAGD16NEaPHq33vn379uncvnr1quELIqISY46Jyl65PrqbiIjInLFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqRSbNBERkUqxSRMREakUmzQREZFKsUkTERGpFJs0ERGRSrFJExERqZQqmvTChQvh5eUFBwcHBAQE4MiRI09c/rvvvsNLL70EBwcHvPzyy9ixY4eRKiWiojDHRGXP5E16w4YNCA4ORmhoKI4fPw4fHx906tQJN2/e1Lv8wYMH0bdvX7zzzjs4ceIEevTogR49euD06dNGrpyICjDHRIZh8iY9d+5cDB8+HEOGDEGDBg0QERGBChUqYPny5XqXnz9/Pjp37oxJkyahfv36mDlzJho3bowFCxYYuXIiKsAcExmGSZt0dnY2jh07hqCgIGXMysoKQUFBOHTokN51Dh06pLM8AHTq1KnI5YnIsJhjIsOxMeUPT01NRW5uLtzd3XXG3d3dce7cOb3rJCUl6V0+KSlJ7/JarRZarVa5nZaWBgBIT08vsq487f1i1V9aT/rZasTXozBDvibFfT2cnZ2h0WgMVkdxGSPHALNcFvh66FLD6/G0HJu0SRtDWFgYZsyYUWjc09PTBNXkcwk32Y9WJb4euor7eqSlpaFSpUoGrUVNmGX14+uhqzivx9NybNImXbVqVVhbWyM5OVlnPDk5GR4eHnrX8fDwKNHyISEhCA4OVm7n5eXh9u3beO6558rkXUh6ejo8PT2RkJBgUf8wi8LXozBDvSbOzs5l9ljPwhg5BphlY+ProctUOTZpk7azs4O/vz+io6PRo0cPAPnBi46OxujRo/Wu06JFC0RHR2P8+PHK2O7du9GiRQu9y9vb28Pe3l5nrHLlymVRvo5KlSrxD/kxfD0KM9fXxBg5BphlU+Hrocvor4eY2Pr168Xe3l5WrlwpZ8+elREjRkjlypUlKSlJREQGDBggU6ZMUZY/cOCA2NjYyBdffCG///67hIaGiq2trZw6dcok9aelpQkASUtLM8nPVxu+HoVZwmtS3nMsYhm/p5Lg66HLVK+HyT+T7tOnD1JSUjB9+nQkJSXB19cXUVFRykEl8fHxsLJ6dBB6y5YtsXbtWkybNg1Tp05FnTp1sGXLFjRq1MhUT4HI4jHHRAZi1E0CM5SVlSWhoaGSlZVl6lJUga9HYXxNygf+nnTx9dBlqtdDIyJi6g0FIiIiKszkM44RERGRfmzSREREKsUmTUREpFJs0kRERCrFJk1PpO+4Qh5rmI+vA5UXzHHR1P468OhuPVJTU+Ho6IiKFSuauhSTysvLg5WVFe7cuYNbt27Bzs4Of/vb32BlZaXcR6RmzDJzXN7xt/MXx48fR61atRAbG2vqUkyqILynT5/GP/7xD3To0AEdO3bE9OnT8fDhQwb7f+bPn4+JEyeaugzSg1lmjotLzTnmb+gxcXFxaNu2LUaOHIlWrVoVut9SdjoUBDsuLg4tW7ZE06ZNERERgYYNG2LBggXYunWrqUtUhezsbPz55584c+YMHjx4YDF/H+UBs8wcF5fqc2zUqVNU7OTJk+Ls7CwffvihiIjk5ubK77//Lvv375eLFy8qy+Xl5ZmqRKM6d+6cVKxYUaZOnaqMnT17VjQajXz88cc6y+bk5Bi7PNU4ePCgODg4yNatW01dCv0Ps/wIc1w8as4x30kDePjwIcaMGYOsrCx89tlnEBF0794dAwYMQJs2bfDGG28ol8gri0viqVlubi5yc3Pxr3/9C/b29mjYsKFy36ZNmwAASUlJWLRoEfbu3QutVgtra2tTlWs0eXl5esdbtGiBYcOGYfHixbh9+7aRq6K/YpbzMcf6lcccm/wCG2pga2uLBQsWoFu3bujQoQNyc3Ph4OCAL7/8Evb29oiJicG8efNQuXJlTJ8+3dTlGkTBrrGHDx/CwcEBM2bMwL179xAREQFHR0ecP38eX375JcaPH4/GjRtj0aJFEBEkJCTg73//OwYPHowOHTqY+mkYTMFndzNnzoSNjQ1at26NwMBAAECHDh0QGRmJxMREVKlShQfjmJClZ5k5frJymWPTvpFXl7Nnz8rzzz8vPj4+cuPGDWU8PT1dRo0aJUFBQXLv3j0TVmgYubm5IiJy4sQJadq0qfLcExMTpWfPnlKnTh2pUKGC7Nq1S1lHq9VKWlqazJw5U3r16iW///67SWo3tAMHDsjChQvl66+/ltTUVJkxY4Y0btxY6tatKwMGDJAjR46IiEjPnj3lH//4h4mrpQKWmGXmuGjlOcds0n9x6dIl2bx5s2RnZ4vIo8+tpk+fLn5+fmZ3RZiCYMfGxoqjo6Nyzd+C8cTEROndu7f4+PjIqlWrlPUe//zK3F6TAitWrBBvb28ZPHiwLFu2THlNEhISZPfu3dKsWTNp1qyZtG7dWiZNmiS+vr5y8uRJEbGMzzvVzpKyzBwXrbznmE1aD32/mGHDhsmQIUOUwJuDx7e8HR0ddQ4uERG5c+eOiDzaEm/durUsX75cuf/hw4dGq9XY1q5dKxUqVJDvvvtOtFqt3mWys7MlJiZGRowYIW5ubqLRaOSTTz4xcqX0JJaQZea4aOaQYzbpp0hOTpapU6dK1apV5cyZM6Yup8ydPXtW7OzsJCwsTGc8PDxcgoODJTMzU0QeBbxdu3ayaNEiU5RqNDdu3JCWLVvK7NmzdcYf/4df8I+xQFxcnISGhkqdOnXk7NmzRqmTSsacs8wcF2YuObaoJp2cnCy//PKLHDx4UK5fv/7U5ffu3StDhw6VGjVqyIkTJwxfoJFlZGRIz549pWLFijqvR1hYmFSsWFF++uknEXn0h5yUlCTt27eXrl27yt27d01RslEcP35c3N3d5ZdfftF7f0HI79+/rzN+6tQpqVOnjkRFRRm8RkvHLD/CHOtnLjm2mCZ98uRJqV+/vjRq1Eg8PDykf//+OgeUiBTeqrp48aKsWrVK/vjjD2OWalB/fY4bNmyQLl26SGBgoGRkZMiCBQukSpUqOgeXPO7GjRuSkJBgjFJNZtu2bVKlShW5cuWKiOg/fzQpKUnCw8Pl3r17Oq+pr6+vzJkzx1ilWiRmmTkuDnPJsUU06TNnzkjVqlVl8uTJkpCQICtWrJAaNWooBwf81eLFi5UjP/8aBnNw7do12bJli3J7y5Yt0qFDB6lVq5Y4OTkpRzo+vlto1qxZcvjwYaPXagqnT58WjUYj8+bNU8b++tnm119/LSNGjND5+9i4caNUqVLFbI+QVQNm+RHm+MnMJcdm36Tv3Lkj7du3l9GjR+uMd+zYUdavXy+7d++W06dPi0j+L/DIkSPi6ekpPXr0kLy8PFUc3VeWsrKy5N1335UXX3xRvvvuO2V8y5YtEhQUJA0bNpT4+HgRebTl+cknn4hGozG73YT65OXlSUZGhrz55pvi7u4u33//vTJeICsrS3r16iUfffSRzrqnTp0ym3dqasQsP8IcP5k55djsm/TNmzdl+fLlEhsbq4x9+umnotFoxM/PT/z8/MTNzU12794tIvm/uHXr1qnql1TWDh06JIMGDZImTZrIhg0blPGCLfHAwEC5dOmSiIhMmzZNHBwc5NixY6Yq16CKencVGRkpPj4+4uXlJf/9739FJP+zv9jYWOnUqZM0btxYOSrWnP75qxmzrIs5fsScc2z2TVpE5Pbt28r369evFxsbG9m8ebOkp6fLH3/8IW+88Yb07NlT0tPTTVilYRT1x3v48GF5++23iwx4+/btZeTIkeLo6Ci//fabsco1qsdfm61bt8qKFStkwYIFytgPP/wgbdu2FY1GI6+88op4eXlJQECAtGnTRjl9x5LnOzYFS80yc1w0c8+xWV9PWkQKzc+bm5uLkydPws/PTxl75513EB8fj927dxu7RIMqmNbu6tWruHHjBqpXr47atWsr9x85cgTz58/H+fPnMWnSJPTp0wcAsG3bNnz66ac4e/YsfvnlFzRu3NhUT8FgHv/bmDJlCtatWwcPDw/cuXMHlStXxrfffgsvLy8kJCQgNjYWv/zyC5ycnODv748uXbrA2toaOTk5sLHhzLrGYMlZZo6LZhE5NuUWgiGcP39e1q5dqzNWsBvjr1ujBZ9TDRs2TCZMmCA5OTmq3eVRWjdu3BCNRiMajUaqVq0qQ4cOldmzZ0tSUpLk5eVJfHy8vP3229KiRQud123Hjh1y7do1E1ZuHHPnzpXq1asr7zJWr16t7D69cOFCkeupecvbXDDLjzDHT2bOOVbB7OFlJzY2Fq+88orOVUxyc3Oh0WiQnp6OnJwcneVzcnIQGhqKyMhIDBs2DNbW1mZ3ZRxXV1e0bdsWbm5u6NGjB+7evYtNmzbB19cXzZo1Q1RUFLy9vdGgQQOEh4dj8+bNAIAuXbqgVq1aJq7esG7evImLFy9i3rx58Pf3x9atW/H+++/j888/h4igX79+uHDhAoDC1x+2hCsGmRKzrIs5LprZ59i02whlJy4uTipUqKBcQ1bk0XR3V69elebNm8vOnTuV+3bt2iXvvfeeuLm5yfHjx41erzEUbCVmZmZKu3btpGPHjvLdd99Jbm6u7Ny5U0JDQ8XHx0fq1q0rNjY2otFopGnTppKRkWFW70Ke5IcffpDExEQ5fvy41K5dW/ksa+nSpaLRaKRWrVrKUbJkHMyyLub46cw5x2bRpM+ePStVq1aVAQMGiIjurrA//vhDPD09ZcSIETp/sNu2bZNJkyap5lw4QykIeEZGhrRr104aN24s27dvV/7pJSUlSXx8vMyZM0dGjBihnMJibp52jmxERIQEBQVJSkqKiOQflDRq1Ch59913y8UuMXPBLOvHHOezxByX+wPH4uLi0LJlS7i6ukKr1WLnzp1o0qQJcnNzYW1tjaFDhyIrKwtr1qwptPtLq9XC3t7eRJUbhhRxgI21tTUyMzPxz3/+E2lpaQgJCUG3bt1ga2tbaDlz8/hrsmrVKpw6dQo1atRAo0aNlGvnTpw4Ed9++y3OnTuH7OxsDBo0CE2aNMHHH38MwHxfGzVhlh9hjguz2BybcgvhWR0/flwqVKggU6dOlYyMDOndu7dUqlRJjh49qrOcJezymT17ts6Vbf7q8S3xV199VZo2bSpbt2416yvgiOj+7qdMmSLOzs4SFBQk/v7+UqVKFZk1a5aIiPz555/i6ekpVatWFW9vb2nUqJHZXCWpPGCW8zHH+llyjsttk87JyZHAwECZNGmSMlZwzdTHw23uoRbJv9Ta2LFjRaPRyLp164pc7vGAd+zYUerUqSPbt283VplG9/jv/vDhw9K1a1c5cOCAiOSHOTw8XGxsbOSLL74QkfyLNnz55ZfyzTffKP/0zP2fnxowy/mYY/0sPcflsklnZGRITk6OzqT6Bb/I5ORk6dOnjzg7O+udu9Zcpaeny9SpU8XKykrWrFlT5HIFAb93757885//lMuXLxurRJP5z3/+I127dpXAwEBJS0tTxjMyMuTTTz+V+vXry7lz5wqtV14/wypPmGVdzHHRLDXH5a5JnzlzRjp27Cje3t7i7+8vS5YsKXSpscfDbe5b4Y8fSHHlyhUZN26cWFlZ6Uy8/1dqnwbvWZ08eVIOHjyo3F68eLHUrl1bnJyc5Ndff9VZdv/+/VK5cuVC42R4zPIjzHFhzHG+ctWkY2NjpVKlSjJo0CCZO3euNG3aVNzc3JTp8B7/Q09OTpb+/fuLRqMxy9My/ur777+XJk2aSK9evcTa2lqsrKyeuMvMXK1Zs0b8/Pxk7NixOkf7bty4UerVqye9e/fWmb84ISFBateuLT/++KMpyrVYzLJ+zHE+5viRctOkz5w5I87OzjJlyhSd8Vq1askbb7yhd53ExEQZOnSo3l0g5uTYsWPi4OAgS5YskeTkZDl27JiMGjXK4gK+bNkycXJykoULFyoXFnj8Xcby5cvFz89P2rVrJ2vWrJHIyEjp2rWrNGrUqNzvEitPmGX9mON8zLGuctGk8/LypFevXmJvby8//fST5ObmKrt6hg4dKv/85z8L7SYrYG7XkNVn48aN4uPjI5mZmcpYamqqjBgxQqysrGTbtm0mrM44jhw5Is8//7ysX7++0H13795Vvv/vf/8rtWvXFhsbG+nevbtMmjSpXEyyby6Y5aIxx8yxPuWiSYvkX/2mbdu20qpVK9m6dauIiKSkpIiDg4N89dVXJq7OtLZu3SrW1tbKJfkKtjpjYmKU+X4fv+asOSn4B7969WoJCAjQOaBk586dMn78ePH19ZXXX39dOThp7dq14uPjI2PHjpWTJ0/qPA4ZHrOsH3PMHOuj6rm7r1+/jjVr1mDRokVwdHTExo0bISKYN28eVqxYAV9fXwwbNgxjxowBUHheVnOk7zm2bt0azZs3R1hYGK5du6ac8F+zZk307t0boaGhaNiwobFLNbi7d+8qV6/RarXIyspS5ugdN24cZs2ahaNHj6J9+/Y4deoUevToAQDo27cvxowZg5iYGHz99dc4ceKEuq+CYwaYZV3M8SPM8VOYcgvhSU6fPi0+Pj7y9ttvy4cffqjs6rpz5460a9dONBqNdO3a1Wx3cejz+JZ1eHi4fPjhh7Jr1y7Jzc2V//znP9K8eXMZPHiwnDlzRpKTk2Xq1KnSsmVLs7u2rojIqFGjpEePHsrt2NhYadCggTRo0EA8PDzk+eefl2XLlkliYqKIiBw8eFDs7Oxk3759yjqrV6+W2rVry5gxY0Sr1Rr9OVgKZlkXc/wIc/x0qmzSp0+fFldXV5k2bZrObo/NmzfLwYMH5f79+9KhQwdp3ry5REZGKqE311MRHrdp0yblqNhXX31VGjduLEOGDBERka+//lrat28vGo1GGjRoIK6urnLixAnTFmwgsbGxyj/1e/fuiUj+RAcrVqyQefPmSVpams7fw65du8TX11cuXbqkM75u3TqLOMfUVJhl/ZjjfMzx06muSd+6dUvatGkjo0eP1hmfM2eOaDQaadOmjRw4cEAyMjKkbdu2EhgYKJs3bzb7UIvkX1+3du3aEhERISIily5dkooVK8rEiROVZe7fvy+7d++WPXv2lNurvpTEypUrxc3NTdnS1ufBgwfSvXt3ef3115UmYO4HIakBs6wfc1wYc1w01TXps2fPyosvvih79+5VfgGLFy8WW1tbWbhwoXTo0EE6duwoBw8elMzMTHn55Zelc+fOkpGRYeLKy07B8y7Y7Vdw++eff5aXX35ZREQuX74stWrVkuHDhyvrHTlyxCwPnHjcX0O5f/9+CQgIkEaNGklycrKIiM6WeUxMjHTu3FlnDl9LCLYaWHqWmeOiMcfFp7omvXr1arG2ttbZmk5ISJBffvlFREROnTol7du3Fz8/P7l586bcunVLrly5YqJqy17B8z5//rwEBwfr7ML5+eefpUOHDnLx4kXlkn0F/wB+/fVXmTBhgnJkqDl6PJT79u2TCxcuiEj+c2/durXUr19fCfj9+/dl+PDhEhQUJK+//rpZzOFb3lhylpnjojHHJaO6Jr1//36xt7eXTZs2iYjuZ1MFv9xvvvlGmjZtKgkJCSap0dDu3LkjL730kjg4OMi7776rnNCfmJgorq6uotFoZMyYMTrrjB8/Xl599VW5deuWKUo2uMf/DiZPniwvvfSSrFixQu7duyd5eXly4MABad26tTRo0EAJ+MmTJ2XPnj3K340lBVsNLD3LzHFhzHHJqa5JJyQkiJubm3Tv3l2uXr2qd5kJEybIm2++aZZHO4rkhzsgIECqVasmPXr0kGHDhsnFixdFRGT37t3i6uoqI0aMkFOnTslvv/0mEyZMEBcXFzl16pSJKze8zz//XNzc3OTnn38utFv0wIED0qZNG2nYsKHOBRtELGfXmJpYepaZ46Ixx8WnuiYtkj/zjp2dnQwYMEDOnDmjjKelpcmkSZPE1dVVTp8+bcIKDafgj3Dv3r3SrFkzGT58uLRq1UqGDx+u7DLbtGmTVKtWTTw9PaVevXri7+9vtkd/ZmVlKd+np6dL27ZtJTw8XGeZx0/ZOXz4sNStW1f69esnIuZ/lLDaWWqWmWNdzHHpaUTUN2tAbm4uli5ditGjR8Pb2xstW7aEra0tbty4gd9++w07duyAn5+fqcssE3l5ebCyskJubi6sra2VSQ6uXLmCkJAQDB48GLdu3UJ4eDgaN26MqVOnwsvLCykpKbhy5QqcnZ3h5uaG5557zsTPpOz9+OOPiIuLw9///nc0a9YMd+7cga+vL8LCwtCvXz/lNQOArKwsXL9+Hd7e3jh79izq1aun3EemYylZZo6Lxhw/G1XOOGZtbY2RI0ciJiYGDRo0wLFjx3DmzBk0atQI+/fvN4tQA/mzDllZWeHixYuYPHky9u/fD41GA41Gg9q1a+OFF17AhAkT8NZbb2HkyJGIi4vD7Nmz8ccff6BatWpo1qwZ6tevb5bBXrFiBYYOHYorV67Ayir/z9TV1RXPPfcctmzZAiD/7yQvLw8AcP78efz3v/9FSkoKGjRoAGtra+Tm5pqqfPofS8gyc1w05rgMmPJtfHGY++xDqampUrduXdFoNFKhQgWZOHGifP311yKSv4vojTfeUA68mTdvngQGBspbb70l165dM2XZBrVu3TqpUKGCbNiwQZkAo2B31/r16+XFF1+UsWPHKuMPHjyQzp07yz/+8Q+L3i2mduacZea4MOa4bKh+otOCrS8gf4u1YD5bc+Hg4IDXXnsNp06dgpWVFfLy8rBp0yasWrUKw4cPx+3bt7Fnzx707NkT48ePR3Z2Nvbs2QNbW1tTl24QKSkpWLJkCT7//HP07t1bGc/MzMQff/yBihUr4r333sOKFSvg5+eHWrVqITk5Gffv38exY8eg0WjM8u/EHJhzlpljXcxx2VHlZ9KWouCP8N69e5g5cyZOnjyJl19+GTNmzMAXX3yB69evY9myZRARnDt3DnXr1gUA3LlzB66uriau3jBSUlLQtm1bzJo1S5lIf/Hixdi7dy82bdoEb29vODs745tvvsGaNWuQnZ0Nd3d3hISEwMbGBjk5OeY5yT6pFnNcGHNcdvgqqICzszM++ugjzJ49G9HR0ahcuTKmT58OEUHHjh1hZWWFunXrKgenmGuwC6SnpyMyMhKVKlXCokWLcOHCBQQGBiIqKgppaWkICQnB/v37MXfuXJ31cnNzGWwyGeZYF3NcNvhKmMDjW94VK1ZEZmYmXFxcEBISAmtra2zZsgWZmZmYMWMG3njjDWW9x3cXmqtq1aph5cqV6NWrF/bu3QtnZ2eEh4fDx8cHzz33HO7cuYPZs2fj1q1bhda19KNAybiY46Ixx2WHTdrICoK9fft2fPXVV7h16xaee+45jB49Gt27d0dISAhEBHv37oVGo8GMGTMsbquyffv2uHjxIjIyMvDCCy8Uur9SpUrw9PQ0QWVE+Zjjp2OOy4b5b9KpjEajwY4dO9CrVy+0bt0a3bt3h6enJ3r06IEFCxbA2dkZkydPRlBQEDZu3IjZs2ebumSTqFatWqFgp6SkYMCAAdBqtXjnnXdMVBkRc1xczPGzs6xNOxPQarWwt7dXbmdnZ2Pp0qV477338PHHHwPInwihYcOGGDt2LF588UV06dIFkyZNgr29PQYMGGCq0lUjNTUVS5cuRUxMDG7evIkDBw4o509y1xgZA3P87Jjj0uE7aQOaOnUqhg8fjscPoM/OzsaFCxfg4uICAMp948aNQ9++fbF8+XLls61p06bBy8vLFKWryvXr13HgwAF4e3vj4MGDsLW1RU5ODoNNRsEclw3muHT4TtqAevbsCTs7O2g0GmVr0cnJCW3atMGePXswYsQI1KhRA0D+wSTVqlVDXFwcKlasCAA8R/B/fH19sXr1ari4uCivpaV9vkemwxyXDea4dPhO2oCaNGmCV155BXv27EGfPn1w9+5dAPkHVOTk5GDu3LlISkpSQqzVauHm5oasrCzw9HVdlStXViY44JY3GRNzXHaY45LjZkwZKjj/8a+cnJywZcsW2NjYKKclXL58GZs2bcJPP/2E1q1bIzk5Gdu3b8fBgwfh4OBggurLB74rIUNjjg2POS4+zjhWRgqCHR8fj19//RVJSUkYMWKEEtSjR4+ic+fOaNu2LdavXw9bW1ts374d+/btw2+//YbatWsjODgYjRo1MvEzIbJczDGpDZt0GSgI9smTJ9GjRw+4urri8uXL8PDwwPHjx+Ho6AgAOHz4MLp06YJ27dphzZo1SvCzs7NhY2NjEZMcEKkVc0xqxL+mZ1QQ7Li4ODRv3hz9+vXDjh07cPToUWRkZGD79u3KsgEBAdixYwd++uknDB48GHfu3AEA2NnZMdhEJsQck1rxM+lnZGVlhUuXLqF58+aYOHEiZs6cCQBwd3fH888/j7i4OERGRqJTp05o2bIlmjdvjp07d6JFixZwdHTE8uXL+fkMkYkxx6RWbNLPKC8vD8uXL4ezs7PORdvnzJmDQ4cOoVatWrh8+TLWrVuH9957DzNnzkRAQACOHj0KJycnBptIBZhjUis26WdkZWWF0aNH4/79+1i/fj0cHByQnp6OuXPnKlveGo0GY8aMwbJlyzBu3Dg4OTnB39/f1KUT0f8wx6RW/AClDNSoUQNTpkxB06ZNER4ejqlTp2L9+vXo3LkzsrKyAABdunRBtWrVoNVqTVwtEenDHJMa8Z10GfHw8MC0adNgZWUFe3t7nDhxAq+++qpyROiPP/6IatWqwc3NzcSVElFRmGNSGzbpMuTu7o6QkBDk5eXhu+++Q05ODiZPnoxPP/0Uy5Ytw4EDB8z+Qu9E5R1zTGrC86QNICkpCbNmzUJcXBy0Wi1OnjyJmJgYfn5FVI4wx6QG/EzaADw8PPDRRx/B29sbt2/fxqFDhxhsonKGOSY14DtpA0pJSUFeXh7c3d1NXQoRlRJzTKbEJk1ERKRS3N1NRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRSbNJEREQqxSZNRESkUmzSREREKsUmTUREpFJs0kRERCrFJk1ERKRS/w/YHtTpevyjUAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Assuming you have these dataframes already loaded\n", + "# comparison_out_df_gpt\n", + "# comparison_out_df_biomed_ner_all\n", + "# comparison_out_df_scispacy\n", + "\n", + "# Create a list of dataframes and their labels\n", + "dfs = [comparison_out_df_gpt, comparison_out_df_biomed_ner_all, comparison_out_df_scispacy]\n", + "labels = ['GPT-3.5', 'BioMed NER', 'SciSpaCy']\n", + "\n", + "# Function to calculate SEM\n", + "def sem(data):\n", + " return np.std(data, ddof=1) / np.sqrt(len(data))\n", + "\n", + "# Calculate mean and SEM for precision and recall\n", + "precision_means = [df['precision'].mean() for df in dfs]\n", + "precision_sems = [sem(df['precision']) for df in dfs]\n", + "recall_means = [df['recall'].mean() for df in dfs]\n", + "recall_sems = [sem(df['recall']) for df in dfs]\n", + "\n", + "# Set up the plot\n", + "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(5, 3))\n", + "x = np.arange(len(labels))\n", + "width = 0.35\n", + "\n", + "# Function to remove top and right spines\n", + "def remove_spines(ax):\n", + " ax.spines['top'].set_visible(False)\n", + " ax.spines['right'].set_visible(False)\n", + "\n", + "# Plot precision\n", + "ax1.bar(x, precision_means, width, yerr=precision_sems, capsize=5)\n", + "ax1.set_ylabel('Precision')\n", + "# ax1.set_title('Average Precision')\n", + "ax1.set_xticks(x)\n", + "ax1.set_xticklabels(labels, rotation=45, ha='right')\n", + "ax1.set_ylim(0, 1)\n", + "remove_spines(ax1)\n", + "\n", + "# Plot recall\n", + "ax2.bar(x, recall_means, width, yerr=recall_sems, capsize=5)\n", + "ax2.set_ylabel('Recall')\n", + "# ax2.set_title('Average Recall')\n", + "ax2.set_xticks(x)\n", + "ax2.set_xticklabels(labels, rotation=45, ha='right')\n", + "ax2.set_ylim(0, 1)\n", + "remove_spines(ax2)\n", + "\n", + "# Adjust layout and display\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "fig_path = 'data/results/figures'\n", + "os.makedirs(fig_path, exist_ok=True)\n", + "fig.savefig(os.path.join(fig_path, 'ner_extraction_comparison.tiff'), format='tiff', bbox_inches='tight') \n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9af751da-eee0-4d03-9bba-137baf429eae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.9549689440993789, 0.23680124223602483, 0.5910973084886129] [0.008286258373808576, 0.022466879308773186, 0.025950253677613028]\n", + "[0.9968944099378882, 0.2795031055900621, 0.6428571428571429] [0.003105590062111801, 0.025047065948613282, 0.02674395944460631]\n" + ] + } + ], + "source": [ + "print(precision_means, precision_sems)\n", + "print(recall_means, recall_sems)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5f2faf89-cdc6-492c-9372-8f1ff6233dd5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5822619658819637" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comparison_out_df_gpt.run_time_per_text.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c3c44796-55f0-4027-8651-f53fdce6629c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.015508739844612453" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comparison_out_df_biomed_ner_all.run_time_per_text.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "302251b3-4748-4cd2-950a-d4e25ffec4bf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.013423655344092327" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comparison_out_df_scispacy.run_time_per_text.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "362efc28-28c0-46ad-95b2-3d78ca7a6540", + "metadata": {}, + "outputs": [], + "source": [ + "# # Print all labels\n", + "# # print(model.config.id2label)\n", + "\n", + "# # Or, if you want a list of just the label names\n", + "# label_names = list(model.config.id2label.values())\n", + "\n", + "# set(map(lambda x:x.split('-')[-1], label_names))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "64c523d7-3ed8-4e4a-a1a0-089bd84dd554", + "metadata": {}, + "outputs": [], + "source": [ + "# method = method_list[0]\n", + "# text = data.iloc[25].text\n", + "# entity, run_time = entity_extraction(text, method)\n", + "# print(text)\n", + "# print(entity, run_time, method)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a60d8c0-fd66-4700-911d-a3e8ac51115e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- cgit v1.2.3