{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "3d3dca32-b77f-471d-b834-20ac795f9f17", "metadata": {}, "outputs": [], "source": [ "import os\n", "os.chdir('..')\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "9da344d2-8e45-4574-aa19-4ad76c566101", "metadata": {}, "outputs": [], "source": [ "from IPython.display import clear_output\n", "from kg_rag.utility import *\n", "\n", "clear_output()\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "b44bf274-41d1-4153-a65e-bfb9b90ebcc6", "metadata": {}, "outputs": [], "source": [ "\n", "def generate_response(question, llm, kg_rag_flag, evidence_flag=False, temperature=0):\n", " CHAT_MODEL_ID = llm\n", " CHAT_DEPLOYMENT_ID = llm\n", " \n", " if kg_rag_flag:\n", " SYSTEM_PROMPT = system_prompts[\"KG_RAG_BASED_TEXT_GENERATION\"]\n", " CONTEXT_VOLUME = int(config_data[\"CONTEXT_VOLUME\"])\n", " QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD = float(config_data[\"QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD\"])\n", " QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY = float(config_data[\"QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY\"])\n", " VECTOR_DB_PATH = config_data[\"VECTOR_DB_PATH\"]\n", " NODE_CONTEXT_PATH = config_data[\"NODE_CONTEXT_PATH\"]\n", " SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL = config_data[\"SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL\"]\n", " SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL = config_data[\"SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL\"]\n", " vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)\n", " embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)\n", " node_context_df = pd.read_csv(NODE_CONTEXT_PATH)\n", " context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, evidence_flag)\n", " enriched_prompt = \"Context: \"+ context + \"\\n\" + \"Question: \" + question\n", " question = enriched_prompt\n", " else:\n", " SYSTEM_PROMPT = system_prompts[\"PROMPT_BASED_TEXT_GENERATION\"]\n", " \n", " output = get_GPT_response(question, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=temperature)\n", " stream_out(output)\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "33c0771d-e6be-406b-9b17-51f6377bcb6a", "metadata": {}, "outputs": [], "source": [ "\n", "LLM_TO_USE = 'gpt-4'\n", "TEMPERATURE = config_data[\"LLM_TEMPERATURE\"]\n" ] }, { "cell_type": "markdown", "id": "aab5d68e-ed47-4e36-986f-8842287d8ab6", "metadata": {}, "source": [ "## Question 1:" ] }, { "cell_type": "code", "execution_count": 5, "id": "bbbdb428-6d01-43f2-9e58-b919e7a68736", "metadata": {}, "outputs": [], "source": [ "question = 'Are there any latest drugs used for weight management in patients with Bardet-Biedl Syndrome?'\n" ] }, { "cell_type": "markdown", "id": "19ffb33b-bce6-4cb1-8f86-76080fcf0e40", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 6, "id": "02374d3c-5711-4704-8b37-7eda0965c9b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, the compound Setmelanotide is used to treat Bardet-Biedl syndrome. It is currently in phase 3 of clinical trials according to the sources ChEMBL and DrugCentral. However, it is advised to seek guidance from a healthcare professional for the most current and personalized treatment options. [Provenance: ChEMBL, DrugCentral]\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True #Used only when KG_RAG_FLAG=True\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "17be2ecd-40f1-44b5-8f79-9358264a3703", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 7, "id": "06bf55f2-eccf-4c26-b65a-0ed2ed30e689", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "As of my knowledge up to date, there are no specific drugs designed for weight management in patients with Bardet-Biedl Syndrome. The treatment generally involves managing the symptoms and complications. However, any new developments would be best advised by a healthcare professional.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "afa24795-71e1-4f1a-ac99-b8d106513f76", "metadata": {}, "source": [ "## Question 2:" ] }, { "cell_type": "code", "execution_count": 8, "id": "eca290bb-fe30-4bb2-ab36-405c2151bccb", "metadata": {}, "outputs": [], "source": [ "question = 'Is it PNPLA3 or HLA-B that has a significant association with the disease liver benign neoplasm?'\n" ] }, { "cell_type": "markdown", "id": "0cd8e67f-f769-4130-8e7d-52e0524c66df", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 9, "id": "03863049-4ea5-4733-aeb4-0be3f09d6b5f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The gene PNPLA3 has a more significant association with the disease liver benign neoplasm, as indicated by the lower p-value of 4e-14 compared to HLA-B's p-value of 2e-08. The provenance of this information is the GWAS Catalog.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True #Used only when KG_RAG_FLAG=True\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "30121a5b-3956-41bd-ac4a-62e32744ba99", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 10, "id": "8614706d-90d9-49e8-9481-30628505ba2e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "It is PNPLA3 that has a significant association with the disease liver benign neoplasm.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "8087eb26-e144-48ee-8630-09c73eb35a37", "metadata": {}, "source": [ "## Question 3" ] }, { "cell_type": "code", "execution_count": 11, "id": "4ed3bb71-12c8-40bf-b361-f9cd227c142d", "metadata": {}, "outputs": [], "source": [ "question = \"Is Parkinson's disease associated with PINK1 gene?\"\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "0d06bb9e-ed6e-4f1b-ac2e-65cf219eeb12", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, Parkinson's disease is associated with the PINK1 gene. This association is reported in the DISEASES database - https://diseases.jensenlab.org.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "b17ed61e-9de1-4e2b-ad04-5c41b1167e5b", "metadata": {}, "source": [ "## Question 3- perturbed (entities in smaller case)" ] }, { "cell_type": "code", "execution_count": 13, "id": "ae0cccdb-8689-46b0-a1cb-808ec8f997e0", "metadata": {}, "outputs": [], "source": [ "question = \"Is parkinson's disease associated with pink1 gene?\"\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "1961ca49-f48a-46e3-bfc3-ced02fb1a2c2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, Parkinson's disease is associated with the PINK1 gene. This association is reported in the DISEASES database - https://diseases.jensenlab.org.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "b7f6ee15-2e3a-4dea-bfa6-35401b6b67da", "metadata": {}, "source": [ "## Question 4:" ] }, { "cell_type": "code", "execution_count": 15, "id": "f5068a03-143d-4c29-87bf-bba1082408bd", "metadata": {}, "outputs": [], "source": [ "question = \"What are some protein markers associated with thoracic aortic aneurysm?\"\n" ] }, { "cell_type": "markdown", "id": "056a4de0-c0fb-48ee-93e3-7742584362c9", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 16, "id": "6edd6bc4-e03b-473e-996e-6705f83ee2fa", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The protein markers associated with thoracic aortic aneurysm include Chondroitin sulfate proteoglycan 4 (CSPG4), Matrix Gla protein (MGP), Interleukin-2 receptor subunit alpha (IL2RA), Interleukin-1 beta (IL1B), Myosin-10 (MYH10), Tropomyosin alpha-4 chain (TPM4), Tyrosine-protein kinase Mer (MERTK), and Stabilin-1 (STAB1). The provenance of these associations is the Cell Taxonomy database.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "e95cd283-ebf1-4394-bf40-e62fc96505eb", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 17, "id": "f4c9d96b-c78f-44d4-9a46-1254f4856324", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Some protein markers associated with thoracic aortic aneurysm include Matrix metalloproteinases (MMPs), C-reactive protein (CRP), and fibrillin-1.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "84ecf6b4-5732-453a-94c3-a524c4572b81", "metadata": {}, "source": [ "## Question 5:" ] }, { "cell_type": "code", "execution_count": 18, "id": "0a049b56-cc9c-4777-8351-265839145690", "metadata": {}, "outputs": [], "source": [ "question = \"Are there any protein markers that show increased activity in adenocarcinoma?\"\n" ] }, { "cell_type": "markdown", "id": "a9a82988-b1e6-441b-bafa-4b148e4054ba", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 19, "id": "6cd1217e-b504-4006-a099-87bc5361d85f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, there are several protein markers that show increased activity in adenocarcinoma. These include Keratin, type II cytoskeletal 7 (Cytokeratin-7) (CK-7) (Keratin-7) (K7) (Sarcolectin) (Type-II keratin Kb7), Anterior gradient protein 2 homolog (AG-2) (hAG-2) (HPC8) (Secreted cement gland protein XAG-2 homolog), Guanine deaminase (Guanase) (Guanine aminase) (3.5.4.3) (Guanine aminohydrolase) (GAH) (p51-nedasin), and Graves disease carrier protein (GDC) (Graves disease autoantigen) (GDA) (Mitochondrial solute carrier protein homolog) (Solute carrier family 25 member 16). The provenance of these associations is the Cell Taxonomy.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "7bcae3c8-130c-4121-a119-9b9bb79a21ee", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 20, "id": "96d8e70c-4258-4679-a5fa-1bc798eabd59", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, certain protein markers like carcinoembryonic antigen (CEA), CA 19-9, and cytokeratins can show increased activity in adenocarcinoma.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "03697e1c-6868-4c56-8876-1333bc493f44", "metadata": {}, "source": [ "## Question 6:" ] }, { "cell_type": "code", "execution_count": 21, "id": "189ef273-bf3a-415f-8198-0e7fc4a1cac0", "metadata": {}, "outputs": [], "source": [ "question = \"Do you know if ruxolitinib is approved as a pharmacologic treatment for vitiligo?\"\n" ] }, { "cell_type": "markdown", "id": "fffd4c16-08fa-436f-b692-d3fe9eda305e", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 22, "id": "8e520c41-c5d3-4b38-a744-f954c106847a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, Ruxolitinib is associated with the treatment of vitiligo. This association is supported by data from ChEMBL and DrugCentral databases. However, it is always recommended to seek guidance from a healthcare professional for treatment options. (Provenance: ChEMBL, DrugCentral)\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "6b1c1e0c-2a6a-48a0-99f1-4daabb2f1f61", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 23, "id": "91e4eabc-2027-4bb6-b75f-0064d2a52369", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "As of my knowledge up to date, ruxolitinib is not officially approved for the treatment of vitiligo. However, some clinical trials have shown promising results. Always consult with a healthcare provider for treatment options.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "be6ca1c2-f118-41cc-93a7-5e8db134193f", "metadata": {}, "source": [ "## Question 7:" ] }, { "cell_type": "code", "execution_count": 24, "id": "36a704be-f9d6-4968-82ec-09c4d0efe3a2", "metadata": {}, "outputs": [], "source": [ "question = \"Are there any biomarkers that show increased profile in hydrocephalus?\"\n" ] }, { "cell_type": "markdown", "id": "cf579442-9b23-4160-be25-e4a9dd88999f", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 25, "id": "a9f242a8-4f31-4f58-926c-d48f3f95f584", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, the Transmembrane protein 119 (TMEM119) and P2Y purinoceptor 12 (P2RY12) show an increased profile in hydrocephalus. The provenance of this information is the Cell Taxonomy database.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "e0904315-0f47-4422-aa5c-063a3ccbc919", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 26, "id": "1f1ab642-9385-4b04-83cf-6ace3eee6653", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, certain biomarkers such as L1CAM, S100B, GFAP, and NSE have shown increased profiles in hydrocephalus.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "1fd3811b-546f-4ec3-8207-f9323e372744", "metadata": {}, "source": [ "## Question 8:" ] }, { "cell_type": "code", "execution_count": 27, "id": "2a1c9337-fd39-45b0-b12a-6de9b5971b9e", "metadata": {}, "outputs": [], "source": [ "\n", "question = 'Does drug dependence have any genetic factors? Do you have any statistical evidence from trustworthy sources for this?'\n" ] }, { "cell_type": "markdown", "id": "93f43325-fd99-4b43-a2b3-1314c26c7d8e", "metadata": {}, "source": [ "### With KG-RAG" ] }, { "cell_type": "code", "execution_count": 28, "id": "e6852cb3-8bf9-408b-ab65-492b75c690ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, drug dependence does have genetic factors. The genes KAT2B and SLC25A16 have been associated with drug dependence. This information is backed by statistical evidence from the GWAS Catalog, with p-values of 4e-10 and 1e-09 respectively.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = True\n", "EDGE_EVIDENCE_FLAG = True \n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, evidence_flag=EDGE_EVIDENCE_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "markdown", "id": "796144e2-338d-479c-ba3b-34b0c23a209b", "metadata": {}, "source": [ "### Without KG-RAG" ] }, { "cell_type": "code", "execution_count": 29, "id": "b8079bc6-d309-4c88-9440-376aa43d972e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Yes, drug dependence does have genetic factors. According to the National Institute on Drug Abuse, genetics account for about 40-60% of a person's vulnerability to drug addiction.\n", "\n" ] } ], "source": [ "KG_RAG_FLAG = False\n", "\n", "generate_response(question, LLM_TO_USE, KG_RAG_FLAG, temperature=TEMPERATURE)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "16e1ae6d-d0e0-4b42-a8fe-517033fb9960", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }