From 73c194f304f827b55081b15524479f82a1b7d94c Mon Sep 17 00:00:00 2001 From: maszhongming Date: Tue, 16 Sep 2025 15:15:29 -0500 Subject: Initial commit --- notebooks/cypher_rag_example.ipynb | 271 +++++++++++++++++++++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 notebooks/cypher_rag_example.ipynb (limited to 'notebooks/cypher_rag_example.ipynb') diff --git a/notebooks/cypher_rag_example.ipynb b/notebooks/cypher_rag_example.ipynb new file mode 100644 index 0000000..c844dd6 --- /dev/null +++ b/notebooks/cypher_rag_example.ipynb @@ -0,0 +1,271 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "677867a3", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import GraphCypherQAChain\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.graphs import Neo4jGraph\n", + "from langchain.callbacks import get_openai_callback\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import openai\n", + "import pandas as pd\n", + "from neo4j.exceptions import CypherSyntaxError\n" + ] + }, + { + "cell_type": "markdown", + "id": "186eb11d", + "metadata": {}, + "source": [ + "## Choose the LLM" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fe18f3fc", + "metadata": {}, + "outputs": [], + "source": [ + "LLM_MODEL = 'gpt-4-32k'\n" + ] + }, + { + "cell_type": "markdown", + "id": "1a040044", + "metadata": {}, + "source": [ + "## Custom function for neo4j RAG chain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ead42cd3", + "metadata": {}, + "outputs": [], + "source": [ + "def get_neo4j_cypher_rag_chain():\n", + " load_dotenv(os.path.join(os.path.expanduser('~'), '.spoke_neo4j_config.env'))\n", + " username = os.environ.get('NEO4J_USER')\n", + " password = os.environ.get('NEO4J_PSW')\n", + " url = os.environ.get('NEO4J_URI')\n", + " database = os.environ.get('NEO4J_DB')\n", + "\n", + " graph = Neo4jGraph(\n", + " url=url, \n", + " username=username, \n", + " password=password,\n", + " database = database\n", + " )\n", + "\n", + " load_dotenv(os.path.join(os.path.expanduser('~'), '.gpt_config.env'))\n", + " API_KEY = os.environ.get('API_KEY')\n", + " API_VERSION = os.environ.get('API_VERSION')\n", + " RESOURCE_ENDPOINT = os.environ.get('RESOURCE_ENDPOINT')\n", + " openai.api_type = \"azure\"\n", + " openai.api_key = API_KEY\n", + " openai.api_base = RESOURCE_ENDPOINT\n", + " openai.api_version = API_VERSION\n", + " chat_deployment_id = LLM_MODEL\n", + " chat_model_id = chat_deployment_id\n", + " temperature = 0\n", + " chat_model = ChatOpenAI(openai_api_key=API_KEY, \n", + " engine=chat_deployment_id, \n", + " temperature=temperature)\n", + " chain = GraphCypherQAChain.from_llm(\n", + " chat_model, \n", + " graph=graph, \n", + " verbose=True, \n", + " validate_cypher=True,\n", + " return_intermediate_steps=True\n", + " )\n", + " return chain" + ] + }, + { + "cell_type": "markdown", + "id": "cc863aed", + "metadata": {}, + "source": [ + "## Initiate neo4j RAG chain" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9866fd11", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING! engine is not default parameter.\n", + " engine was transferred to model_kwargs.\n", + " Please confirm that engine is what you intended.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 17.5 ms, sys: 3.65 ms, total: 21.2 ms\n", + "Wall time: 28.1 s\n" + ] + } + ], + "source": [ + "%%time\n", + "neo4j_rag_chain = get_neo4j_cypher_rag_chain()\n" + ] + }, + { + "cell_type": "markdown", + "id": "3827d5c4", + "metadata": {}, + "source": [ + "## Enter question -1 " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7b9837c5", + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Is Parkinson's disease associated with PINK1 gene?\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "23e669ed", + "metadata": {}, + "source": [ + "## Run cypher RAG" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9f685b8e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (d:Disease {name: \"Parkinson's disease\"}), (g:Gene {name: \"PINK1\"}) \n", + "RETURN EXISTS((d)-[:ASSOCIATES_DaG]->(g)) AS is_associated\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'is_associated': True}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Yes, Parkinson's disease is associated with the PINK1 gene.\n" + ] + } + ], + "source": [ + "\n", + "out = neo4j_rag_chain.run(query=question, return_final_only=True, verbose=True)\n", + "print(out)\n" + ] + }, + { + "cell_type": "markdown", + "id": "069bc2f2", + "metadata": {}, + "source": [ + "## Question 1 perturbed (all smallcase)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "4cf3363b", + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Is parkinson's disease associated with pink1 gene?\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "487fd17f", + "metadata": {}, + "source": [ + "## Run cypher RAG" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "61d5eb0b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n", + "Generated Cypher:\n", + "\u001b[32;1m\u001b[1;3mMATCH (d:Disease {name: \"Parkinson's Disease\"}), (g:Gene {name: \"PINK1\"}) \n", + "RETURN EXISTS((d)-[:ASSOCIATES_DaG]->(g)) AS is_associated\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "I'm sorry, but I don't have the information to answer that question.\n" + ] + } + ], + "source": [ + "\n", + "out = neo4j_rag_chain.run(query=question, return_final_only=True, verbose=True)\n", + "print(out)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6123c1a7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- cgit v1.2.3