summaryrefslogtreecommitdiff
path: root/notebooks/cypher_rag_example.ipynb
diff options
context:
space:
mode:
authormaszhongming <mingz5@illinois.edu>2025-09-16 15:15:29 -0500
committermaszhongming <mingz5@illinois.edu>2025-09-16 15:15:29 -0500
commit73c194f304f827b55081b15524479f82a1b7d94c (patch)
tree5e8660e421915420892c5eca18f1ad680f80a861 /notebooks/cypher_rag_example.ipynb
Initial commit
Diffstat (limited to 'notebooks/cypher_rag_example.ipynb')
-rw-r--r--notebooks/cypher_rag_example.ipynb271
1 files changed, 271 insertions, 0 deletions
diff --git a/notebooks/cypher_rag_example.ipynb b/notebooks/cypher_rag_example.ipynb
new file mode 100644
index 0000000..c844dd6
--- /dev/null
+++ b/notebooks/cypher_rag_example.ipynb
@@ -0,0 +1,271 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "677867a3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.chains import GraphCypherQAChain\n",
+ "from langchain.chat_models import ChatOpenAI\n",
+ "from langchain.graphs import Neo4jGraph\n",
+ "from langchain.callbacks import get_openai_callback\n",
+ "from dotenv import load_dotenv\n",
+ "import os\n",
+ "import openai\n",
+ "import pandas as pd\n",
+ "from neo4j.exceptions import CypherSyntaxError\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "186eb11d",
+ "metadata": {},
+ "source": [
+ "## Choose the LLM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "fe18f3fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "LLM_MODEL = 'gpt-4-32k'\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a040044",
+ "metadata": {},
+ "source": [
+ "## Custom function for neo4j RAG chain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ead42cd3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_neo4j_cypher_rag_chain():\n",
+ " load_dotenv(os.path.join(os.path.expanduser('~'), '.spoke_neo4j_config.env'))\n",
+ " username = os.environ.get('NEO4J_USER')\n",
+ " password = os.environ.get('NEO4J_PSW')\n",
+ " url = os.environ.get('NEO4J_URI')\n",
+ " database = os.environ.get('NEO4J_DB')\n",
+ "\n",
+ " graph = Neo4jGraph(\n",
+ " url=url, \n",
+ " username=username, \n",
+ " password=password,\n",
+ " database = database\n",
+ " )\n",
+ "\n",
+ " load_dotenv(os.path.join(os.path.expanduser('~'), '.gpt_config.env'))\n",
+ " API_KEY = os.environ.get('API_KEY')\n",
+ " API_VERSION = os.environ.get('API_VERSION')\n",
+ " RESOURCE_ENDPOINT = os.environ.get('RESOURCE_ENDPOINT')\n",
+ " openai.api_type = \"azure\"\n",
+ " openai.api_key = API_KEY\n",
+ " openai.api_base = RESOURCE_ENDPOINT\n",
+ " openai.api_version = API_VERSION\n",
+ " chat_deployment_id = LLM_MODEL\n",
+ " chat_model_id = chat_deployment_id\n",
+ " temperature = 0\n",
+ " chat_model = ChatOpenAI(openai_api_key=API_KEY, \n",
+ " engine=chat_deployment_id, \n",
+ " temperature=temperature)\n",
+ " chain = GraphCypherQAChain.from_llm(\n",
+ " chat_model, \n",
+ " graph=graph, \n",
+ " verbose=True, \n",
+ " validate_cypher=True,\n",
+ " return_intermediate_steps=True\n",
+ " )\n",
+ " return chain"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc863aed",
+ "metadata": {},
+ "source": [
+ "## Initiate neo4j RAG chain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9866fd11",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING! engine is not default parameter.\n",
+ " engine was transferred to model_kwargs.\n",
+ " Please confirm that engine is what you intended.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 17.5 ms, sys: 3.65 ms, total: 21.2 ms\n",
+ "Wall time: 28.1 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "neo4j_rag_chain = get_neo4j_cypher_rag_chain()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3827d5c4",
+ "metadata": {},
+ "source": [
+ "## Enter question -1 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "7b9837c5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "question = \"Is Parkinson's disease associated with PINK1 gene?\"\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "23e669ed",
+ "metadata": {},
+ "source": [
+ "## Run cypher RAG"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "9f685b8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+ "Generated Cypher:\n",
+ "\u001b[32;1m\u001b[1;3mMATCH (d:Disease {name: \"Parkinson's disease\"}), (g:Gene {name: \"PINK1\"}) \n",
+ "RETURN EXISTS((d)-[:ASSOCIATES_DaG]->(g)) AS is_associated\u001b[0m\n",
+ "Full Context:\n",
+ "\u001b[32;1m\u001b[1;3m[{'is_associated': True}]\u001b[0m\n",
+ "\n",
+ "\u001b[1m> Finished chain.\u001b[0m\n",
+ "Yes, Parkinson's disease is associated with the PINK1 gene.\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "out = neo4j_rag_chain.run(query=question, return_final_only=True, verbose=True)\n",
+ "print(out)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "069bc2f2",
+ "metadata": {},
+ "source": [
+ "## Question 1 perturbed (all smallcase)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4cf3363b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "question = \"Is parkinson's disease associated with pink1 gene?\"\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "487fd17f",
+ "metadata": {},
+ "source": [
+ "## Run cypher RAG"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "61d5eb0b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
+ "Generated Cypher:\n",
+ "\u001b[32;1m\u001b[1;3mMATCH (d:Disease {name: \"Parkinson's Disease\"}), (g:Gene {name: \"PINK1\"}) \n",
+ "RETURN EXISTS((d)-[:ASSOCIATES_DaG]->(g)) AS is_associated\u001b[0m\n",
+ "Full Context:\n",
+ "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n",
+ "\n",
+ "\u001b[1m> Finished chain.\u001b[0m\n",
+ "I'm sorry, but I don't have the information to answer that question.\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "out = neo4j_rag_chain.run(query=question, return_final_only=True, verbose=True)\n",
+ "print(out)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6123c1a7",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}