diff options
Diffstat (limited to 'notebooks/rag_comparison.ipynb')
| -rw-r--r-- | notebooks/rag_comparison.ipynb | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/notebooks/rag_comparison.ipynb b/notebooks/rag_comparison.ipynb new file mode 100644 index 0000000..035dd65 --- /dev/null +++ b/notebooks/rag_comparison.ipynb @@ -0,0 +1,382 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "id": "d514b0e6", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import os\n", + "\n", + "from IPython.display import clear_output\n" + ] + }, + { + "cell_type": "markdown", + "id": "349f3171", + "metadata": {}, + "source": [ + "## Load RAG output files" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "109057cd", + "metadata": {}, + "outputs": [], + "source": [ + "neo4j_rag = pd.read_csv('../data/results/cypher_rag_output.csv')\n", + "kg_rag = pd.read_csv('../data/results/kg_rag_output.csv')\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "38565176", + "metadata": {}, + "source": [ + "## Token usage comparison" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "12e415b1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWsAAAESCAYAAAA7a/RxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4X0lEQVR4nO3deVxU1fsH8M/IMizDIiirxCIE4o4bqEkmikuiaWV8MXdTQkXURH+llhuKL1PLNEvETDNNbdFy4YviFkouuIO7uICkyAAiIMzz+4MvV6+gcmEQLjzv1+u+ZM45985zmeGZ65lzz1EQEYExxliNVq+6A2CMMfZynKwZY0wGOFkzxpgMcLJmjDEZ4GTNGGMywMmaMcZkgJM1Y4zJACfrciAiZGVlgYekM8aqCyfrcsjOzoaZmRmys7OrOxTGWB3FyZoxxmSAkzVjjMkAJ2vGGJMBTtaMMSYDnKwZY0wGOFkzxpgMcLJmjDEZqNZkXVRUhBkzZsDZ2RmGhoZo3Lgx5syZI7r5hIgwc+ZM2NrawtDQEH5+frh06ZLoOBkZGQgKCoKpqSnMzc0xcuRI5OTkiNqcPn0ab7zxBgwMDODg4IDIyMhXco6MMaYVVI3mzZtHlpaWtGPHDrp27Rr98ssvpFKpaNmyZUKbBQsWkJmZGf3222906tQpCggIIGdnZ3r06JHQpmfPntSyZUs6cuQIHTx4kFxdXSkwMFCoV6vVZG1tTUFBQXT27FnauHEjGRoa0qpVq8oVp1qtJgCkVqu1d/KMMSaBgqj67qF+++23YW1tjaioKKFs4MCBMDQ0xPr160FEsLOzw+TJkzFlyhQAgFqthrW1NdauXYsPPvgAFy5cgKenJ/755x+0bdsWALBr1y707t0bt27dgp2dHVauXIlPP/0UaWlp0NfXBwBMmzYNv/32G5KSkl4aZ1ZWFszMzKBWq2FqaloFv4lXKzU1FampqZL3s7W1ha2tbRVExBh7qer8pJg3bx45OjpScnIyERElJiaSlZUVrV+/noiIrly5QgDo5MmTov26dOlCEyZMICKiqKgoMjc3F9U/fvyYdHR0aNu2bURE9OGHH1K/fv1Ebfbu3UsAKCMjo1RceXl5pFarhe3mzZvSrqyBGr1NAggV2CbVgNhfuDFWi+m++o+HJ6ZNm4asrCx4eHhAR0cHRUVFmDdvHoKCggAAaWlpAABra2vRftbW1kJdWloarKysRPW6urqwsLAQtXF2di51jJK6+vXri+oiIiLwxRdfaOksGWOs8qo1WW/evBkbNmzATz/9hKZNmyIxMRETJ06EnZ0dhg4dWm1xTZ8+HZMmTRIeZ2VlwcHBodri0bYpAIIqsB93gDBWfao1WX/yySeYNm0aPvjgAwBA8+bNcePGDURERGDo0KGwsbEBANy9e1fUV3r37l20atUKAGBjY4P09HTRcQsLC5GRkSHsb2Njg7t374ralDwuafM0pVIJpVKpnZOsgWzBiZcxuanWoXu5ubmoV08cgo6ODjQaDQDA2dkZNjY2iI2NFeqzsrJw9OhR+Pj4AAB8fHyQmZmJ48ePC2327t0LjUaDDh06CG0OHDiAx48fC21iYmLg7u5eqguEMcZqpOrsMB86dCjZ29sLQ/e2bdtGDRo0oKlTpwptFixYQObm5vT777/T6dOnqV+/fmUO3WvdujUdPXqUDh06RG5ubqKhe5mZmWRtbU0ffvghnT17ln7++WcyMjKquqF71f1FW13dGKvFqvUdnpWVRaGhofTaa6+RgYEBubi40Keffkr5+flCG41GQzNmzCBra2tSKpXUrVs3YfRIifv371NgYCCpVCoyNTWl4cOHU3Z2tqjNqVOnqHPnzqRUKsne3p4WLFhQ7jg5WctkY6wWq9Zx1nIheZy1QlH1QbHS+K3MajGeG4QxxmSAkzVjjMkAJ2vGGJMBTtaMMSYDnKwZY0wGOFkzxpgMcLJmjDEZ4GTNGGMywMmaMcZkgJM1Y4zJACdrxhi
TAU7WjDEmA5ysGWNMBjhZM8aYDHCyZowxGajWNRgZq8uKiopES82xukdPTw86OjrlasvJmrFXjIiQlpaGzMzM6g6F1QDm5uawsbGB4iWLlnCyZuwVK0nUVlZWMDIyeukfKaudiAi5ublIT08HANja2r6wPSdrxl6hoqIiIVFbWlpWdzismhkaGgIA0tPTYWVl9cIuEf6CkbFXqKSP2sjIqJojYTVFyXvhZd9fcLJmrBpw1wcrUd73AidrxhiTAU7WjLEaxcnJCUuXLq3uMGocTtaM1QQKxavdKigtLQ3jx4+Hi4sLlEolHBwc0LdvX8TGxmrxl/HqvPnmm1AoFFAoFDAwMMDrr7+OiIgIEFGptvHx8dDR0UGfPn3KPFZBQQEWLVoELy8vGBsbw8zMDC1btsRnn32GO3fuVDpWTtaMsXK5fv062rRpg71792LRokU4c+YMdu3aha5duyIkJKS6w3uhgoKC59aNHj0aqampSE5OxvTp0zFz5kx8++23pdpFRUVh/PjxOHDgQKnkm5+fj+7du2P+/PkYNmwYDhw4gDNnzuCrr77CvXv38PXXX1f+JKgSHj16VJndZUOtVhMAUqvV5dsB4K06Nhl49OgRnT9/vvTfjgx+V7169SJ7e3vKyckpVffgwQMaPnw49enTR1ReUFBADRs2pNWrVxMRka+vL4WEhFBISAiZmpqSpaUlffbZZ6TRaIR9HB0dad68eTR8+HBSqVTk4OBAq1atEh03JSWF3nvvPTIzM6P69etTQEAAXbt2TagfOnQo9evXj+bOnUu2trbk5ORU5jn5+vpSaGioqMzLy4veeecdUVl2djapVCpKSkqiQYMG0bx580T1ERERVK9ePTpx4kSZz/P0+T3rue+JZ0i+stZoNJgzZw7s7e2hUqlw9epVAMCMGTMQFRVV+U8PxliNk5GRgV27diEkJATGxsal6s3NzTFq1Cjs2rULqampQvmOHTuQm5uLQYMGCWU//PADdHV1kZCQgGXLluHLL7/E6tWrRcdbvHgx2rZti5MnT+Ljjz9GcHAwkpOTARQPcfP394eJiQkOHjyIw4cPQ6VSoWfPnqIr6NjYWCQnJyMmJgY7dux46TkSEQ4ePIikpCTo6+uL6jZv3gwPDw+4u7tj8ODBWLNmjairZOPGjejevTtat25d5rG1Mvrnham8DF988QW5uLjQ+vXrydDQkK5cuUJERD///DN5e3tLPZws8JW1TDYZkOuV9dGjRwkAbdu27YXtPD09aeHChcLjvn370rBhw4THvr6+1KRJE9GVZnh4ODVp0kR47OjoSIMHDxYeazQasrKyopUrVxIR0Y8//kju7u6iY+Tn55OhoSHt3r2biIqvrK2trSk/P/+F8fr6+pKenh4ZGxuTnp4eASADAwM6fPiwqF3Hjh1p6dKlRET0+PFjatCgAe3bt0+oNzAwoAkTJoj26d+/PxkbG5OxsTH5+Pg8N4Yqu7Jet24dvvvuOwQFBYnutmnZsiWSkpIq/+nBGKtx6KmryBcZNWoUoqOjAQB3797Fzp07MWLECFEbb29v0ZWmj48PLl26hKKiIqGsRYsWws8KhQI2NjbCbdmnTp3C5cuXYWJiApVKBZVKBQsLC+Tl5eHKlSvCfs2bNxeukDds2CC0ValUOHjwoNAuKCgIiYmJOHz4MHr16oVPP/0UHTt2FOqTk5ORkJCAwMBAAICuri4GDRr00p6EFStWIDExESNGjEBubm65fn8vIvl289u3b8PV1bVUuUaj4RnEGKul3NzcoFAoXnpBNmTIEEybNg3x8fH4+++/4ezsjDfeeEPy8+np6YkeKxQKaDQaAEBOTg7atGmDDRs2lNqvYcOGws9Pd9cEBASgQ4cOwmN7e3vhZzMzMyGnbd68Ga6urvD29oafnx+A4i8WCwsLYWdnJ+xDRFAqlVi+fDnMzMzg5uYmdNOUKJnrw8LCQtrJP4fkK2tPT0/Rp1KJLVu2PLe/hjEmbxYWFvD398c
333yDhw8flqovmUHQ0tIS/fv3R3R0NNauXYvhw4eXanv06FHR4yNHjsDNza3cU4V6eXnh0qVLsLKygqurq2gzMzMrcx8TExNRu5I5OZ6lUqkQGhqKKVOmgIhQWFiIdevWYfHixUhMTBS2U6dOwc7ODhs3bgQABAYGIiYmBidPnizXOVTICztJyvDbb7+RmZkZLViwgIyMjGjRokU0atQo0tfXpz179kg9nCxwn7VMNhmQa581EdGVK1fIxsaGPD09acuWLXTx4kU6f/48LVu2jDw8PIR2e/bsIX19fdLR0aHbt2+LjuHr60sqlYrCwsIoKSmJfvrpJzI2NqZvv/1WaOPo6EhLliwR7deyZUuaNWsWERE9fPiQ3Nzc6M0336QDBw7Q1atXad++fTR+/Hi6efMmET0ZDfIyZY0GuX//PhkaGtIvv/xCv/76K+nr61NmZmapfadOnUpt27YlouLXtVOnTlS/fn1aunQpHT9+nK5evUq7du2i9u3bk5eX13NjKG+fteRukH79+mH79u2YPXs2jI2NMXPmTHh5eWH79u3o3r17FXycMMZqAhcXF5w4cQLz5s3D5MmTkZqaioYNG6JNmzZYuXKl0M7Pzw+2trZo2rSpqOugxJAhQ/Do0SO0b98eOjo6CA0NxUcffVTuOIyMjHDgwAGEh4djwIAByM7Ohr29Pbp16wZTU9NKn6eFhQWGDBmCzz//HM7OzvDz8yvzin3gwIGIjIzE6dOn0aJFC8TGxmLp0qWIjo7G9OnTodFo4OzsjF69eiEsLKzScSmIyvnNQR2WlZUFMzMzqNXq8r0ZeJKe6iGDt3JeXh6uXbsGZ2dnGBgYVHc4VSInJwf29vaIjo7GgAEDRHVvvvkmWrVqxbeTP6W87wmez5oxphUajQb37t3D4sWLYW5ujoCAgOoOqVaRnKzr169f5gDvknvrXV1dMWzYsDK/WGCM1V4pKSlwdnZGo0aNsHbtWujq8rWgNkn+bc6cORPz5s1Dr1690L59ewBAQkKCcHfTtWvXEBwcjMLCQowePVrrATPGaiYnJ6eXjseOi4t7NcHUQpKT9aFDhzB37lyMHTtWVL5q1Srs2bMHW7duRYsWLfDVV19xsmaMMS2RPM569+7dwmDxp3Xr1g27d+8GAPTu3VuYM4QxxljlSU7WFhYW2L59e6ny7du3C3fqPHz4ECYmJpWPjjHGGIAKdIPMmDEDwcHB2Ldvn9Bn/c8//+Cvv/4S5oCNiYmBr6+vdiNljLE6THKyHj16NDw9PbF8+XJs27YNAODu7o79+/cLk59MnjxZu1EyxlgdV6GxNZ06dUKnTp20HQtjjLHnqNRAyLy8vFLL5Wjjdk/GGGNikr9gzM3Nxbhx42BlZQVjY2PUr19ftEl1+/ZtDB48GJaWljA0NETz5s1x7NgxoZ6IMHPmTNja2sLQ0BB+fn64dOmS6BgZGRkICgqCqakpzM3NMXLkSOTk5IjanD59Gm+88QYMDAzg4OCAyMhIybEyVlUUXyhe6VYRw4YNQ//+/UVlW7ZsgYGBARYvXgygeEHd0NBQuLq6wsDAANbW1ujUqRNWrlz50jmdSxauVSgUMDU1Rbt27fD777+X2TYiIgI6OjpYtGhRmfWViaOmkpysP/nkE+zduxcrV66EUqnE6tWr8cUXX8DOzg7r1q2TdKwHDx6gU6dO0NPTw86dO3H+/HksXrxYlPQjIyPx1Vdf4dtvv8XRo0dhbGwMf39/5OXlCW2CgoJw7tw5YfmeAwcOiCaGycrKQo8ePeDo6Ijjx49j0aJF+Pzzz/Hdd99JPX3G2P+sXr0aQUFBWLlyJSZPnoyrV6+idevW2LNnD+bPn4+TJ08iPj4eU6dOxY4dO/Df//73pceMjo5Gamoqjh07hk6dOuHdd9/FmTNnSrVbs2YNpk6dijVr1pSq00YcNdJL5xB8hoODg7CcjYmJCV26dImIiNatW0e9evWSdKzw8HDq3Lnzc+s1Gg3Z2NjQokW
LhLLMzExSKpW0ceNGIiI6f/48AaB//vlHaLNz505SKBTC9IwrVqyg+vXri5b4CQ8PJ3d39zKfNy8vj9RqtbDdvHmTwFOk1vxNBp43HSY+xyvdKuLpaUcXLlxIBgYGomW+/P39qVGjRmUuqEv04kVjiYgA0K+//io8zsrKIgC0bNkyUbu4uDiyt7engoICsrOzK7UEV2XjeNWqbFmvjIwMuLi4ACjun87IyAAAdO7cGQcOHJB0rD/++ANt27bFe++9BysrK7Ru3Rrff/+9UH/t2jWkpaWJbsIxMzNDhw4dEB8fDwCIj4+Hubk52rZtK7Tx8/NDvXr1hEnO4+Pj0aVLF9EimP7+/khOTsaDBw9KxRUREQEzMzNhc3BwkHRejNVm4eHhmDNnDnbs2IF33nkHAHD//n3s2bPnuQvqAtIWjS0sLBSWzXp28dqoqCgEBgZCT08PgYGBouW1tB1HTSI5Wbu4uODatWsAAA8PD2zevBlA8U0x5ubmko519epVrFy5Em5ubti9ezeCg4MxYcIE/PDDDwCK+50AwNraWrSftbW1UJeWlgYrKytRva6uLiwsLERtyjrG08/xtOnTp0OtVgvbzZs3JZ0XY7XVzp07ERkZid9//x3dunUTyi9fvgwigru7u6h9gwYNhHUPw8PDX3r8wMBAqFQqKJVKhIWFwcnJCe+//75Qn5WVhS1btmDw4MEAgMGDB2Pz5s3Cd1TaiqMmkpyshw8fjlOnTgEApk2bhm+++QYGBgYICwvDJ598IulYGo0GXl5emD9/Plq3bo2PPvoIo0ePFm6uqS5KpRKmpqaijTFWvJCtk5MTZs2aVepL/LIkJCQgMTERTZs2RX5+PgBg7NixosVrn7ZkyRIkJiZi586d8PT0xOrVq0VrGG7cuBGNGzdGy5YtAQCtWrWCo6MjNm3aJDkOuZE8dO/pFQ/8/PyQlJSE48ePw9XVVbQicXnY2trC09NTVNakSRNs3boVAGBjYwOgeJXkksUnSx63atVKaFOy6nGJwsJCZGRkCPvb2Njg7t27ojYlj0vaMMZezt7eHlu2bEHXrl3Rs2dP7Ny5U1jfUKFQlFo0tqTL9Ok1D2fPno0pU6aUeXwbGxthncTo6Gj07t0b58+fF/73HBUVhXPnzommX9VoNFizZg1GjhwpKQ65kXxl/SxHR0e89dZbkhM1UHxzzbO/1IsXL8LR0REA4OzsDBsbG8TGxgr1WVlZOHr0KHx8fAAUL2OfmZmJ48ePC2327t0LjUYjrGbs4+ODAwcOiFZfj4mJgbu7e4WGGzJWlzk6OmL//v1IS0tDz549kZ2dDUtLS3Tv3h3Lly8vc0Hdpz270O3ztG/fHm3atMG8efMAAGfOnMGxY8cQFxcnWrw2Li4O8fHxSEpKkhSH3EhO1gsXLhT9l+P999+HpaUl7O3the6R8goLC8ORI0cwf/58XL58GT/99BO+++47hISEACj+ImDixImYO3cu/vjjD5w5cwZDhgyBnZ2dMN6zSZMm6NmzJ0aPHo2EhAQcPnwY48aNwwcffCCs//af//wH+vr6GDlyJM6dO4dNmzZh2bJlmDRpktTTZ4wBcHBwQFxcHNLT0+Hv74+srCysWLEChYWFaNu2LTZt2oQLFy4gOTkZ69evR1JSUrlXL3/axIkTsWrVKty+fRtRUVFo3749unTpgmbNmglbly5d0K5dO+GLxqqIo0aQOszEyclJGCqzZ88eMjc3p927d9PIkSOpe/fukoetbN++nZo1a0ZKpZI8PDzou+++E9VrNBqaMWMGWVtbk1KppG7dulFycrKozf379ykwMJBUKhWZmprS8OHDKTs7W9Tm1KlT1LlzZ1IqlWRvb08LFiwod4y8urlMNhmoLUP3Sty6dYvc3NzI29ub1Go13blzh8aNG0fOzs6kp6dHKpWK2rdvT4sWLaKHDx++8Ph4ZugeUfHfv4eHBwUHB5OlpSVFRkaWue/ChQvJysqKCgoKiIgqFcerVt6
he5IXzDU0NMTFixfh4OCA0NBQ5OXlYdWqVbh48SI6dOhQ5lA4ueMFc2VC2lu5WtSFBXOZNOV9T0juBqlfv74wlG3Xrl3CGGgiQlFRUQXDZYwx9iKSR4MMGDAA//nPf+Dm5ob79++jV69eAICTJ0++8MsCxhhjFSc5WS9ZsgROTk64efMmIiMjhXGSqamp+Pjjj7UeIGOMMUByn3VdxH3WMiGDtzL3WbNnlfc9IfnK+mUz6w0ZMkTqIRmrc/gaiZUo73tBcrIODQ0VPX78+DFyc3Ohr68PIyMjTtaMvYCenh6A4nnh5Xw3HdOekvm1S94bzyM5WZc1NO/SpUsIDg6WPDcIY3WNjo4OzM3NhSkSjIyMZDsLHKscIkJubi7S09Nhbm7+0pt1tNZnfezYMQwePBhJSUnaOFyNwn3WMiGTrgUiQlpaGjIzM6s7FFYDmJubw8bG5qUf2pVag1F0IF1d3LlzR1uHY6zWUigUsLW1hZWVlWi+Glb36Onplfv2d8nJ+o8//hA9JiKkpqZi+fLlvOI5YxLo6OjId54K9spJTtbPLpipUCjQsGFDvPXWW8KimYwxxrRLcrLWaDRVEQdjjLEXqPR81owxxqoeJ2vGGJMBTtaMMSYDnKwZY0wGOFkzxpgMVOimmMzMTCQkJCA9Pb3U6BCeG4QxxrRP8u3m27dvR1BQEHJycmBqaiq6RVKhUCAjI0PrQVY3vt1cJmRyuzljFSE5Wb/++uvo3bs35s+fDyMjo6qKq0bhZC0TnKxZLSa5z/r27duYMGFCnUnUjDFWE0hO1v7+/jh27FhVxMIYY+w5JH/B2KdPH3zyySc4f/48mjdvXmrC7ICAAK0FxxhjrJjkPut69Z5/Ma5QKFBUVFTpoGoa7rOWCe6zZrUYT+TEGGMyUKmbYvLy8rQVB2OMsReQnKyLioowZ84c2NvbQ6VS4erVqwCAGTNmICoqSusBMsYYq0CynjdvHtauXYvIyEjo6+sL5c2aNcPq1au1GhxjjLFikpP1unXr8N133yEoKEi0JFHLli1r5WK5jDFWE1TophhXV9dS5RqNhhf/ZIyxKiI5WXt6euLgwYOlyrds2YLWrVtrJSjGGGNikofuzZw5E0OHDsXt27eh0Wiwbds2JCcnY926ddixY0dVxMgYY3We5Cvrfv36Yfv27fjvf/8LY2NjzJw5ExcuXMD27dvRvXv3qoiRMcbqPMl3MN66dQuNGjUqs+7IkSPw9vbWSmA1Cd/BKBN8ByOrxSRfWffo0aPMOasPHz6Mnj17aiUoxhhjYpKTtbe3N3r06IHs7Gyh7MCBA+jduzdmzZql1eAYY4wVk5ysV69ejddeew19+/ZFfn4+9u3bhz59+mD27NkICwurihgZY6zOk9xnDQAFBQXo06cPcnNzcfr0aURERGDcuHFVEV+NwH3WMsF91qwWK1eyPn36dKmy7OxsBAYGok+fPggODhbKW7Rood0IawBO1jLByZrVYuVK1vXq1YNCocDTTZ9+XPIzz2f9P5ysqwcna1aLleummGvXrlV1HIwxxl6gXMna0dGxquNgjDH2ApJvNweAK1euYOnSpbhw4QKA4vlCQkND0bhxY60GxxhjrJjkoXu7d++Gp6cnEhIS0KJFC7Ro0QJHjx5F06ZNERMTUxUxMsYYI4latWpF4eHhpcrDw8OpdevWUg8niIiIIAAUGhoqlD169Ig+/vhjsrCwIGNjYxowYAClpaWJ9rtx4wb17t2bDA0NqWHDhjRlyhR6/PixqM2+ffuodevWpK+vT40bN6bo6GhJsanVagJAarW6fDsUf9XF26veGKvFJL/DlUolXbx4sVR5cnIyKZXKCgWRkJBATk5O1KJFC1GyHjt2LDk4OFBsbCwdO3aMvL29qWPHjkJ9YWEhNWvWjPz8/OjkyZP0119/UYMGDWj69OlCm6tXr5KRkRFNmjSJzp8/T19//TXp6OjQrl27yh0fJ2uZbIzVYpLf4Y0aNaLNmzeXKt+
0aRM5ODhIDiA7O5vc3NwoJiaGfH19hWSdmZlJenp69MsvvwhtL1y4QAAoPj6eiIj++usvqlevnuhqe+XKlWRqakr5+flERDR16lRq2rSp6DkHDRpE/v7+z40pLy+P1Gq1sN28eZOTtRw2xmoxyV8wjh49Gh999BGuXr2Kjh07AiiexGnhwoWYNGmS5G6YkJAQ9OnTB35+fpg7d65Qfvz4cTx+/Bh+fn5CmYeHB1577TXEx8fD29sb8fHxaN68OaytrYU2/v7+CA4Oxrlz59C6dWvEx8eLjlHSZuLEic+NKSIiAl988YXkc2GsuqSmpiI1NVXyfra2trC1ta2CiJi2SU7WM2bMgImJCRYvXozp06cDAOzs7PD5559jwoQJko71888/48SJE/jnn39K1aWlpUFfXx/m5uaicmtra6SlpQltnk7UJfUldS9qk5WVhUePHsHQ0LDUc0+fPl30wZOVlQUHBwdJ58bYq7Rq1aoKXWDMmjULn3/+ufYDYlonOVkrFAqEhYUhLCxMmHnPxMRE8hPfvHkToaGhiImJgYGBgeT9q5JSqYRSqazuMBgrtzFjxiAgIEBU9ujRI3Tu3BkAcOjQoTIvTPiqWj4kJ+u33noL27Ztg7m5uShJZ2VloX///ti7d2+5jnP8+HGkp6fDy8tLKCsqKsKBAwewfPly7N69GwUFBcjMzBRdXd+9exc2NjYAABsbGyQkJIiOe/fuXaGu5N+SsqfbmJqalvnmZUyOyurOePjwofBzq1atYGxs/KrDYlokeZx1XFwcCgoKSpXn5eWVuZDu83Tr1g1nzpxBYmKisLVt2xZBQUHCz3p6eoiNjRX2SU5ORkpKCnx8fAAAPj4+OHPmDNLT04U2MTExMDU1haenp9Dm6WOUtCk5BmOMyUG5r6yfnnnv/PnzQp8wUHxFvGvXLtjb25f7iU1MTNCsWTNRmbGxMSwtLYXykSNHYtKkSbCwsICpqSnGjx8PHx8fYemwHj16wNPTEx9++CEiIyORlpaGzz77DCEhIUI3xtixY7F8+XJMnToVI0aMwN69e7F582b8+eef5Y6VMcaqXXmHjSgUCqpXrx7Vq1ePFApFqc3IyIiioqIqNTTl6aF7RE9uiqlfvz4ZGRnRO++8Q6mpqaJ9rl+/Tr169SJDQ0Nq0KABTZ48ucybYlq1akX6+vrk4uLCN8XU1o2J5OTkEAACQDk5OdUdDqukci8+cOPGDRARXFxckJCQgIYNGwp1+vr6sLKygo6OTtV8olQzniJVJsr3Vq4zHj58CJVKBQDIycnhPmuZK3c3SMnMexqNpsqCYYwxVrYKzbrHWF2j+EKG/1t6ahyAar4K0K++UCqKZvH/lkpIHg3CGGPs1eNkzRhjMsDJmjHGZKBCyTozMxOrV6/G9OnTkZGRAQA4ceIEbt++rdXgGGOMFZP8BePp06fh5+cHMzMzXL9+HaNHj4aFhQW2bduGlJQUrFu3ririZIyxOk3ylfWkSZMwbNgwXLp0STQBU+/evXHgwAGtBscYY6yY5Cvrf/75B6tWrSpVbm9vL7oFnTH2CmX/b3ta4VM/p6Hsv3aT/22sxpOcrJVKJbKyskqVX7x4UXRXI2PsFToGYP8L6tc8p9wXQFfth8O0T3KyDggIwOzZs7F582YAxfNbp6SkIDw8HAMHDtR6gIyxcmgLwL0C+/FVtWyUe26QEmq1Gu+++y6OHTuG7Oxs2NnZIS0tDT4+Pvjrr79q5fwDPDeITFTh3CCyvIOxFuA7GJ+QfGVtZmaGmJgYHDp0CKdPn0ZOTg68vLxKrXPIGGNMeyo8N0jnzp2FJYMYY4xVLcnJ+quvviqzXKFQwMDAAK6urujSpUutnS6VMcaqg+RkvWTJEvz777/Izc1F/fr1AQAPHjyAkZERVCoV0tPT4eLign379vGK4IwxpiWSb4qZP38+2rVrh0uXLuH+/fu4f/8+Ll68iA4dOmDZsmVISUmBjY0NwsLCqiJexhirkySPBmn
cuDG2bt2KVq1aicpPnjyJgQMH4urVq/j7778xcOBApKamajPWasOjQWSCR4PUOjwa5AnJV9apqakoLCwsVV5YWCjcwWhnZ4fs7Gdvp2KMMVZRkpN1165dMWbMGJw8eVIoO3nyJIKDg/HWW28BAM6cOQNnZ2ftRckYY3Wc5GQdFRUFCwsLtGnTBkqlEkqlEm3btoWFhQWioqIAACqVCosXL9Z6sIwxVldJ7rMukZSUhIsXLwIA3N3d4e5ekXtd5YH7rGWC+6xrHe6zfqLCN8V4eHjAw8NDm7Ewxhh7jgol61u3buGPP/5ASkoKCgoKRHVffvmlVgJjjDH2hORkHRsbi4CAALi4uCApKQnNmjXD9evXQUTw8vKqihgZY6zOk/wF4/Tp0zFlyhScOXMGBgYG2Lp1K27evAlfX1+89957VREjY4zVeZKT9YULFzBkyBAAgK6uLh49egSVSoXZs2dj4cKFWg+QMcZYBZK1sbGx0E9ta2uLK1euCHX37t3TXmSMMcYEkvusvb29cejQITRp0gS9e/fG5MmTcebMGWzbtg3e3t5VESNjjNV5kpP1l19+iZycHADAF198gZycHGzatAlubm48EoQxxqqIpGRdVFSEW7duoUWLFgCKu0S+/fbbKgmMMcbYE5L6rHV0dNCjRw88ePCgquJhjDFWBslfMDZr1gxXr16tilgYY4w9h+RkPXfuXEyZMgU7duxAamoqsrKyRBtjjDHtkzyRU716T/K74qkJi4gICoUCRUVF2ouuhuCJnGSCJ3KqdXgipyckjwbZt29fVcTBGGPsBSQna19f36qIgzHG2AtI7rMGgIMHD2Lw4MHo2LEjbt++DQD48ccfcejQIa0GxxhjrJjkZL1161b4+/vD0NAQJ06cQH5+PgBArVZj/vz5Wg+QMcZYBUeDfPvtt/j++++hp6cnlHfq1AknTpzQanCMMcaKSU7WycnJ6NKlS6lyMzMzZGZmaiMmxhhjz5CcrG1sbHD58uVS5YcOHYKLi4tWgmKMMSYmOVmPHj0aoaGhOHr0KBQKBe7cuYMNGzZgypQpCA4OrooYGWOszpM8dG/atGnQaDTo1q0bcnNz0aVLFyiVSkyZMgXjx4+vihgZY6zOk3xlrVAo8OmnnyIjIwNnz57FkSNH8O+//2LOnDmSnzwiIgLt2rWDiYkJrKys0L9/fyQnJ4va5OXlISQkBJaWllCpVBg4cCDu3r0rapOSkoI+ffrAyMgIVlZW+OSTT1BYWChqExcXBy8vLyiVSri6umLt2rWS42WMseoiOVmvX78eubm50NfXh6enJ9q3bw+VSlWhJ9+/fz9CQkJw5MgRxMTE4PHjx+jRowcePnwotAkLC8P27dvxyy+/YP/+/bhz5w4GDBgg1BcVFaFPnz4oKCjA33//jR9++AFr167FzJkzhTbXrl1Dnz590LVrVyQmJmLixIkYNWoUdu/eXaG4GWPsVZM8N0jDhg3x6NEjBAQEYPDgwfD394eOjo5Wgvn3339hZWWF/fv3o0uXLlCr1WjYsCF++uknvPvuuwCApKQkNGnSBPHx8fD29sbOnTvx9ttv486dO7C2tgYAfPvttwgPD8e///4LfX19hIeH488//8TZs2eF5/rggw+QmZmJXbt2lYojPz9fGD8OFM8N4uDgwHOD1HQ8N0itw3ODPCH5yjo1NRU///wzFAoF3n//fdja2iIkJAR///13pYNRq9UAAAsLCwDA8ePH8fjxY/j5+QltPDw88NprryE+Ph4AEB8fj+bNmwuJGgD8/f2RlZWFc+fOCW2ePkZJm5JjPCsiIgJmZmbC5uDgUOlzY4yxypCcrHV1dfH2229jw4YNSE9Px5IlS3D9+nV07doVjRs3rnAgGo0GEydORKdOndCsWTMAQFpaGvT19WFubi5qa21tjbS0NKHN04m6pL6k7kVtsrKy8OjRo1KxTJ8+HWq1Wthu3rxZ4fNijDFtkDwa5GlGRkbw9/fHgwcPcOPGDVy
4cKHCxwoJCcHZs2drxPwiSqUSSqWyusNgjDFBhSZyys3NxYYNG9C7d2/Y29tj6dKleOedd4RuB6nGjRuHHTt2YN++fWjUqJFQbmNjg4KCglJ3Rt69exc2NjZCm2dHh5Q8flkbU1NTGBoaVihmxhh7lSQn6w8++ABWVlYICwuDi4sL4uLicPnyZcyZMwceHh6SjkVEGDduHH799Vfs3bsXzs7Oovo2bdpAT08PsbGxQllycjJSUlLg4+MDAPDx8cGZM2eQnp4utImJiYGpqSk8PT2FNk8fo6RNyTEYY6ymk9wNoqOjg82bN5c5CuTs2bNCf3N5hISE4KeffsLvv/8OExMToY/ZzMwMhoaGMDMzw8iRIzFp0iRYWFjA1NQU48ePh4+PD7y9vQEAPXr0gKenJz788ENERkYiLS0Nn332GUJCQoSujLFjx2L58uWYOnUqRowYgb1792Lz5s34888/pZ4+Y4xVC8lD956VnZ2NjRs3YvXq1Th+/LikZb0UzxniFh0djWHDhgEovilm8uTJ2LhxI/Lz8+Hv748VK1YIXRwAcOPGDQQHByMuLg7GxsYYOnQoFixYAF3dJ59FcXFxCAsLw/nz59GoUSPMmDFDeI6X4WW9ZIKH7tU6PHTviQon6wMHDiAqKgpbt26FnZ0dBgwYgIEDB6Jdu3bajrHacbKWCU7WtQ4n6yckdYOkpaVh7dq1iIqKQlZWFt5//33k5+fjt99+E/qHGWOMaV+5v2Ds27cv3N3dcfr0aSxduhR37tzB119/XZWxMcYY+59yX1nv3LkTEyZMQHBwMNzc3KoyJsYYY88o95X1oUOHkJ2djTZt2qBDhw5Yvnw57t27V5WxMcYY+59yJ2tvb298//33SE1NxZgxY/Dzzz/Dzs4OGo0GMTExyM7Orso4GWOsTqvU0L3k5GRERUXhxx9/RGZmJrp3744//vhDm/HVCDwaRCZ4NEitw6NBnqjQ7eYl3N3dERkZiVu3bmHjxo3aiokxxtgzKn1TTF3AV9YywVfWtQ5fWT9RqStrxhhjrwYna8YYkwFO1owxJgOcrBljTAY4WTPGmAxwsmaMMRngZM0YYzLAyZoxxmSAkzVjjMkAJ2vGGJMBTtaMMSYDnKwZY0wGOFkzxpgMcLJmjDEZ4GTNGGMywMmaMcZkgJM1Y4zJACdrxhiTAU7WjDEmA5ysGWNMBjhZM8aYDHCyZowxGeBkzRhjMsDJmjHGZICTNWOMyQAna8YYkwFO1owxJgOcrBljTAY4WTPGmAxwsmaMMRngZM0YYzLAyZoxxmSAkzVjjMkAJ2vGGJMBTtaMMSYDnKwZY0wG6lSy/uabb+Dk5AQDAwN06NABCQkJ1R0SY4yVS51J1ps2bcKkSZMwa9YsnDhxAi1btoS/vz/S09OrOzTGGHspBRFRdQfxKnTo0AHt2rXD8uXLAQAajQYODg4YP348pk2bJmqbn5+P/Px84bFarcZrr72GmzdvwtTU9OVPZmam1dhZOanVVXZoswh+TauDerq019TExAQKhaKKoqlmVAfk5+eTjo4O/frrr6LyIUOGUEBAQKn2s2bNIgC88cabzDa1Wv2Kssqrp4s64N69eygqKoK1tbWo3NraGklJSaXaT58+HZMmTRIeazQaZGRkwNLSsvZ+agPIysqCg4ND+f8HwWq8uvaampiYVHcIVaZOJGuplEollEqlqMzc3Lx6gqkGpqamdeIPuy7h11T+6sQXjA0aNICOjg7u3r0rKr979y5sbGyqKSrGGCu/OpGs9fX10aZNG8TGxgplGo0GsbGx8PHxqcbIGGOsfOpMN8ikSZMwdOhQtG3bFu3bt8fSpUvx8OFDDB8+vLpDqzGUSiVmzZpVqguIyRe/prVHnRm6BwDLly/HokWLkJaWhlatWuGrr75Chw4dqjssxhh7qTqVrBljTK7qRJ81Y4zJHSdrxhiTAU7WjDEmA5ysayknJycsXbq0usNgjGkJJ+tqkJaWhvHjx8PFxQV
KpRIODg7o27evaBy4nLz55ptQKBRQKBQwMDDA66+/joiICJT13XV8fDx0dHTQp0+fMo9VUFCARYsWwcvLC8bGxjAzM0PLli3x2Wef4c6dO1V9Kq/csGHD0L9/f1HZli1bYGBggMWLFwMofr+EhobC1dUVBgYGsLa2RqdOnbBy5Urk5ua+8Pglr4tCoYCpqSnatWuH33//vcy2ERER0NHRwaJFi8qsr0wcTAuqdWaSOujatWtkZ2dHnp6etGXLFkpOTqazZ8/S4sWLyd3dXWvP4+joSEuWLNHa8YiKJ8Qqi6+vL40ePZpSU1Pp+vXrtGbNGtLV1aUVK1aUajty5EgKDQ0llUpFt2/fFtXl5eVRly5dyNzcnJYtW0bHjh2jGzduUFxcHI0ZM4amTZum1fOpCYYOHUr9+vUTHn///fekr69Pa9asISKiK1eukI2NDXl4eNCmTZvo/PnzdOXKFfrtt9+od+/e9Pvvv7/w+AAoOjqaUlNTKTk5mUJDQ0lXV5dOnz5dqq2rqytNmzaNPDw8StVVNg5WeZysX7FevXqRvb095eTklKp78OABDR8+nPr06SMqLygooIYNG9Lq1auJqDg5hoSEUEhICJmampKlpSV99tlnpNFohH0cHR1p3rx5NHz4cFKpVOTg4ECrVq0SHTclJYXee+89MjMzo/r161NAQABdu3ZNqC9JJHPnziVbW1tycnIq85x8fX0pNDRUVObl5UXvvPOOqCw7O5tUKhUlJSXRoEGDaN68eaL6iIgIqlevHp04caLM53n6/GqLp5P1woULycDAgLZt2ybU+/v7U6NGjcp8vxC9/HcCQDTbZFZWFgGgZcuWidrFxcWRvb09FRQUkJ2dHR0+fFhUX9k4WOVxN8grlJGRgV27diEkJATGxsal6s3NzTFq1Cjs2rULqampQvmOHTuQm5uLQYMGCWU//PADdHV1kZCQgGXLluHLL7/E6tWrRcdbvHgx2rZti5MnT+Ljjz9GcHAwkpOTAQCPHz+Gv78/TExMcPDgQRw+fBgqlQo9e/ZEQUGBcIzY2FgkJycjJiYGO3bseOk5EhEOHjyIpKQk6Ovri+o2b94MDw8PuLu7Y/DgwVizZo2oq2Tjxo3o3r07WrduXeaxa/OMh+Hh4ZgzZw527NiBd955BwBw//597Nmz57nvF0Da76SwsBBRUVEAUOq1iYqKQmBgIPT09BAYGCi0q4o4WAVV96dFXXL06FECILpyKounpyctXLhQeNy3b18aNmyY8NjX15eaNGkiupoJDw+nJk2aCI8dHR1p8ODBwmONRkNWVla0cuVKIiL68ccfyd3dXXSM/Px8MjQ0pN27dxNR8VWftbX1c7s/no5HT0+PjI2NSU9PjwCQgYFBqauzjh070tKlS4mI6PHjx9SgQQPat2+fUG9gYEATJkwQ7dO/f38yNjYmY2Nj8vHxeWEccjR06FDS19cnABQbGyuqO3LkSJnvF0tLS+F3MnXq1Bcev+S1MDY2pnr16hEAcnJyovv37wtt1Go1GRoaUmJiIhERnTx5klQqFWVnZ2stDlZ5fGX9ClE5bxYdNWoUoqOjARTPDLhz506MGDFC1Mbb21t0NePj44NLly6hqKhIKGvRooXws0KhgI2NjbCM2alTp3D58mWYmJhApVJBpVLBwsICeXl5uHLlirBf8+bNhauwDRs2CG1VKhUOHjwotAsKCkJiYiIOHz6MXr164dNPP0XHjh2F+uTkZCQkJCAwMBAAoKuri0GDBomu4MqyYsUKJCYmYsSIEbX2S6wWLVrAyckJs2bNQk5OzkvbJyQkIDExEU2bNhVWNBo7dqzotXnakiVLkJiYiJ07d8LT0xOrV6+GhYWFUL9x40Y0btwYLVu2BAC0atUKjo6O2LRpk+Q4WNWpMxM51QRubm5QKBRlLnjwtCFDhmDatGmIj4/H33//DWdnZ7zxxhuSn09PT0/0WKFQQKPRAABycnLQpk0bbNiwodR+DRs2FH5++r+
9AQEBorlU7O3thZ/NzMzg6uoKoLi7w9XVFd7e3vDz8wNQ/N/swsJC2NnZCfsQEZRKJZYvXw4zMzO4ubkJ3TQlbG1tAUCUXGobe3t7bNmyBV27dkXPnj2xc+dOmJiYwNXVFQqFotTvxMXFBQBgaGgolM2ePRtTpkwp8/g2NjZwdXWFq6sroqOj0bt3b5w/fx5WVlYAil+bc+fOQVf3STrQaDRYs2YNRo4cKSkOVnX4yvoVsrCwgL+/P7755hs8fPiwVH1mZiYAwNLSEv3790d0dDTWrl1b5syAR48eFT0+cuQI3NzcoKOjU65YvLy8cOnSJVhZWQl/yCWb2XPWkCxJICXb8/5IVSoVQkNDMWXKFBARCgsLsW7dOixevBiJiYnCdurUKdjZ2WHjxo0AgMDAQMTExODkyZPlOofaxNHREfv370daWhp69uyJ7OxsWFpaonv37li+fHmZ75enPfs6Pk/79u3Rpk0bzJs3DwBw5swZHDt2DHFxcaLXJi4uDvHx8UhKSpIUB6s6nKxfsW+++QZFRUVo3749tm7dikuXLuHChQv46quvRHNrjxo1Cj/88AMuXLiAoUOHljpOSkoKJk2ahOTkZGzcuBFff/01QkNDyx1HUFAQGjRogH79+uHgwYO4du0a4uLiMGHCBNy6davS5zlmzBhcvHgRW7duxY4dO/DgwQOMHDkSzZo1E20DBw4UukLCwsLg4+ODbt26YdmyZThx4gSuXbuG3bt3Y+fOneX+IJIrBwcHxMXFIT09Hf7+/sjKysKKFStQWFiItm3bYtOmTbhw4QKSk5Oxfv16JCUlVeh3MnHiRKxatQq3b99GVFQU2rdvjy5duohely5duqBdu3bCa1MVcTCJqrnPvE66c+cOhYSEkKOjI+nr65O9vT0FBASIvmzTaDTk6OhIvXv3LrW/r68vffzxxzR27FgyNTWl+vXr0//93/+VGrr37Djrli1b0qxZs4THqampNGTIEGrQoAEplUpycXGh0aNHC4uOPjsG+HnKGrpHRDRmzBhq2rQpvf3222WeB9GTL11PnTpFRMVjrRcsWEAtW7YkQ0NDUiqV5OHhQWFhYZSSkvLSWOSmrN/xrVu3yM3Njby9vUmtVtOdO3do3Lhx5OzsTHp6eqRSqah9+/a0aNEievjw4QuPj2eG7hEVv7c8PDwoODiYLC0tKTIyssx9Fy5cSFZWVlRQUEBEVKk4WOXxFKk1VE5ODuzt7REdHY0BAwaI6t588020atWKbydnrA7hLxhrGI1Gg3v37mHx4sUwNzdHQEBAdYfEGKsBOFnXMCkpKXB2dkajRo2wdu1a0Tf0jLG6i7tBGGNMBng0CGOMyQAna8YYkwFO1owxJgOcrBljTAY4WTPGmAxwsmaMMRngZM0YYzLAyZoxxmTg/wE8al8OX3/hnwAAAABJRU5ErkJggg==", + "text/plain": [ + "<Figure size 300x300 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is 53.9% reduction in token usage for KG-RAG compared to Cypher-RAG\n" + ] + } + ], + "source": [ + "neo4j_rag.loc[:, 'token_usage'] = 0.5*(neo4j_rag.total_tokens_used + neo4j_rag.total_tokens_used_perturbed)\n", + "kg_rag.loc[:, 'token_usage'] = 0.5*(kg_rag.total_tokens_used + kg_rag.total_tokens_used_perturbed)\n", + "\n", + "neo4j_avg = 
neo4j_rag['token_usage'].mean()\n",
+ "neo4j_sem = neo4j_rag['token_usage'].sem()\n",
+ "\n",
+ "kg_avg = kg_rag['token_usage'].mean()\n",
+ "kg_sem = kg_rag['token_usage'].sem()\n",
+ "\n",
+ "\n",
+ "fig = plt.figure(figsize=(3, 3))\n",
+ "\n",
+ "plt.bar(0, neo4j_avg, yerr=neo4j_sem, color='red', ecolor='black', capsize=5, label='Cypher-RAG')\n",
+ "\n",
+ "plt.bar(1, kg_avg, yerr=kg_sem, color='green', ecolor='black', capsize=5, label='KG-RAG')\n",
+ "\n",
+ "plt.ylabel('Average token usage')\n",
+ "plt.xticks([0, 1], ['Cypher-RAG', 'KG-RAG'])\n",
+ "\n",
+ "sns.despine()\n",
+ "\n",
+ "plt.legend(loc='center left', bbox_to_anchor=(0.6, 0.75))\n",
+ "\n",
+ "plt.show()\n",
+ "\n",
+ "percentage_of_reduction_in_token_usage = round(100*(neo4j_avg-kg_avg)/neo4j_avg,1)\n",
+ "print(f'There is {percentage_of_reduction_in_token_usage}% reduction in token usage for KG-RAG compared to Cypher-RAG')\n",
+ "\n",
+ "fig_path = '../data/results/figures'\n",
+ "os.makedirs(fig_path, exist_ok=True)\n",
+ "fig.savefig(os.path.join(fig_path, 'token_usage_comparison.svg'), format='svg', bbox_inches='tight') \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ea726fd",
+ "metadata": {},
+ "source": [
+ "## Retrieval accuracy comparison"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5004ceb3",
+ "metadata": {},
+ "source": [
+ "### Without name perturbation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "757f36d5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Correct retrieval percentage for Cypher-RAG 75.0%\n",
+ "Correct retrieval percentage for KG-RAG 97.0%\n"
+ ]
+ }
+ ],
+ "source": [
+ "def count_correct_retrievals(rag_df, answer_col, pvalue_col='gwas_pvalue'):\n",
+ "    '''Count the rows of rag_df whose answer is a correct retrieval.\n",
+ "\n",
+ "    An answer is correct when it contains the literal text Yes or the\n",
+ "    string form of the value in pvalue_col on the same row. Rows whose\n",
+ "    answer is NaN are never counted.\n",
+ "\n",
+ "    Parameters: rag_df is a RAG output frame, answer_col names the answer\n",
+ "    column to score, pvalue_col names the column with the expected p-value.\n",
+ "    Returns the number of correct rows as a plain int.\n",
+ "    '''\n",
+ "    answered = rag_df.dropna(subset=[answer_col])\n",
+ "    # astype(str) keeps the .str accessor usable even if no answered rows remain.\n",
+ "    answers = answered[answer_col].astype(str)\n",
+ "    says_yes = answers.str.contains('Yes', regex=False)\n",
+ "    has_pvalue = pd.Series(\n",
+ "        [str(pvalue) in answer for pvalue, answer in zip(answered[pvalue_col], answers)],\n",
+ "        index=answers.index,\n",
+ "        dtype=bool,\n",
+ "    )\n",
+ "    # Union rather than sum: an answer with both Yes and the p-value counts once.\n",
+ "    return int((says_yes | has_pvalue).sum())\n",
+ "\n",
+ "\n",
+ "neo4j_rag_total_correct_retrieval = count_correct_retrievals(neo4j_rag, 'neo4j_rag_answer')\n",
+ "kg_rag_total_correct_retrieval = count_correct_retrievals(kg_rag, 'kg_rag_answer')\n",
+ "\n",
+ "# The denominator is every row, so rows without an answer count as failed retrievals.\n",
+ "neo4j_rag_total_correct_retrieval_percentage = 100*neo4j_rag_total_correct_retrieval/neo4j_rag.shape[0]\n",
+ "kg_rag_total_correct_retrieval_percentage = 100*kg_rag_total_correct_retrieval/kg_rag.shape[0]\n",
+ "\n",
+ "print(f'Correct retrieval percentage for Cypher-RAG {neo4j_rag_total_correct_retrieval_percentage}%')\n",
+ "print(f'Correct retrieval percentage for KG-RAG {kg_rag_total_correct_retrieval_percentage}%')\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "360a6019",
+ "metadata": {},
+ "source": [
+ "### With name perturbation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "0a433581",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Correct retrieval percentage for Cypher-RAG after name perturbation 0.0%\n",
+ "Correct retrieval percentage for KG-RAG after name perturbation 97.0%\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Same scoring as the previous cell, applied to the answers in the *_perturbed columns.\n",
+ "neo4j_rag_total_correct_retrieval_perturbed = count_correct_retrievals(neo4j_rag, 'neo4j_rag_answer_perturbed')\n",
+ "kg_rag_total_correct_retrieval_perturbed = count_correct_retrievals(kg_rag, 'kg_rag_answer_perturbed')\n",
+ "\n",
+ "neo4j_rag_total_correct_retrieval_perturbed_percentage = 100*neo4j_rag_total_correct_retrieval_perturbed/neo4j_rag.shape[0]\n",
+ "kg_rag_total_correct_retrieval_perturbed_percentage = 100*kg_rag_total_correct_retrieval_perturbed/kg_rag.shape[0]\n",
+ "\n",
+ "print(f'Correct retrieval percentage for 
Cypher-RAG after name perturbation {neo4j_rag_total_correct_retrieval_perturbed_percentage}%')\n", + "print(f'Correct retrieval percentage for KG-RAG after name perturbation {kg_rag_total_correct_retrieval_perturbed_percentage}%')\n" + ] + }, + { + "cell_type": "markdown", + "id": "d72ebbfa", + "metadata": {}, + "source": [ + "### Bar plot" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e6d8690d", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEhCAYAAAB7mQezAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/H5lhTAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyJklEQVR4nO3deVRU9f8/8OewjcjIICibIaDggvuSgpZWYLh8co2MDx5xSctQUbSUUnEnLddcMhfET+6aS5qWIbiEu+LyURENA03gU8mMuLDN+/uHP+/PG6iMDjMDPh/nzDnM+955z3MunHlx7/ve+1YIIQSIiIj+HwtTByAiIvPCwkBERDIsDEREJMPCQEREMiwMREQkw8JAREQyLAxERCTDwkBERDIsDACEENBqteC1fkRELAwAgDt37kCtVuPOnTumjkJEZHJWpnzzgwcP4ssvv8SpU6dw69YtbNu2DT179pSWCyEQExOD5cuXIzc3F+3bt8fSpUvh6+srrfP3339jxIgR+OGHH2BhYYE+ffpgwYIFUKlUJvhEROanuLgYhYWFpo5BJmRtbQ1LS8syr2/SwnD37l00a9YMgwYNQu/evUssnz17NhYuXIj4+Hh4e3tj4sSJCA4OxsWLF1GlShUAQFhYGG7duoV9+/ahsLAQAwcOxNChQ7Fu3TpjfxwisyKEQFZWFnJzc00dhcyAg4MDXF1doVAonrmuwlxuoqdQKGR7DEIIuLu7Y8yYMRg7diwAQKPRwMXFBatXr8b777+PS5cuwc/PDydOnEDr1q0BAHv37kXXrl1x48YNuLu7l+m9tVot1Go1NBoN7O3ty+XzERnbrVu3kJubC2dnZ1StWrVMXwhU+QghcO/ePeTk5MDBwQFubm7PfI1J9xieJj09HVlZWQgKCpLa1Go12rZtiyNHjuD999/HkSNH4ODgIBUFAAgKCoKFhQWOHTuGXr16ldp3fn4+8vPzpedarbb8PgiRCRQXF0tFwcnJydRxyMRsbW0BADk5OXB2dn7mYSWzHXzOysoCALi4uMjaXVxcpGVZWVlwdnaWLbeysoKjo6O0TmliY2OhVqulh4eHh4HTE5nWozGFqlWrmjgJmYtHfwtlGW8y28JQnqKjo6HRaKRHZmamqSMRlQsePqJH9PlbMNvC4OrqCgDIzs6WtWdnZ0vLXF1dkZOTI1teVFSEv//+W1qnNEqlEvb29rIHERE9ZLaFwdvbG66urkhISJDatFotjh07hoCAAABAQEAAcnNzcerUKWmd/fv3Q6fToW3btkbPTEQVh5eXF+bPn2/qGGbJpIUhLy8PKSkpSElJAfBwwDklJQUZGRlQKBQYNWoUpk+fjp07d+L8+fPo378/3N3dpTOXGjZsiM6dO2PIkCE4fvw4fv31VwwfPhzvv/9+mc9IInrpKBTGezynrKwsjBgxAnXq1IFSqYSHhwfeeecd2T+KFckbb7wBhUIBhUKBKlWqoF69eoiNjS31bgtHjhyBpaUlunXrVmpfBQU
F+PLLL9GyZUvY2dlBrVajWbNmmDBhAv744w/DBBYmlJiYKACUeISHhwshhNDpdGLixInCxcVFKJVKERgYKFJTU2V9/PXXXyI0NFSoVCphb28vBg4cKO7cuaNXDo1GIwAIjUZjqI9GZFL3798XFy9eFPfv3y+5EDDe4zmkp6cLd3d34efnJ7Zs2SJSU1PFhQsXxJw5c0T9+vVfcMv8f56enmLevHkG608IIfLz80tt79ixoxgyZIi4deuWuH79uli1apWwsrISS5YsKbHu4MGDRWRkpFCpVOLmzZuyZQ8ePBAdOnQQDg4OYsGCBeLkyZPi999/F0lJSeLDDz8U48ePf2K2p/5N/INJC4O5YGGgyqYiF4YuXbqIWrVqiby8vBLLbt++LQYOHCi6desmay8oKBA1a9YUK1asEEI8/CKOiIgQERERwt7eXjg5OYkJEyYInU4nvcbT01PMmDFDDBw4UKhUKuHh4SGWLVsm6zcjI0OEhIQItVotqlevLrp37y7S09Ol5eHh4aJHjx5i+vTpws3NTXh5eZX6mTp27CgiIyNlbS1bthS9evWStd25c0eoVCpx+fJl0bdvXzFjxgzZ8tjYWGFhYSFOnz5d6vs8/vn+SZ/CYLbXMZCcYorpzy4RMWZxLaTRcJsb399//429e/dixowZsLOzK7HcwcEBH3zwATp06IBbt25JF2vt2rUL9+7dQ9++faV14+PjMXjwYBw/fhwnT57E0KFDUbt2bQwZMkRaZ86cOZg2bRo+++wzbNmyBcOGDUPHjh1Rv359FBYWIjg4GAEBATh06BCsrKwwffp0dO7cGefOnYONjQ0AICEhAfb29ti3b1+ZPqMQAocPH8bly5dlt/cBgE2bNqFBgwaoX78++vXrh1GjRiE6Olo6o2j9+vXo1KkTWrRoUWrfhjoLzWwHn4no5XP16lUIIdCgQYMnrtOuXTvUr18f//nPf6S2uLg4hISEyO6R5uHhgXnz5qF+/foICwvDiBEjMG/ePFlfXbt2xccffwwfHx+MGzcONWrUQGJiIgBg48aN0Ol0WLFiBZo0aYKGDRsiLi4OGRkZSEpKkvqws7PDihUr0KhRIzRq1OiJuZcsWQKVSgWlUokOHTpAp9Nh5MiRsnVWrlyJfv36AQA6d+4MjUaDAwcOSMuvXLmC+vXry17Tq1cvqFQqqFQqtGvX7onvrw8WBiIyG6KMd+j54IMPEBcXB+DhKex79uzBoEGDZOv4+/vL/oMOCAhAWloaiouLpbamTZtKPysUCtkp8GfPnsXVq1dRrVo16YvX0dERDx48wLVr16TXNWnSRNp7WLt2rbSuSqXCoUOHpPXCwsKQkpKCX3/9FV26dMHnn38u+yJPTU3F8ePHERoaCuDhxbp9+/bFypUrn7otlixZgpSUFAwaNAj37t0r0/Z7Fh5KIiKz4evrC4VCgcuXLz91vf79+2P8+PE4cuQIkpOT4e3tjddff13v97O2tpY9VygU0Ol0AB6eNdmqVSusXbu2xOtq1qwp/fz4Ia/u3bvLTpWvVauW9LNarYaPjw+Ah4eMfHx84O/vL932Z+XKlSgqKpKdUSmEgFKpxKJFi6BWq+Hr64vU1FRZlkeH0xwdHfX78E/BPQYiMhuOjo4IDg7G4sWLcffu3RLLH90p1snJCT179kRcXBxWr16NgQMHllj32LFjsudHjx6Fr69vmW8/3bJlS6SlpcHZ2Rk+Pj6yh1qtLvU11apVk6336B5F/6RSqRAZGYmxY8dCCIGioiKsWbMGc+bMkU7hT0lJwdmzZ+Hu7o7169cDAEJDQ7Fv3z6cOXOmTJ/hebEwEJFZWbx4MYqLi9GmTRts3boVaWlpuHTpEhYuXChd3Ao8PJwUHx+PS5cuITw8vEQ/GRkZiIqKQmpqKtavX4+vv/4akZGRZc4RFhaGGjVqoEePHjh06BDS09ORlJSEkSNH4saNGy/8OT/88ENcuXIFW7duxa5du3D79m0MHjwYjRs3lj3
69OkjHU4aPXo0AgICEBgYiAULFuD06dNIT0/HTz/9hD179ug158LT8FAS0cvGPO60/0R16tTB6dOnMWPGDIwZMwa3bt1CzZo10apVKyxdulRaLygoCG5ubmjUqFGpF7T2798f9+/fR5s2bWBpaYnIyEgMHTq0zDmqVq2KgwcPYty4cejduzfu3LmDWrVqITAw0CC30XF0dET//v0xefJkeHt7IygoqNQ9kT59+mD27Nk4d+4cmjZtioSEBMyfPx9xcXGIjo6GTqeDt7c3unTpgtGjR79wLsCM5mMwpYowHwNPnTS+irzNHzx4gPT0dHh7e0uTWlU2eXl5qFWrFuLi4kpM9PXGG2+gefPmvOXFY/T5m+AeAxFVKDqdDn/++SfmzJkDBwcHdO/e3dSRKh0WBiKqUDIyMuDt7Y1XXnkFq1evhpUVv8YMjVuUiCoULy+vZ17v8PgFaKQ/npVEREQyLAxERCTDwkBERDIsDEREJMPCQEREMiwMREQkw8JAREQyvI6B6CVjzFt9PM8tPQYMGIDc3Fxs375datuyZQv69esn3T8pKysLsbGx2L17N27cuCHd0rpfv34IDw9H1apVn9j/43M0VKtWDfXr18eECRPQo0ePEuvGxsZiwoQJ+OKLL/DJJ5+UWP4iOcwZ9xiIyKytWLECYWFhWLp0KcaMGYPffvsNLVq0wM8//4yZM2fizJkzOHLkCD799FPs2rULv/zyyzP7jIuLw61bt3Dy5Em0b98e7777Ls6fP19ivVWrVuHTTz/FqlWrSiwzRA5zxT0GIjJbs2fPRkxMDDZs2IBevXoBAD7++GNYWVnh5MmTskly6tSpgx49epRpFjgHBwe4urrC1dUV06ZNw4IFC5CYmIgmTZpI6xw4cAD379/H1KlTsWbNGiQnJ8tmXDNEDnPFPQYiMkvjxo3DtGnTsGvXLqko/PXXX/j5558REREh+zJ+3OOHip6lqKhImuvg0fScj6xcuRKhoaGwtrZGaGiobIpNQ+cwNywMRGR29uzZg9mzZ2PHjh0IDAyU2q9evQohBOrXry9bv0aNGtI8y+PGjXtm/6GhoVCpVFAqlRg9ejS8vLzw3nvvScu1Wq00rgEA/fr1w6ZNm5CXl2fQHOaKhYGIzE7Tpk3h5eWFmJgY6cv4aY4fP46UlBQ0atQI+fn5AICPPvpI+pJWqVSy9efNm4eUlBTs2bMHfn5+WLFihWzO5PXr16Nu3bpo1qwZAKB58+bw9PTExo0b9c5REbEwEJHZqVWrFpKSknDz5k107twZd+7cAQD4+PhAoVAgNTVVtn6dOnVKzLE8depU2fzJj3N1dYWPjw/efvttxMXFoW/fvsjJyZGWr1y5Ev/9739hZWUlPS5evCgNQuuToyJiYSAis+Tp6YkDBw4gKytLKg5OTk7o1KkTFi1ahLt37z719c7OzvDx8ZEeT9KmTRu0atUKM2bMAACcP38eJ0+eRFJSkqywJCUl4ciRI7h8+bJeOSoiFgYiMlseHh5ISkpCTk4OgoODodVqsWTJEhQVFaF169bYuHEjLl26hNTUVHz33Xe4fPkyLC0t9X6fUaNGYdmyZbh58yZWrlyJNm3aoEOHDmjcuLH06NChA1599VVpELo8cpgLnq5K9JKpaHN3v/LKK0hKSsKbb76J4OBg/PTTTzhz5gxmzpyJ6Oho3LhxA0qlEn5+fhg7diw+/vhjvd+jc+fO8Pb2xowZM7Bp06YnDhz36dMHc+bMwcyZM1G3bl2D5zAXClGRT7Y1EK1WC7VaDY1GA3t7e1PHKVVFnpi+oqrI21yfid/p5aDP3wQPJRERkQwLAxERybAwEBGRDAsDERHJsDAQVWI8t4Qe0edvgYWBqBKytrYGANy7d8/ESchcPPpbePS38TS8joGoErK0tISDg4N0m4eqVatW6Lt90vMTQuDevXvIycmBg4NDmS68Y2EgqqRcXV0BQHYPIHp5PZqDoizMujAUFxdj8uTJ+O6775CVlQV
3d3cMGDAAEyZMkP77EUIgJiYGy5cvR25uLtq3b4+lS5fC19fXxOmJTEuhUMDNzQ3Ozs4oLCw0dRwyIWtra71u0WHWhWHWrFlYunQp4uPj0ahRI5w8eRIDBw6EWq3GyJEjATyc4WnhwoWIj4+Ht7c3Jk6ciODgYFy8eJFXfBLh4WGlinzfHjI+sy4MycnJ6NGjB7p16wYA8PLywvr163H8+HEAD/cW5s+fL5vIe82aNXBxccH27dvx/vvvmyw7EVFFZdZnJbVr1w4JCQm4cuUKAODs2bM4fPgwunTpAgBIT09HVlYWgoKCpNeo1Wq0bdsWR44ceWK/+fn50Gq1sgcRET1k1nsM48ePh1arRYMGDWBpaYni4mLMmDEDYWFhAICsrCwAgIuLi+x1Li4u0rLSxMbGYsqUKeUXnIioAjPrPYZNmzZh7dq1WLduHU6fPo34+Hh89dVXiI+Pf6F+o6OjodFopEdmZqaBEhMRVXxmvcfwySefYPz48dJYQZMmTfD7778jNjYW4eHh0qlX2dnZcHNzk16XnZ2N5s2bP7FfpVIJpVJZrtmJiCoqs95juHfvHiws5BEtLS2h0+kAAN7e3nB1dUVCQoK0XKvV4tixYwgICDBqViKiysKs9xjeeecdzJgxA7Vr10ajRo1w5swZzJ07F4MGDQLw8DztUaNGYfr06fD19ZVOV3V3d0fPnj1NG56IqIIy68Lw9ddfY+LEifj444+Rk5MDd3d3fPjhh5g0aZK0zqeffoq7d+9i6NChyM3NxWuvvYa9e/fyGgYioufEqT3BqT3LilN7Gt/Lts3JPOg9xhATE4Pff/+9PLIQEZEZ0Lsw7NixA3Xr1kVgYCDWrVuH/Pz88shFREQmondhSElJwYkTJ9CoUSNERkbC1dUVw4YNw4kTJ8ojHxERGdlzna7aokULLFy4EH/88QdWrlyJGzduoH379mjatCkWLFgAjUZj6JxERGQkL3QdgxAChYWFKCgogBAC1atXx6JFi+Dh4YGNGzcaKiMRERnRcxWGU6dOYfjw4XBzc8Po0aPRokULXLp0CQcOHEBaWhpmzJgh3Ra70lAoTPsgIjISvQtDkyZN4O/vj/T0dKxcuRKZmZn44osv4OPjI60TGhqK//3vfwYNSkRExqH3BW7vvfceBg0ahFq1aj1xnRo1aki3rSAioopF78IwceLE8shBRERmQu9DSX369MGsWbNKtM+ePRshISEGCUVERKajd2E4ePAgunbtWqK9S5cuOHjwoEFCERGR6ehdGPLy8mBjY1Oi3dramlNkEhFVAs91VlJp1yhs2LABfn5+BglFRESm81yDz71798a1a9fw1ltvAQASEhKwfv16bN682eABiYjIuPQuDO+88w62b9+OmTNnYsuWLbC1tUXTpk3xyy+/oGPHjuWRkYiIjOi5Jurp1q0bunXrZugsRERkBsx6zmciIjI+vfcYiouLMW/ePGzatAkZGRkoKCiQLf/7778NFo6IiIxP7z2GKVOmYO7cuejbty80Gg2ioqLQu3dvWFhYYPLkyeUQkYiIjEnvwrB27VosX74cY8aMgZWVFUJDQ7FixQpMmjQJR48eLY+MRERkRHoXhqysLDRp0gQAoFKppEl5/vWvf2H37t2GTUdEREand2F45ZVXcOvWLQBA3bp18fPPPwMATpw4AaVSadh0RERkdHoXhl69eiEhIQEAMGLECEycOBG+vr7o378/Bg0aZPCARERkXHqflfTFF19IP/ft2xeenp5ITk6Gr68v3nnnHYOGIyIi49OrMBQWFuLDDz/ExIkT4e3tDQDw9/eHv79/uYQjIiLj0+tQkrW1NbZu3VpeWYiIyAzoPcbQs2dPbN++vRyiEBGROdB7jMHX1xdTp07Fr7/+ilatWsHOzk62fOTIkQYLR0RExqcQQgh9XvBobKHUzhQK/Pbbby8cyti0Wi3UajU0Gg3s7e1LX0mhMG6of779ZJO+PQBAxOj1p1LhKaaY9ncOvHz
bnMyD3nsM6enp5ZGDiIjMBO+uSkREMnrvMTzrIrZVq1Y9dxgiIjI9vQvD7du3Zc8LCwtx4cIF5ObmSlN9EhFRxaV3Ydi2bVuJNp1Oh2HDhqFu3boGCUVERKZjkDEGCwsLREVFYd68eYbojoiITMhgg8/Xrl1DUVGRobojIiIT0ftQUlRUlOy5EAK3bt3C7t27ER4ebrBgRERkGnrvMZw5c0b2OHfuHABgzpw5mD9/vqHz4ebNm+jXrx+cnJxga2uLJk2a4OTJk9JyIQQmTZoENzc32NraIigoCGlpaQbPQUT0stB7jyExMbE8cpTq9u3baN++Pd58803s2bMHNWvWRFpaGqpXry6tM3v2bCxcuBDx8fHw9vbGxIkTERwcjIsXL6JKlSpGy0pEVFk815XPRUVF8PX1lbWnpaXB2toaXl5ehsqGWbNmwcPDA3FxcVLb47fkEEJg/vz5mDBhAnr06AEAWLNmDVxcXLB9+3a8//77BstCRPSy0PtQ0oABA5CcnFyi/dixYxgwYIAhMkl27tyJ1q1bIyQkBM7OzmjRogWWL18uLU9PT0dWVhaCgoKkNrVajbZt2+LIkSNP7Dc/Px9arVb2ICKih55rjKF9+/Yl2v39/ZGSkmKITJLffvsNS5cuha+vL3766ScMGzYMI0eORHx8PAAgKysLAODi4iJ7nYuLi7SsNLGxsVCr1dLDw8PDoLmJiCoyvQuDQqHAnTt3SrRrNBoUFxcbJNQjOp0OLVu2xMyZM9GiRQsMHToUQ4YMwTfffPNC/UZHR0Oj0UiPzMxMAyUmIqr49C4MHTp0QGxsrKwIFBcXIzY2Fq+99ppBw7m5ucHPz0/W1rBhQ2RkZAAAXF1dAQDZ2dmydbKzs6VlpVEqlbC3t5c9iIjoIb0Hn2fNmoUOHTqgfv36eP311wEAhw4dglarxf79+w0arn379khNTZW1XblyBZ6engAeDkS7uroiISEBzZs3B/BwboVjx45h2LBhBs1CRPSy0HuPwc/PD+fOncN7772HnJwc3LlzB/3798fly5fRuHFjg4YbPXo0jh49ipkzZ+Lq1atYt24dvv32W0RERAB4eFhr1KhRmD59Onbu3Inz58+jf//+cHd3R8+ePQ2ahYjoZaH3HgMAuLu7Y+bMmYbOUsKrr76Kbdu2ITo6GlOnToW3tzfmz5+PsLAwaZ1PP/0Ud+/exdChQ5Gbm4vXXnsNe/fu5TUMRETPSe+pPePi4qBSqRASEiJr37x5M+7du1chb4vBqT3L5mWbZpJTe9LLSu9DSbGxsahRo0aJdmdnZ6PsRRARUfnSuzBkZGTIrj5+xNPTUzpbiIiIKi69C4Ozs7N047zHnT17Fk5OTgYJRUREpqN3YQgNDcXIkSORmJiI4uJiFBcXY//+/YiMjOS9iYiIKgG9z0qaNm0arl+/jsDAQFhZPXy5TqdD//79OcZARFQJ6F0YbGxssHHjRkybNg1nz56V5kh4dNEZERFVbM91HQMA1KtXD/Xq1TNkFiIiMgPPVRhu3LiBnTt3IiMjAwUFBbJlc+fONUgwIiIyDb0LQ0JCArp37446depIt8G4fv06hBBo2bJleWQkIiIj0vuspOjoaIwdOxbnz59HlSpVsHXrVmRmZqJjx44lroYmIqKKR+/CcOnSJfTv3x8AYGVlhfv370OlUmHq1KmYNWuWwQMSEZFx6V0Y7OzspHEFNzc3XLt2TVr2559/Gi4ZERGZhN5jDP7+/jh8+DAaNmyIrl27YsyYMTh//jy+//57+Pv7l0dGIiIyIr0Lw9y5c5GXlwcAmDJlCvLy8rBx40b4+vryjCQiokpA78JQp04d6Wc7O7sXnn+ZiIjMi95jDEREVLmxMBARkQwLAxERybAwEBGRDAsDERHJlOmspKioqDJ3yFNWiYgqtjIVhjNnzpSpM4VC8UJhiIjI9MpUGBITE8s7BxERmQmOMRARkcxzTdRz8uRJbNq0qdSJer7//nuDBCMiItPQe49
hw4YNaNeuHS5duoRt27ahsLAQ//3vf7F//36o1eryyEhEREakd2GYOXMm5s2bhx9++AE2NjZYsGABLl++jPfeew+1a9cuj4xERGREeheGa9euoVu3bgAAGxsb3L17FwqFAqNHj8a3335r8IBERGRceheG6tWr486dOwCAWrVq4cKFCwCA3Nxc3Lt3z7DpiIjI6PQefO7QoQP27duHJk2aICQkBJGRkdi/fz/27duHwMDA8shIRERGpHdhWLRoER48eAAA+Pzzz2FtbY3k5GT06dMHEyZMMHhAIiIyLr0Lg6Ojo/SzhYUFxo8fb9BARERkWnqPMQQFBWH16tXQarXlkYeIiExM78LQqFEjREdHw9XVFSEhIdixYwcKCwvLIxsREZmA3oVhwYIFuHnzJrZv3w47Ozv0798fLi4uGDp0KA4cOFAeGYmIyIie615JFhYWePvtt7F69WpkZ2dj2bJlOH78ON566y1D5yMiIiN7rnslPZKVlYUNGzbgu+++w7lz59CmTRtD5SIiIhPRe49Bq9UiLi4OnTp1goeHB5YuXYru3bsjLS0NR48eLY+Mki+++AIKhQKjRo2S2h48eICIiAg4OTlBpVKhT58+yM7OLtccRESVmd57DC4uLqhevTr69u2L2NhYtG7dujxylXDixAksW7YMTZs2lbWPHj0au3fvxubNm6FWqzF8+HD07t0bv/76q1FyERFVNnoXhp07dyIwMBAWFsabyiEvLw9hYWFYvnw5pk+fLrVrNBqsXLkS69atk8Y34uLi0LBhQxw9ehT+/v5Gy0hEVFno/e3eqVMn6HQ6/PLLL1i2bJl036Q//vgDeXl5Bg8IABEREejWrRuCgoJk7adOnUJhYaGsvUGDBqhduzaOHDnyxP7y8/Oh1WplDyIiekjvPYbff/8dnTt3RkZGBvLz89GpUydUq1YNs2bNQn5+Pr755huDBtywYQNOnz6NEydOlFiWlZUFGxsbODg4yNpdXFyQlZX1xD5jY2MxZcoUg+YkIqos9N5jiIyMROvWrXH79m3Y2tpK7b169UJCQoJBw2VmZiIyMhJr165FlSpVDNZvdHQ0NBqN9MjMzDRY30REFZ3eewyHDh1CcnIybGxsZO1eXl64efOmwYIBDw8V5eTkoGXLllJbcXExDh48iEWLFuGnn35CQUEBcnNzZXsN2dnZcHV1fWK/SqUSSqXSoFmJiCoLvQuDTqdDcXFxifYbN26gWrVqBgn1SGBgIM6fPy9rGzhwIBo0aIBx48bBw8MD1tbWSEhIQJ8+fQAAqampyMjIQEBAgEGzEBG9LPQuDG+//Tbmz58vzdamUCiQl5eHmJgYdO3a1aDhqlWrhsaNG8va7Ozs4OTkJLUPHjwYUVFRcHR0hL29PUaMGIGAgACekURE9Jz0Lgxz5sxBcHAw/Pz88ODBA/z73/9GWloaatSogfXr15dHxqeaN28eLCws0KdPH+Tn5yM4OBhLliwxeg4iospCIYQQ+r6oqKgIGzZswLlz55CXl4eWLVsiLCxMNhhdkWi1WqjVamg0Gtjb25e+kkJh3FD/fPvJJn17AICI0ftPpUJTTDHt7xx4+bY5mYfnuleSlZUV+vXrZ+gsRERkBspUGHbu3IkuXbrA2toaO3fufOq63bt3N0gwIiIyjTIVhp49eyIrKwvOzs7o2bPnE9dTKBSlnrFEREQVR5kKg06nK/VnIiKqfPS68rmwsBCBgYFIS0srrzxERGRiehUGa2trnDt3rryyEBGRGdD7Xkn9+vXDypUryyMLERGZAb1PVy0qKsKqVavwyy+/oFWrVrCzs5Mtnzt3rsHCERGR8eldGC5cuCDd1O7KlSsGD0RERKald2FITEwsjxxERGQm9B5jGDRokDRr2+Pu3r2LQYMGGSQUERGZjt6FIT4+Hvfv3y/Rfv/+faxZs8YgoYiIyHTKfChJq9VCCAEhBO7cuSObUa24uBg//vgjnJ2dyyUkEREZT5kLg4ODAxQKBRQ
KBerVq1diuUKh4DzKRESVQJkLQ2JiIoQQeOutt7B161Y4OjpKy2xsbODp6Ql3d/dyCUlERMZT5sLQsWNHAEB6ejpq164NhYnnJyAiovKh9+Czp6cnDh8+jH79+qFdu3a4efMmAOA///kPDh8+bPCARERkXHoXhq1btyI4OBi2trY4ffo08vPzAQAajQYzZ840eEAiIjIuvQvD9OnT8c0332D58uWwtraW2tu3b4/Tp08bNBwRERmf3oUhNTUVHTp0KNGuVquRm5triExERGRCehcGV1dXXL16tUT74cOHUadOHYOEIiIi09G7MAwZMgSRkZE4duwYFAoF/vjjD6xduxZjx47FsGHDyiMjEREZkd430Rs/fjx0Oh0CAwNx7949dOjQAUqlEmPHjsWIESPKIyMRERmRQgghnueFBQUFuHr1KvLy8uDn5weVSoX79+/D1tbW0BnLnVarhVqthkajgb29fekrmfi6DcVkk749AEDEPNefSoWlmGL6a3Vetm1O5kHvQ0mP2NjYwM/PD23atIG1tTXmzp0Lb29vQ2YjIiITKHNhyM/PR3R0NFq3bo127dph+/btAIC4uDh4e3tj3rx5GD16dHnlJCIiIynzGMOkSZOwbNkyBAUFITk5GSEhIRg4cCCOHj2KuXPnIiQkBJaWluWZlYiIjKDMhWHz5s1Ys2YNunfvjgsXLqBp06YoKirC2bNned8kIqJKpMyHkm7cuIFWrVoBABo3bgylUonRo0ezKBARVTJlLgzFxcWwsbGRnltZWUGlUpVLKCIiMp0yH0oSQmDAgAFQKpUAgAcPHuCjjz6CnZ2dbL3vv//esAmJiMioylwYwsPDZc/79etn8DBERGR6ZS4McXFx5ZmDiIjMxHNf4EZERJUTCwMREcmwMBARkQwLAxERyZh1YYiNjcWrr76KatWqwdnZGT179kRqaqpsnQcPHiAiIgJOTk5QqVTo06cPsrOzTZSYiKjiM+vCcODAAURERODo0aPYt28fCgsL8fbbb+Pu3bvSOqNHj8YPP/yAzZs348CBA/jjjz/Qu3dvE6YmIqrYnns+BlP43//+B2dnZxw4cAAdOnSARqNBzZo1sW7dOrz77rsAgMuXL6Nhw4Y4cuQI/P39y9Qv52Mom5dtbgDOx0AvK7PeY/gnjUYDAHB0dAQAnDp1CoWFhQgKCpLWadCgAWrXro0jR448sZ/8/HxotVrZg4iIHqowhUGn02HUqFFo3749GjduDADIysqCjY0NHBwcZOu6uLggKyvriX3FxsZCrVZLDw8Pj/KMTkRUoVSYwhAREYELFy5gw4YNL9xXdHQ0NBqN9MjMzDRAQiKiyqHMt8QwpeHDh2PXrl04ePAgXnnlFand1dUVBQUFyM3Nle01ZGdnw9XV9Yn9KZVK6WaAREQkZ9Z7DEIIDB8+HNu2bcP+/ftLzCndqlUrWFtbIyEhQWpLTU1FRkYGAgICjB2XiKhSMOs9hoiICKxbtw47duxAtWrVpHEDtVoNW1tbqNVqDB48GFFRUXB0dIS9vT1GjBiBgICAMp+RREREcmZdGJYuXQoAeOONN2TtcXFxGDBgAABg3rx5sLCwQJ8+fZCfn4/g4GAsWbLEyEmJiCoPsy4MZbnEokqVKli8eDEWL15shERERJWfWY8xEBGR8bEwEBGRDAsDERHJsDAQEZEMCwMREcmwMBARkQwLAxERybAwEBGRDAsDERHJsDAQEZEMCwMREcmwMBARkQwLAxERybAwEBGRDAsDERHJsDAQEZEMCwMREcmwMBARkQwLAxERybAwEBGRDAsDERHJsDAQEZEMCwMREcmwMBARkQwLAxERybAwEBGRDAsDERHJsDAQEZEMCwMREcmwMBARkQwLAxERybAwEBGRDAsDERHJsDAQEZEMCwMREcmwMBARkQwLAxERybAwEBGRTKUpDIsXL4aXlxeqVKmCtm3b4vjx46aORERUIVWKwrBx40ZERUUhJiYGp0+fRrNmzRA
cHIycnBxTRyMiqnAUQghh6hAvqm3btnj11VexaNEiAIBOp4OHhwdGjBiB8ePHl1g/Pz8f+fn50nONRoPatWsjMzMT9vb2pb+JWl0u2ctKXfJjGJ0mWmPqCEaljjXt7xwo2zavVq0aFAqFEdLQy6LCF4aCggJUrVoVW7ZsQc+ePaX28PBw5ObmYseOHSVeM3nyZEyZMsWIKYnKj0ajefI/NETPwcrUAV7Un3/+ieLiYri4uMjaXVxccPny5VJfEx0djaioKOm5TqfD33//DScnJ7P8z0ur1cLDw+PpezRkUBVpm1erVs3UEaiSqfCF4XkolUoolUpZm4ODg2nC6MHe3t7sv6QqG25zehlV+MHnGjVqwNLSEtnZ2bL27OxsuLq6migVEVHFVeELg42NDVq1aoWEhASpTafTISEhAQEBASZMRkRUMVWKQ0lRUVEIDw9H69at0aZNG8yfPx93797FwIEDTR3NIJRKJWJiYkoc/qLyw21OL7MKf1bSI4sWLcKXX36JrKwsNG/eHAsXLkTbtm1NHYuIqMKpNIWBiIgMo8KPMRARkWGxMBARkQwLAxERybAwvGTeeOMNjBo1yuD9rl69ukJcJGhoAwYMkN2KxVCSkpKgUCiQm5tr8L6JnuWlLgxP+pI0py+58voirwgGDBgAhUIBhUIBGxsb+Pj4YOrUqSgqKnqhPsvji5yoMqkU1zFURgUFBbCxsTHb/oylc+fOiIuLQ35+Pn788UdERETA2toa0dHRevVTXFxs0PtgGbo/InPyUu8xlMWj/zC/+uoruLm5wcnJCRERESgsLJTW8fLywrRp0xAaGgo7OzvUqlULixcvlvWTm5uLDz74ADVr1oS9vT3eeustnD17Vlo+efJkNG/eHCtWrIC3tzeqVKmCAQMG4MCBA1iwYIH0n/P169dL3aPZvn277IuqtP4eKSoqwvDhw6FWq1GjRg1MnDgRj5+1nJ+fj7Fjx6JWrVqws7ND27ZtkZSUJHu/1atXo3bt2qhatSp69eqFv/7660U28xMplUq4urrC09MTw4YNQ1BQEHbu3PnMjI+20c6dO+Hn5welUolBgwYhPj4eO3bskLZnUlJSqYdtUlJSpO39pP4yMjKk9adMmSL9bj/66CMUFBRIy3Q6HWJjY+Ht7Q1bW1s0a9YMW7ZskX3OH3/8EfXq1YOtrS3efPNN6X2JTIF7DGWQmJgINzc3JCYm4urVq+jbty+aN2+OIUOGSOt8+eWX+OyzzzBlyhT89NNPiIyMRL169dCpUycAQEhICGxtbbFnzx6o1WosW7YMgYGBuHLlChwdHQEAV69exdatW/H999/D0tISnp6euHLlCho3boypU6cCAGrWrFnm3P/s75H4+HgMHjwYx48fx8mTJzF06FDUrl1b+jzDhw/HxYsXsWHDBri7u2Pbtm3o3Lkzzp8/D19fXxw7dgyDBw9GbGwsevbsib179yImJuaFt3NZ2Nra4q+//npmRgC4d+8eZs2ahRUrVsDJyQlubm64f/8+tFot4uLiAACOjo5ITk4u03v/sz9nZ2cAQEJCAqpUqYKkpCRcv34dAwcOhJOTE2bMmAEAiI2NxXfffYdvvvkGvr6+OHjwIPr164eaNWuiY8eOyMzMRO/evREREYGhQ4fi5MmTGDNmTDlsPaIyEi+xjh07isjIyBLtcXFxQq1WCyGECA8PF56enqKoqEhaHhISIvr27Ss99/T0FJ07d5b10bdvX9GlSxchhBCHDh0S9vb24sGDB7J16tatK5YtWyaEECImJkZYW1uLnJycZ2Z8PN8j27ZtE4//Op/WX8OGDYVOp5Paxo0bJxo2bCiEEOL3338XlpaW4ubNm7LXBQYGiujoaCGEEKGhoaJr164lPu8/M72o8PBw0aNHDyGEEDqdTuzbt08olUoxYMCAZ2aMi4sTAERKSsoT+3wkMTFRABC3b9+W2s6cOSMAiPT09Gf25+joKO7evSu1LV26VKhUKlFcXCwePHg
gqlatKpKTk2WvGzx4sAgNDRVCCBEdHS38/Pxky8eNG1ciE5GxcI+hDBo1aiT7j9vNzQ3nz5+XrfPPG/YFBARg/vz5AICzZ88iLy8PTk5OsnXu37+Pa9euSc89PT312iN4lif15+/vLzvsFBAQgDlz5qC4uBjnz59HcXEx6tWrJ3tNfn6+lP/SpUvo1auXbHlAQAD27t1rsOyP7Nq1CyqVCoWFhdDpdPj3v/+Nd999F6tXr35qRuDhDRabNm1qsCxP6q9Zs2aoWrWq9DwgIAB5eXnIzMxEXl4e7t27J+05PlJQUIAWLVoAeLg9/3n7Ft4AkkzppS4M9vb20GhKTp2Ym5sL9WNTeVpbW8uWKxQK6HS6Mr9PXl4e3NzcShynB+TzQNjZ2ZWpPwsLC9mYAADZmIe+/T0uLy8PlpaWOHXqlKwYAoBKpdK7vxf15ptvYunSpbCxsYG7uzusrKywcePGMmW0tbUt0wCxhcXDobbHt2lp27Os/T0uLy8PALB7927UqlVLtow36CNz9VIXhvr16+Pnn38u0X769OkS/40+y9GjR0s8b9iwIQCgZcuWyMrKgpWVFby8vPTq18bGBsXFxbK2mjVr4s6dO7h796705Z+SklLmPo8dO1Yiq6+vLywtLdGiRQsUFxcjJycHr7/+eqmvb9iwYal9lAc7Ozv4+PjI2sqS8UmetD0B4NatW6hevToA/bbn2bNncf/+fdja2gJ4uC1UKhU8PDzg6OgoDVR37Nix1Nc3bNgQO3fulLWV1/YkKouX+qykYcOG4cqVKxg5ciTOnTuH1NRUzJ07F+vXr9d78O/XX3/F7NmzceXKFSxevBibN29GZGQkACAoKAgBAQHo2bMnfv75Z1y/fh3Jycn4/PPPcfLkyaf26+XlhWPHjuH69ev4888/odPp0LZtW1StWhWfffYZrl27hnXr1mH16tVlzpqRkYGoqCikpqZi/fr1+Prrr6Ws9erVQ1hYGPr374/vv/8e6enpOH78OGJjY7F7924AwMiRI7F371589dVXSEtLw6JFi8rlMNKTlCXjk3h5eUm/6z///BOFhYXw8fGBh4cHJk+ejLS0NOzevRtz5swpc56CggIMHjwYFy9exI8//oiYmBgMHz4cFhYWqFatGsaOHYvRo0cjPj4e165dw+nTp/H1118jPj4eAPDRRx8hLS0Nn3zyCVJTU/X+fRIZnKkHOUzt+PHjolOnTqJmzZpCrVaLtm3bim3btknLSxusjIyMFB07dpSee3p6iilTpoiQkBBRtWpV4erqKhYsWCB7jVarFSNGjBDu7u7C2tpaeHh4iLCwMJGRkSGEeDhY3KxZsxL5UlNThb+/v7C1tZUNhm7btk34+PgIW1tb8a9//Ut8++23JQafS+uvY8eO4uOPPxYfffSRsLe3F9WrVxefffaZbDC6oKBATJo0SXh5eQlra2vh5uYmevXqJc6dOyets3LlSvHKK68IW1tb8c4774ivvvqqXAef/+lZGUsboBdCiJycHNGpUyehUqkEAJGYmCiEEOLw4cOiSZMmokqVKuL1118XmzdvLjH4XFp/jzJOmjRJODk5CZVKJYYMGSI70UCn04n58+eL+vXrC2tra1GzZk0RHBwsDhw4IK3zww8/CB8fH6FUKsXrr78uVq1axcFnMhnedtsAvLy8MGrUqJf2CmUiqlxe6kNJRERUEgsDERHJ8FASERHJcI+BiIhkWBiIiEiGhYGIiGRYGIiISIaFgYiIZFgYiIhIhoWBiIhkWBiIiEjm/wCdJXLMmTxGVwAAAABJRU5ErkJggg==", + "text/plain": [ + "<Figure size 400x300 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "categories = ['Unperturbed', 
'Perturbed']\n", + "neo4j_rag_percentage = [neo4j_rag_total_correct_retrieval_percentage, neo4j_rag_total_correct_retrieval_perturbed_percentage]\n", + "kg_rag_percentage = [kg_rag_total_correct_retrieval_percentage, kg_rag_total_correct_retrieval_perturbed_percentage]\n", + "\n", + "neo4j_color = 'red'\n", + "kg_rag_color = 'green'\n", + "\n", + "fig, ax = plt.subplots(figsize=(4, 3))\n", + "\n", + "bar_width = 0.35\n", + "index = range(len(categories))\n", + "\n", + "ax.bar(index, neo4j_rag_percentage, bar_width, color=neo4j_color, label='Cypher-RAG')\n", + "ax.bar([i + bar_width for i in index], kg_rag_percentage, bar_width, color=kg_rag_color, label='KG-RAG')\n", + "\n", + "ax.set_ylabel('Retrieval accuracy')\n", + "ax.set_xticks([i + bar_width / 2 for i in index])\n", + "ax.set_xticklabels(categories)\n", + "\n", + "\n", + "ax.legend(loc='center left', bbox_to_anchor=(1, 0.9))\n", + "\n", + "sns.despine()\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "fig_path = '../data/results/figures'\n", + "os.makedirs(fig_path, exist_ok=True)\n", + "fig.savefig(os.path.join(fig_path, 'retrieval_accuracy_comparison.svg'), format='svg', bbox_inches='tight') \n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ecf8bd99", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[75.0, 0.0]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "neo4j_rag_percentage" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5f316867", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[97.0, 97.0]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kg_rag_percentage" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "269c8dc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8006" + ] + }, + "execution_count": 15, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "round(neo4j_avg)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "56494f88", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3693" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "round(kg_avg)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} |
