1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
|
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7bed2b6f-b8b2-4256-869c-1f3fa8561d1a",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd\n",
"import os\n",
"from dotenv import load_dotenv\n",
"from neo4j import GraphDatabase, basic_auth\n",
"from tqdm import tqdm\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import openai\n",
"import numpy as np\n"
]
},
{
"cell_type": "markdown",
"id": "b0b0240a-87ba-4e3e-b672-394cb2dd3c3a",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4a162750-64e9-4906-84b3-7858b82a17da",
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('../data/rag_comparison_data.csv')\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "ffc91ffd-d76c-427f-9cbc-50b07e62fa98",
"metadata": {},
"source": [
"## Custom functions"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "82a7dd0b-01a5-4501-82bc-96224074c9d0",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def connect_to_graph():\n",
" auth = basic_auth(graph_usr, graph_psw)\n",
" return GraphDatabase.driver(graph_uri, auth=auth)\n",
"\n",
"def run_cypher(cypher_query):\n",
" with connect_to_graph() as sdb:\n",
" with sdb.session() as session:\n",
" result = session.run(cypher_query)\n",
" out = []\n",
" for row in result:\n",
" out.append((row['d_name'], row['r_type'], row['g_label'], row['g_name'], row['relationship_properties']))\n",
" return out\n",
"\n",
"\n",
"def lucene_search(query, source):\n",
" source_search_uri = f\"https://spoke.rbvi.ucsf.edu/api/v1/search/{source}/{query}\"\n",
" source_search_resp = requests.get(source_search_uri)\n",
" return source_search_resp\n",
"\n",
"def get_context_using_lucene_search(query):\n",
" source = 'Disease'\n",
" source_resp = lucene_search(query, source) \n",
" if source_resp.status_code == 200:\n",
" source_resp_data = source_resp.json()\n",
" source_name = source_resp_data[0]['name']\n",
" else:\n",
" return ''\n",
" cypher = f'''\n",
" MATCH(d:Disease{{name:\"{source_name}\"}})-[r]-(g) \n",
" RETURN DISTINCT d.name AS d_name, TYPE(r) AS r_type, LABELS(g) AS g_label, g.name AS g_name, PROPERTIES(r) AS relationship_properties \n",
" '''\n",
" graph_out = run_cypher(cypher)\n",
" context = ''\n",
" if len(graph_out) > 0: \n",
" for i in graph_out:\n",
" try:\n",
" prov = ', '.join(i[3][\"sources\"]) \n",
" except:\n",
" try:\n",
" prov = i[3][\"source\"]\n",
" except:\n",
" prov = ''\n",
" context += 'Disease ' + i[0] + ' ' + i[1].split('_')[0].lower() + ' ' + i[2][0] + ' ' + i[3] + f'. Provenance of this association is {prov}. ' + '\\n' + str(i[4]) \n",
" return context\n",
"\n",
"\n",
"\n",
"def chat_completion_with_token_usage(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature):\n",
" response = openai.ChatCompletion.create(\n",
" temperature=temperature,\n",
" deployment_id=chat_deployment_id,\n",
" model=chat_model_id,\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": instruction}\n",
" ]\n",
" )\n",
" return response['choices'][0]['message']['content'], response.usage.total_tokens\n",
" "
]
},
{
"cell_type": "markdown",
"id": "09225048-3d6e-46e0-91c3-a750c9a49c34",
"metadata": {},
"source": [
"## OpenAI credentials"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "07387b5e-c158-4e16-bec1-fb6dbcba80f3",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(os.path.join(os.path.expanduser('~'), '.gpt_config.env'))\n",
"\n",
"api_key = os.environ.get('API_KEY')\n",
"api_version = os.environ.get('API_VERSION')\n",
"resource_endpoint = os.environ.get('RESOURCE_ENDPOINT')\n",
"openai.api_type = 'azure'\n",
"openai.api_key = api_key\n",
"if resource_endpoint:\n",
" openai.api_base = resource_endpoint\n",
"if api_version:\n",
" openai.api_version = api_version\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "71f31b21-527e-42cd-9998-9ebf328af4f1",
"metadata": {},
"source": [
"## Graph credentials"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "7e45c21d-712a-4957-adcc-2dee2ca12e06",
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(os.path.join(os.path.expanduser('~'), '.spoke_neo4j_config.env'))\n",
"graph_usr = os.environ.get('NEO4J_USER')\n",
"graph_psw = os.environ.get('NEO4J_PSW')\n",
"graph_uri = os.environ.get('NEO4J_URL')\n",
"database = os.environ.get('NEO4J_DB')\n"
]
},
{
"cell_type": "markdown",
"id": "532c603b-c1d3-4d8a-abea-ba10c186f726",
"metadata": {},
"source": [
"## Setting system prompt for LLM"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d57b2ec-c5ca-49ad-9d7f-e566009838d8",
"metadata": {},
"outputs": [],
"source": [
"system_prompt = '''\n",
" You are an expert biomedical researcher. \n",
" For answering the Question at the end with brevity, you need to first read the Context provided. \n",
" Then give your final answer briefly, by citing the Provenance information from the context. \n",
" You can find Provenance from the Context statement 'Provenance of this association is <Provenance>'. \n",
" Do not forget to cite the Provenance information. \n",
" Note that, if Provenance is 'GWAS' report it as 'GWAS Catalog'. \n",
" If Provenance is 'DISEASES' report it as 'DISEASES database - https://diseases.jensenlab.org'. \n",
" Additionally, when providing drug or medication suggestions, give maximum information available and then advise the user to seek guidance from a healthcare professional as a precautionary measure.\n",
"'''\n",
"\n",
"chat_model = 'gpt-4-32k'\n",
"temperature = 0\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "422b4993-ebfe-4088-af9a-0a6d013649e2",
"metadata": {},
"source": [
"## Example query for Lucene based RAG"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "5a412550-35f5-4b33-b15f-36a1743234a1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Yes, Parkinson's disease is associated with the PINK1 gene. Provenance of this association is not provided in the context.\n"
]
}
],
"source": [
"\n",
"query = \"Is Parkinson's disease associated with PINK1 gene?\"\n",
"\n",
"context = get_context_using_lucene_search(query)\n",
"\n",
"prompt = f'''\n",
"Context: {context}\n",
"Question: {query}\n",
"'''\n",
"output, token_usage = chat_completion_with_token_usage(prompt, system_prompt, chat_model, chat_model, temperature)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "markdown",
"id": "2c0d739f-d3e0-4137-9857-ab98b0dd52ee",
"metadata": {},
"source": [
"## Example query for Lucene based RAG (after perturbation)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "4c01aef6-a01b-4c94-a8cb-f02858b9d5d9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Yes, Parkinson's disease is associated with the PINK1 gene. Provenance of this association is not provided in the context.\n"
]
}
],
"source": [
"\n",
"query = \"Is parkinson's disease associated with pink1 gene?\"\n",
"\n",
"context = get_context_using_lucene_search(query)\n",
"\n",
"prompt = f'''\n",
"Context: {context}\n",
"Question: {query}\n",
"'''\n",
"output, token_usage = chat_completion_with_token_usage(prompt, system_prompt, chat_model, chat_model, temperature)\n",
"\n",
"print(output)\n"
]
},
{
"cell_type": "markdown",
"id": "9b73767d-8575-47c0-95aa-923258d2af84",
"metadata": {},
"source": [
"## Lucene based context extraction"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "83543617-01ca-49da-aef4-8e936fd39218",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100it [00:43, 2.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 14.1 s, sys: 1.04 s, total: 15.1 s\n",
"Wall time: 43.3 s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"lucene_based_context_list = []\n",
"for row_index, row in tqdm(data.iterrows()):\n",
" query = row['question']\n",
" lucene_based_context_list.append(get_context_using_lucene_search(query))\n",
"\n",
"data['extracted_context'] = lucene_based_context_list\n",
"# data_non_empty_context = data[data.extracted_context != '']\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "85138849-4f58-45de-9a28-7eecb9612412",
"metadata": {},
"source": [
"## Lucene based context extraction - after query perturbation"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "24dd2e6e-c9c5-4341-b14d-e5b37ded88dc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100it [01:00, 1.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 19.5 s, sys: 1.32 s, total: 20.8 s\n",
"Wall time: 1min\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"lucene_based_context_list = []\n",
"for row_index, row in tqdm(data.iterrows()):\n",
" query = row['question_perturbed']\n",
" lucene_based_context_list.append(get_context_using_lucene_search(query))\n",
"\n",
"data['extracted_context_after_perturbation'] = lucene_based_context_list\n",
"\n",
"# data_non_empty_context_after_perturbation = data[data.extracted_context_after_perturbation != '']\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "eaf7d711-1a8f-45d4-ba60-d8519e06c4c8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100it [05:10, 3.11s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.94 s, sys: 484 ms, total: 2.42 s\n",
"Wall time: 5min 10s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"full_text_track_output = []\n",
"full_text_track_token_usage = []\n",
"for row_index, row in tqdm(data.iterrows()):\n",
" query = row['question']\n",
" context = row['extracted_context']\n",
" prompt = f'''\n",
" Context: {context}\n",
" Question: {query}\n",
" '''\n",
" try:\n",
" output, token_usage = chat_completion_with_token_usage(prompt, system_prompt, chat_model, chat_model, temperature)\n",
" full_text_track_output.append(output)\n",
" full_text_track_token_usage.append(token_usage)\n",
" except:\n",
" full_text_track_output.append(None)\n",
" full_text_track_token_usage.append(None)\n",
" \n",
"data['full_text_index_answer'] = full_text_track_output\n",
"data['token_usage'] = full_text_track_token_usage\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d596ec29-dae9-4b7b-9cf4-6f515943d946",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100it [04:53, 2.94s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.81 s, sys: 503 ms, total: 2.31 s\n",
"Wall time: 4min 53s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"%%time\n",
"\n",
"full_text_track_output = []\n",
"full_text_track_token_usage = []\n",
"for row_index, row in tqdm(data.iterrows()):\n",
" query = row['question_perturbed']\n",
" context = row['extracted_context_after_perturbation']\n",
" prompt = f'''\n",
" Context: {context}\n",
" Question: {query}\n",
" '''\n",
" try:\n",
" output, token_usage = chat_completion_with_token_usage(prompt, system_prompt, chat_model, chat_model, temperature)\n",
" full_text_track_output.append(output)\n",
" full_text_track_token_usage.append(token_usage)\n",
" except:\n",
" full_text_track_output.append(None)\n",
" full_text_track_token_usage.append(None)\n",
" \n",
"data['full_text_index_answer_after_perturbation'] = full_text_track_output\n",
"data['token_usage_after_perturbation'] = full_text_track_token_usage\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "06f551db-34a6-4aca-b591-a30b66f095c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Correct retrieval percentage for Full-text index based retrieval 61.0%\n"
]
}
],
"source": [
"data_no_nan = data.dropna(subset=['full_text_index_answer'])\n",
"data_yes_count_df = data_no_nan[data_no_nan.full_text_index_answer.str.contains('Yes')]\n",
"data_yes_count = data_yes_count_df.shape[0]\n",
"indices_to_remove = data_yes_count_df.index.tolist()\n",
"data_no_nan = data_no_nan.drop(indices_to_remove)\n",
"data_no_nan.loc[:, 'contains_pvalue'] = data_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['full_text_index_answer']), axis=1)\n",
"data_p_value_correct_retrieval_count = data_no_nan[data_no_nan.contains_pvalue==True].shape[0]\n",
"data_total_correct_retrieval = data_yes_count + data_p_value_correct_retrieval_count\n",
"\n",
"full_text_index_based_total_correct_retrieval_percentage = 100*data_total_correct_retrieval/data.shape[0]\n",
"\n",
"print(f'Correct retrieval percentage for Full-text index based retrieval {full_text_index_based_total_correct_retrieval_percentage}%')\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "050699a8-ff46-4b4b-a31a-808ef0799e02",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Correct retrieval percentage for Full-text index based retrieval after name perturbation 58.0%\n"
]
}
],
"source": [
"data_no_nan = data.dropna(subset=['full_text_index_answer_after_perturbation'])\n",
"data_yes_count_df = data_no_nan[data_no_nan.full_text_index_answer_after_perturbation.str.contains('Yes')]\n",
"data_yes_count = data_yes_count_df.shape[0]\n",
"indices_to_remove = data_yes_count_df.index.tolist()\n",
"data_no_nan = data_no_nan.drop(indices_to_remove)\n",
"data_no_nan.loc[:, 'contains_pvalue'] = data_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['full_text_index_answer_after_perturbation']), axis=1)\n",
"data_p_value_correct_retrieval_count = data_no_nan[data_no_nan.contains_pvalue==True].shape[0]\n",
"data_total_correct_retrieval = data_yes_count + data_p_value_correct_retrieval_count\n",
"\n",
"full_text_index_based_total_correct_retrieval_perturbed_percentage = 100*data_total_correct_retrieval/data.shape[0]\n",
"\n",
"print(f'Correct retrieval percentage for Full-text index based retrieval after name perturbation {full_text_index_based_total_correct_retrieval_perturbed_percentage}%')\n"
]
},
{
"cell_type": "markdown",
"id": "e4a46cb6-60ea-426a-96f6-f047985b4178",
"metadata": {},
"source": [
"## KG-RAG"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "db00c5f2-a1b7-4a77-b26f-5fc0dbdb81dc",
"metadata": {},
"outputs": [],
"source": [
"kg_rag = pd.read_csv('../data/results/kg_rag_output.csv')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "dbf582a6-079b-4676-a9b7-61bbe9f544c8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Correct retrieval percentage for KG-RAG 97.0%\n"
]
}
],
"source": [
"\n",
"kg_rag_no_nan = kg_rag.dropna(subset=['kg_rag_answer'])\n",
"kg_rag_yes_count_df = kg_rag_no_nan[kg_rag_no_nan.kg_rag_answer.str.contains('Yes')]\n",
"kg_rag_yes_count = kg_rag_yes_count_df.shape[0]\n",
"indices_to_remove = kg_rag_yes_count_df.index.tolist()\n",
"kg_rag_no_nan = kg_rag_no_nan.drop(indices_to_remove)\n",
"kg_rag_no_nan.loc[:, 'contains_pvalue'] = kg_rag_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['kg_rag_answer']), axis=1)\n",
"kg_rag_p_value_correct_retrieval_count = kg_rag_no_nan[kg_rag_no_nan.contains_pvalue==True].shape[0]\n",
"kg_rag_total_correct_retrieval = kg_rag_yes_count + kg_rag_p_value_correct_retrieval_count\n",
"\n",
"\n",
"kg_rag_total_correct_retrieval_percentage = 100*kg_rag_total_correct_retrieval/kg_rag.shape[0]\n",
"\n",
"print(f'Correct retrieval percentage for KG-RAG {kg_rag_total_correct_retrieval_percentage}%')\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "d4212209-cf87-4e14-b5fa-342c33080117",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Correct retrieval percentage for KG-RAG after name perturbation 97.0%\n"
]
}
],
"source": [
"\n",
"\n",
"kg_rag_no_nan = kg_rag.dropna(subset=['kg_rag_answer_perturbed'])\n",
"kg_rag_yes_count_df = kg_rag_no_nan[kg_rag_no_nan.kg_rag_answer_perturbed.str.contains('Yes')]\n",
"kg_rag_yes_count = kg_rag_yes_count_df.shape[0]\n",
"indices_to_remove = kg_rag_yes_count_df.index.tolist()\n",
"kg_rag_no_nan = kg_rag_no_nan.drop(indices_to_remove)\n",
"kg_rag_no_nan.loc[:, 'contains_pvalue'] = kg_rag_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['kg_rag_answer_perturbed']), axis=1)\n",
"kg_rag_p_value_correct_retrieval_count = kg_rag_no_nan[kg_rag_no_nan.contains_pvalue==True].shape[0]\n",
"kg_rag_total_correct_retrieval_perturbed = kg_rag_yes_count + kg_rag_p_value_correct_retrieval_count\n",
"\n",
"\n",
"kg_rag_total_correct_retrieval_perturbed_percentage = 100*kg_rag_total_correct_retrieval_perturbed/kg_rag.shape[0]\n",
"\n",
"print(f'Correct retrieval percentage for KG-RAG after name perturbation {kg_rag_total_correct_retrieval_perturbed_percentage}%')\n"
]
},
{
"cell_type": "markdown",
"id": "14046f1f-91ea-4a33-b7a0-15c42d335eec",
"metadata": {},
"source": [
"## Cypher-RAG"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "9fc3b113-bdd8-46cf-80a7-5611005ee613",
"metadata": {},
"outputs": [],
"source": [
"neo4j_rag = pd.read_csv('../data/results/cypher_rag_output.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "acf53878-a5a9-42ff-ba51-3ec71482ca36",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Correct retrieval percentage for Cypher-RAG 75.0%\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/p1/h56gxdhs5vgb0ztp7h4z606h0000gn/T/ipykernel_70250/1568521084.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" neo4j_rag_no_nan.loc[:, 'contains_pvalue'] = neo4j_rag_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['neo4j_rag_answer']), axis=1)\n"
]
}
],
"source": [
"\n",
"neo4j_rag_no_nan = neo4j_rag.dropna(subset=['neo4j_rag_answer'])\n",
"neo4j_rag_no_nan.loc[:, 'contains_pvalue'] = neo4j_rag_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['neo4j_rag_answer']), axis=1)\n",
"neo4j_rag_yes_count_df = neo4j_rag_no_nan[neo4j_rag_no_nan.neo4j_rag_answer.str.contains('Yes')]\n",
"neo4j_rag_yes_count = neo4j_rag_yes_count_df.shape[0]\n",
"indices_to_remove = neo4j_rag_yes_count_df.index.tolist()\n",
"neo4j_rag_no_nan = neo4j_rag_no_nan.drop(indices_to_remove)\n",
"neo4j_rag_p_value_correct_retrieval_count = neo4j_rag_no_nan[neo4j_rag_no_nan.contains_pvalue==True].shape[0]\n",
"neo4j_rag_total_correct_retrieval = neo4j_rag_yes_count + neo4j_rag_p_value_correct_retrieval_count\n",
"\n",
"neo4j_rag_total_correct_retrieval_percentage = 100*neo4j_rag_total_correct_retrieval/neo4j_rag.shape[0]\n",
"\n",
"print(f'Correct retrieval percentage for Cypher-RAG {neo4j_rag_total_correct_retrieval_percentage}%')\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "24c274c9-25b6-4db9-85cb-92149b17b685",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Correct retrieval percentage for Cypher-RAG after name perturbation 0.0%\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/p1/h56gxdhs5vgb0ztp7h4z606h0000gn/T/ipykernel_70250/1404871373.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" neo4j_rag_no_nan.loc[:, 'contains_pvalue'] = neo4j_rag_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['neo4j_rag_answer_perturbed']), axis=1)\n"
]
}
],
"source": [
"\n",
"neo4j_rag_no_nan = neo4j_rag.dropna(subset=['neo4j_rag_answer_perturbed'])\n",
"neo4j_rag_no_nan.loc[:, 'contains_pvalue'] = neo4j_rag_no_nan.apply(lambda row: str(row['gwas_pvalue']) in str(row['neo4j_rag_answer_perturbed']), axis=1)\n",
"neo4j_rag_yes_count_df = neo4j_rag_no_nan[neo4j_rag_no_nan.neo4j_rag_answer_perturbed.str.contains('Yes')]\n",
"neo4j_rag_yes_count = neo4j_rag_yes_count_df.shape[0]\n",
"indices_to_remove = neo4j_rag_yes_count_df.index.tolist()\n",
"neo4j_rag_no_nan = neo4j_rag_no_nan.drop(indices_to_remove)\n",
"neo4j_rag_p_value_correct_retrieval_count = neo4j_rag_no_nan[neo4j_rag_no_nan.contains_pvalue==True].shape[0]\n",
"neo4j_rag_total_correct_retrieval_perturbed = neo4j_rag_yes_count + neo4j_rag_p_value_correct_retrieval_count\n",
"\n",
"neo4j_rag_total_correct_retrieval_perturbed_percentage = 100*neo4j_rag_total_correct_retrieval_perturbed/neo4j_rag.shape[0]\n",
"\n",
"print(f'Correct retrieval percentage for Cypher-RAG after name perturbation {neo4j_rag_total_correct_retrieval_perturbed_percentage}%')\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "723e8ae3-b3ba-4589-b4eb-70b33e2facc7",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAEhCAYAAABbdVhOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3jElEQVR4nO3deXhMZ/8G8HuyTZZJhkRWjWRILEGsRWitIaG1pqjKKxGlCCKqJW9LLCWltW+hCFpbVe2l1RBLEWssLREajS1JlcwIss75/eHnvEaiMsnEHHJ/rmuuyzznmWe+M3Mud845zzlHJgiCACIiIpIcE2MXQERERMVjSBMREUkUQ5qIiEiiGNJEREQSxZAmIiKSKIY0ERGRRDGkiYiIJIohTUREJFEMaQCCIECj0YDXdSEiIilhSAO4f/8+lEol7t+/b+xSiIiIREYN6YMHD6Jr165wc3ODTCbD1q1bdZYLgoCJEyfC1dUVVlZW8Pf3R0pKik6fu3fvon///rCzs0OlSpUwaNAgZGdnv8RPQUREVD6MGtIPHjxAgwYNsGjRomKXz5w5E/Pnz0dsbCwSExNhY2ODgIAA5OTkiH369++P33//HXv37sXOnTtx8OBBDBky5GV9BCIionIjk8oNNmQyGbZs2YIePXoAeLwV7ebmho8//hhjx44FAKjVajg7O2PVqlV4//33cfHiRfj4+ODEiRNo2rQpAGDPnj3o0qULbty4ATc3txK9t0ajgVKphFqthp2dXbl8PiIiIn1J9ph0amoq0tPT4e/vL7YplUo0b94cR48eBQAcPXoUlSpVEgMaAPz9/WFiYoLExMTnjp2bmwuNRqPzICIikhrJhnR6ejoAwNnZWafd2dlZXJaeng4nJyed5WZmZrC3txf7FCcmJgZKpVJ8uLu7G7h6IiKispNsSJenqKgoqNVq8XH9+nVjl0RERFSEZEPaxcUFAJCRkaHTnpGRIS5zcXFBZmamzvKCggLcvXtX7FMcuVwOOzs7nQcREZHUSDakVSoVXFxcEB8fL7ZpNBokJibCz88PAODn54esrCycOnVK7LNv3z5otVo0b978pddMRERkSGbGfPPs7GxcuXJFfJ6amoqkpCTY29ujWrVqGD16NL744gt4e3tDpVJhwoQJcHNzE2eA16lTB4GBgRg8eDBiY2ORn5+PESNG4P333y/xzG4iIiKpMuopWAkJCWjXrl2R9pCQEKxatQqCICA6OhrLli1DVlYW3nrrLSxevBg1a9YU+969excjRozAjh07YGJigqCgIMyfPx8KhaLEdfAULCIikiLJnCdtTAxpIiKSIqPu7qbXg2yyzNglAACE6Ar/96bkSGHd4HpBrzLJThwjIiKq6BjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkURJOqQLCwsxYcIEqFQqWFlZoUaNGpg6dSoEQRD7CIKAiRMnwtXVFVZWVvD390dKSooRqyYiIjIMSYf0jBkzsGTJEixcuBAXL17EjBkzMHPmTCxYsEDsM3PmTMyfPx+xsbFITEyEjY0NAgICkJOTY8TKiYiIys7M2AX8myNHjqB79+545513AACenp5Yv349jh8/DuDxVvTcuXPx+eefo3v37gCANWvWwNnZGVu3bsX7779vtNqJiIjKStJb0i1btkR8fDwuX74MADh79iwOHz6Mzp07AwBSU1ORnp4Of39/8TVKpRLNmzfH0aNHnztubm
4uNBqNzoOIiEhqJL0lPX78eGg0GtSuXRumpqYoLCzEtGnT0L9/fwBAeno6AMDZ2Vnndc7OzuKy4sTExGDy5MnlVzgREZEBSHpL+vvvv8fatWuxbt06nD59GqtXr8bXX3+N1atXl2ncqKgoqNVq8XH9+nUDVUxERGQ4kt6S/uSTTzB+/Hjx2HL9+vXx119/ISYmBiEhIXBxcQEAZGRkwNXVVXxdRkYGGjZs+Nxx5XI55HJ5udZORERUVpLekn748CFMTHRLNDU1hVarBQCoVCq4uLggPj5eXK7RaJCYmAg/P7+XWisREZGhSXpLumvXrpg2bRqqVauGunXr4syZM5g9ezbCwsIAADKZDKNHj8YXX3wBb29vqFQqTJgwAW5ubujRo4dxiyciIiojSYf0ggULMGHCBAwfPhyZmZlwc3PDRx99hIkTJ4p9Pv30Uzx48ABDhgxBVlYW3nrrLezZsweWlpZGrJyIiKjsZMLTl++qoDQaDZRKJdRqNezs7IxdzitHNllm7BIAAEJ0hV+VJUcK6wbXC3qV6X1MOjo6Gn/99Vd51EJERERP0Tukt23bhho1aqBDhw5Yt24dcnNzy6MuIiKiCk/vkE5KSsKJEydQt25dREREwMXFBcOGDcOJEyfKoz4iIqIKq1SnYDVq1Ajz58/HrVu3sGLFCty4cQOtWrWCr68v5s2bB7Vabeg6iYiIKpwynSctCALy8/ORl5cHQRBQuXJlLFy4EO7u7ti4caOhaiQiIqqQShXSp06dwogRI+Dq6orIyEg0atQIFy9exIEDB5CSkoJp06Zh1KhRhq6VniWTSeNBRETlQu+Qrl+/Plq0aIHU1FSsWLEC169fx5dffgkvLy+xT79+/fD3338btFAiIqKKRu+LmfTp0wdhYWGoWrXqc/tUqVJFvHQnERERlY7eIT1hwoTyqIOIiIieoffu7qCgIMyYMaNI+8yZM9G7d2+DFEVERESlCOmDBw+iS5cuRdo7d+6MgwcPGqQoIiIiKkVIZ2dnw8LCoki7ubk5NBqNQYoiIiKiUs7uLu4c6A0bNsDHx8cgRREREVEpJ4716tULV69eRfv27QEA8fHxWL9+PTZt2mTwAomIiCoqvUO6a9eu2Lp1K6ZPn44ffvgBVlZW8PX1xa+//oo2bdqUR41EREQVkt4hDQDvvPMO3nnnHUPXQkRERE8p07W7iYiIqPzovSVdWFiIOXPm4Pvvv0daWhry8vJ0lt+9e9dgxREREVVkem9JT548GbNnz0bfvn2hVqsxZswY9OrVCyYmJpg0aVI5lEhERFQx6R3Sa9euxTfffIOPP/4YZmZm6NevH5YvX46JEyfi2LFj5VEjERFRhaR3SKenp6N+/foAAIVCAbVaDQB49913sWvXLsNWR0REVIHpHdJvvPEGbt++DQCoUaMGfvnlFwDAiRMnIJfLDVsdERFRBaZ3SPfs2RPx8fEAgJEjR2LChAnw9vbGgAEDEBYWZvACiYiIKiq9Z3d/+eWX4r/79u0LDw8PHDlyBN7e3ujatatBiyMiIqrI9Arp/Px8fPTRR5gwYQJUKhUAoEWLFmjRokW5FEdERFSR6bW729zcHJs3by6vWoiIiOgpeh+T7tGjB7Zu3VoOpRAREdHT9D4m7e3tjSlTpuC3335DkyZNYGNjo7N81KhRBiuOiIioIpMJgiDo84Inx6KLHUwmw59//lnmol42jUYDpVIJtVoNOzs7Y5dTcjKZsSsAAMgmGbuCx4RovVZleglkk42/jpbHelFYWIj8/HyDj0sVg7m5OUxNTUvUV+8t6dTUVL0LIiJ6HQiCgPT0dGRlZRm7FHrFVapUCS4uLpC9YGOrVLeqJCKqiJ4EtJOTE6ytrV/4HyzRswRBwMOHD5GZmQkAcHV1/df+eof0iy5YsnLlSn2HJCKSvMLCQjGgHRwcjF0OvcKsrKwAAJmZmXBycvrXXd96h/S9e/d0nufn5+PChQvIyspC+/bt9R2OiOiV8OQYtL
W1tZErodfBk/UoPz/fsCG9ZcuWIm1arRbDhg1DjRo19B2OiOiVwl3cZAglXY/0Pk+62EFMTDBmzBjMmTPHEMMRERERDBTSAHD16lUUFBQYajgiIqIKT+/d3WPGjNF5LggCbt++jV27diEkJMRghRERvSpe5h5w/a5sUXZt27ZFw4YNMXfuXACAp6cnRo8ejdGjR7/cQl6ia9euQaVS4cyZM2jYsKFRa9F7S/rMmTM6j3PnzgEAZs2aJf6IhnTz5k0EBwfDwcEBVlZWqF+/Pk6ePCkuFwQBEydOhKurK6ysrODv74+UlBSD10FE9CoKDQ2FTCYr8rhy5cpLeZ8nD09PzzKN37Zt2xL9YVDSfq8Kvbek9+/fXx51FOvevXto1aoV2rVrh927d8PR0REpKSmoXLmy2GfmzJmYP38+Vq9eDZVKhQkTJiAgIAB//PEHLC0tX1qtRERSFRgYiLi4OJ02R0dHg77HvHnzdG5l7Orqiri4OAQGBgJAia+wRbr03pJOTU0tdks1JSUF165dM0RNohkzZsDd3R1xcXFo1qwZVCoVOnXqJM4iFwQBc+fOxeeff47u3bvD19cXa9aswa1bt8r1JiAymTQeREQlIZfL4eLiovMwNTVFaGgoevToodN39OjRaNu2rd7voVQqdcYH/ndVLRcXF2RkZKBz585QKBRwdnbGf/7zH9y5cwcAkJCQAAsLCxw6dEgcb+bMmXByckJGRgZCQ0Nx4MABzJs3T9wyL2neeHp6Yvr06QgLC4OtrS2qVauGZcuW6fQ5fvw4GjVqBEtLSzRt2hRnzpwpMs6FCxdKXX9Z6B3SoaGhOHLkSJH2xMREhIaGlqmYZ23fvh1NmzZF79694eTkhEaNGuGbb74Rl6empiI9PR3+/v5im1KpRPPmzXH06NHnjpubmwuNRqPzICKi8vHkOhqNGjXCyZMnsWfPHmRkZKBPnz4A/reL+j//+Q/UajXOnDmDCRMmYPny5XB2dsa8efPg5+eHwYMH4/bt27h9+zbc3d1L/P6zZs0Sw3f48OEYNmwYkpOTAQDZ2dl499134ePjg1OnTmHSpEkYO3asQesvi1Idk27VqlWR9hYtWiApKalMxTzrzz//xJIlS+Dt7Y2ff/4Zw4YNw6hRo7B69WoAjy/RB6DIl+Ds7CwuK05MTAyUSqX40OfHJiJ61ezcuRMKhUJ89O7d+6W+/8KFC9GoUSNMnz4dtWvXRqNGjbBy5Urs378fly9fBgB88cUXqFy5MoYMGYLg4GCEhISgW7duAB5vfFlYWMDa2lpnT0BJdenSBcOHD4eXlxfGjRuHKlWqiIdu161bB61WixUrVqBu3bp499138cknnxi0/rLQ+5i0TCbD/fv3i7Sr1WoUFhaWuaCnabVaNG3aFNOnTwcANGrUCBcuXEBsbGyZZpJHRUXpzFLXaDQMaiJ6bbVr1w5LliwRnz97i+HSGjp0KL777jvxeXZ2drH9zp49i/3790OhUBRZdvXqVdSsWRMWFhZYu3YtfH194eHhYdDrbvj6+or/lslkcHFxEa+dffHiRfj6+urMYfLz85NM/XqHdOvWrRETE4P169eLf8kUFhYiJiYGb731lkGKesLV1RU+Pj46bXXq1MHmzZsBQDzukZGRoXOR8oyMjH+dNi+XyyGXyw1aKxGRVNnY2MDLy6tIu4mJCZ69W7E+t+CcMmVKkV3DxcnOzkbXrl0xY8aMIsue/r/7yaHUu3fv4u7duwb7Y8Lc3FznuUwmg1arLfHrjVm/3iE9Y8YMtG7dGrVq1cLbb78NADh06BA0Gg327dtX5oKe1qpVK/G4wROXL1+Gh4cHgMf3tnZxcUF8fLwYyhqNBomJiRg2bJhBayEiet04OjriwoULOm1JSUlFQu15nJyc4OTk9MJ+jRs3xubNm+Hp6Qkzs+Jj5+rVq4iMjMQ333yDjRs3IiQkBL/++itMTB4flbWwsDD43lrg8Ybft99+i5ycHHFr+tixYwavv7
T0frWPjw/OnTuHPn36IDMzE/fv38eAAQNw6dIl1KtXr0zFPCsyMhLHjh3D9OnTceXKFaxbtw7Lli1DeHg4gMd/DY0ePRpffPEFtm/fjvPnz2PAgAFwc3MrMmORiIh0tW/fHidPnsSaNWuQkpKC6OjoIqFtCOHh4bh79y769euHEydO4OrVq/j5558xcOBAFBYWorCwEMHBwQgICMDAgQMRFxeHc+fOYdasWeIYnp6eSExMxLVr13Dnzh29toT/zQcffACZTIbBgwfjjz/+wE8//YSvv/7a4PWXVqnuJ+3m5iYeJy5Pb775JrZs2YKoqChMmTIFKpUKc+fORf/+/cU+n376KR48eIAhQ4YgKysLb731Fvbs2cNzpMkopHJq3Mu+KlVF96p+3wEBAZgwYQI+/fRT5OTkICwsDAMGDMD58+cN+j5ubm747bffMG7cOHTq1Am5ubnw8PBAYGAgTExMMHXqVPz111/YuXMngMe7kJctW4Z+/fqhU6dOaNCgAcaOHYuQkBD4+Pjg0aNHSE1NLfMFUgBAoVBgx44dGDp0KBo1agQfHx/MmDEDQUFBBq2/tGTCswckXiAuLq7Y2YGbNm3Cw4cPX8lLg2o0GiiVSqjVatjZ2b2wv2T+I4Y0CpFNMnYFjwnRxv+fUjLrhvG/CgCAbLLxvxBDrRc5OTlITU2FSqXiRgCVWUnXJ713d8fExKBKlSpF2p2cnF7K1jUREVFFoXdIp6WlQaVSFWn38PBAWlqaQYoiIiKiUoS0k5OTeFONp509exYODg4GKYqIiIhKEdL9+vXDqFGjsH//fnFW2759+xAREYH333+/PGokIiKqkPSe3T116lRcu3YNHTp0EM8X02q1GDBgAI9JExERGZDeIW1hYYGNGzdi6tSpOHv2rHiP5ycXGCEiIiLDKNV50gBQs2ZN1KxZ05C1EBER0VNKFdI3btzA9u3bkZaWhry8PJ1ls2fPNkhhREREFZ3eIR0fH49u3bqhevXq4qVAr127BkEQ0Lhx4/KokYiIqELSe3Z3VFQUxo4di/Pnz8PS0hKbN2/G9evX0aZNm5d+j1IiIkmQyV7eQ8I8PT0xd+5cY5fxWtE7pC9evIgBAwYAAMzMzPDo0SMoFApMmTKl2Nt4ERGR8aWnp2PkyJGoXr065HI53N3d0bVrV8THxxu7tFJp27YtZDIZZDIZLC0tUbNmTcTExBS59SYAHD16FKampnjnnXeKHSsvLw9fffUVGjduDBsbGyiVSjRo0ACff/45bt26Vd4f5V/pHdI2NjbicWhXV1dcvXpVXHbnzh3DVUZERAZx7do1NGnSBPv27cNXX32F8+fPY8+ePWjXrp14V0Gpenbe09MGDx6M27dvIzk5GVFRUZg4cSJiY2OL9FuxYgVGjhyJgwcPFgnd3NxcdOzYEdOnT0doaCgOHjyI8+fPY/78+bhz5w4WLFhg8M+kD71DukWLFjh8+DAAoEuXLvj4448xbdo0hIWFoUWLFgYvkIiIymb48OGQyWQ4fvw4goKCULNmTdStWxdjxozBsWPHEBYWhnfffVfnNfn5+XBycsKKFSsAPN5yHTFiBEaMGAGlUokqVapgwoQJRbZcHz58iLCwMNja2qJatWpYtmyZzvLr16+jT58+qFSpEuzt7dG9e3dcu3ZNXB4aGooePXpg2rRpcHNzQ61atZ77uaytreHi4gIPDw8MHDgQvr6+2Lt3r06f7OxsbNy4EcOGDcM777yDVatW6SyfM2cODh8+jH379mHUqFFo0qQJqlWrhjZt2iA2Ntbo1//QO6Rnz56N5s2bAwAmT56MDh06YOPGjfD09BR/TCIikoa7d+9iz549CA8Ph42NTZHllSpVwocffog9e/bg9u3bYvvOnTvx8OFD9O3bV2xbvXo1zMzMcPz4ccybNw+zZ8/G8uXLdcabNWsWmjZtijNnzmD48OEYNmwYkpOTATwO/oCAANja2uLQoUP47bffoFAoEBgYqLPFHB8fj+
TkZOzdu1e8/eO/EQQBhw4dwqVLl2BhYaGz7Pvvv0ft2rVRq1YtBAcHY+XKlTp/WKxfvx4dO3ZEo0aNih1bZuR5AHqHdPXq1eHr6wvg8a7v2NhYnDt3Dps3b+YFTYiIJObKlSsQBAG1a9d+bp+WLVuiVq1a+Pbbb8W2uLg49O7dGwqFQmxzd3fHnDlzUKtWLfTv3x8jR47EnDlzdMbq0qULhg8fDi8vL4wbNw5VqlTB/v37AQAbN26EVqvF8uXLUb9+fdSpUwdxcXFIS0tDQkKCOIaNjQ2WL1+OunXrom7dus+te/HixVAoFJDL5WjdujW0Wi1GjRql02fFihUIDg4GAAQGBkKtVuPAgQPi8suXLxfZWu/ZsycUCgUUCgVatmz53Pd/GfQOaSIienUUN5GqOB9++CHi4uIAABkZGdi9ezfCwsJ0+rRo0UJny9LPzw8pKSkoLCwU255sxAGPt0JdXFyQmZkJ4PGNmK5cuQJbW1sxBO3t7ZGTk6Mzv6l+/friFvHatWvFvgqFAocOHRL79e/fH0lJSfjtt9/QuXNnfPbZZzqhmpycjOPHj6Nfv34AHk927tu37wv3+i5evBhJSUkICwvDw4cPS/T9lZdSX3GMiIikz9vbGzKZDJcuXfrXfgMGDMD48eNx9OhRHDlyBCqVCm+//bbe72dubq7zXCaTQavVAnh8fLhJkyZYu3Ztkdc5OjqK/356t3y3bt3EQ6wAULVqVfHfSqUSXl5eAB7v1vby8kKLFi3g7+8P4PFWdEFBAdzc3MTXCIIAuVyOhQsXQqlUwtvbW9wd/4SrqysAwN7eXr8PXw64JU1E9Bqzt7dHQEAAFi1ahAcPHhRZnpWVBQBwcHBAjx49EBcXh1WrVmHgwIFF+iYmJuo8P3bsGLy9vWFqalqiWho3boyUlBQ4OTnBy8tL56FUKot9ja2trU4/KyurYvspFApERERg7NixEAQBBQUFWLNmDWbNmoWkpCTxcfbsWbi5uWH9+vUAHt/Zce/evThz5kyJPsPLxpAmInrNLVq0CIWFhWjWrBk2b96MlJQUXLx4EfPnz4efn5/Y78MPP8Tq1atx8eJFhISEFBknLS0NY8aMQXJyMtavX48FCxYgIiKixHX0798fVapUQffu3XHo0CGkpqYiISEBo0aNwo0bN8r8OT/66CNcvnwZmzdvxs6dO3Hv3j0MGjQI9erV03kEBQWJu7wjIyPh5+eHDh06YN68eTh9+jRSU1Px888/Y/fu3SX+A6S8cHc3EVFZlfC4r7FUr14dp0+fxrRp0/Dxxx/j9u3bcHR0RJMmTbBkyRKxn7+/P1xdXVG3bl2dXcRPDBgwAI8ePUKzZs1gamqKiIgIDBkypMR1WFtb4+DBgxg3bhx69eqF+/fvo2rVqujQoQPs7OzK/Dnt7e0xYMAATJo0CSqVCv7+/sVuoQcFBWHmzJk4d+4cfH19ER8fj7lz5yIuLg5RUVHQarVQqVTo3LkzIiMjy1xXWciEEswqGDNmTIkHfBVvsKHRaKBUKqFWq0u0okjlynwCpFGIbJKxK3hMiDb+f5SSWTeM/1UAAGSTjf+FGGq9yMnJQWpqKlQqFSwtLQ0yptRkZ2ejatWqiIuLQ69evXSWtW3bFg0bNuRlPw2kpOtTibakS7qv3tjnkxERkf60Wi3u3LmDWbNmoVKlSujWrZuxS6L/V6KQfnKOGxERvX7S0tKgUqnwxhtvYNWqVTAz45FQqeAvQURUwXl6er7wfOqnLzZCL0+pQvrkyZP4/vvvkZaWVuTi5z/++KNBCiMiIqro9D4Fa8OGDWjZsiUuXryILVu2ID8/H7///jv27dv33PPciIiISH96h/T06dMxZ84c7NixAxYWFpg3bx4uXbqEPn36oFq1auVRIxERUYWkd0hfvXpVvHG2hYUFHjx4AJlMhsjIyCK3JCMiIqLS0zukK1eujPv37wN4fA3VCxcuAHh8aTljX4iciIjodaL3xLHWrVtj7969qF
+/Pnr37o2IiAjs27cPe/fuRYcOHcqjRiIiogpJ75BeuHAhcnJyAACfffYZzM3NceTIEQQFBeHzzz83eIFERFL3Mq+sVporqIWGhiIrKwtbt24V23744QcEBweLlwpNT09HTEwMdu3ahRs3boh3mAoODkZISAisra2fO/7TF7KytbVFrVq18Pnnn6N79+5F+sbExODzzz/Hl19+iU8++aTI8rLU8TrSO6SfvnWXiYkJxo8fb9CCiIiofC1fvhzh4eGIjY3FwIED8eeff6JVq1aoVKkSpk+fjvr160Mul+P8+fNYtmwZqlat+sKrkMXFxSEwMBAajQaLFy/Ge++9h9OnT6N+/fo6/VauXIlPP/0UK1euLBLShqjjdaN3SPv7+yM4OBi9evUyyAXRiYjo5Zk5cyaio6OxYcMG9OzZEwAwfPhwmJmZ4eTJkzr3cq5evTq6d+/+wgudAEClSpXg4uICFxcXTJ06FfPmzcP+/ft1QvrAgQN49OgRpkyZgjVr1uDIkSNo2bKluNwQdbxu9J44VrduXURFRcHFxQW9e/fGtm3bkJ+fXx61ERGRAY0bNw5Tp07Fzp07xYD+559/8MsvvyA8PFwnGJ+mz30ZCgoKxNtAWlhY6CxbsWIF+vXrB3Nzc/Tr10/sVx51vC70Dul58+bh5s2b2Lp1K2xsbDBgwAA4OztjyJAhOHDgQHnUSEREZbR7927MnDkT27Zt05nke+XKFQiCgFq1aun0r1KlChQKBRQKBcaNG/fC8fv16weFQgG5XI7IyEh4enqiT58+4nKNRiMeBweA4OBgfP/998jOzjZoHa8bvUMaeHwsulOnTli1ahUyMjKwdOlSHD9+HO3btzd0fUREZAC+vr7w9PREdHS0GIz/5vjx40hKSkLdunWRm5sLABg6dKgYmAqFQqf/nDlzkJSUhN27d8PHxwfLly/XmcO0fv161KhRAw0aNAAANGzYEB4eHti4caPedVQkpQrpJ9LT0xEbG4sZM2bg3LlzePPNNw1VFxERGVDVqlWRkJCAmzdvIjAwULzehZeXF2QyGZKTk3X6V69eHV5eXrCyshLbpkyZgqSkJPHxNBcXF3h5eaFTp06Ii4tD3759kZmZKS5fsWIFfv/9d5iZmYmPP/74AytXrtS7jopE75DWaDSIi4tDx44d4e7ujiVLlqBbt25ISUnBsWPHyqNG0ZdffgmZTIbRo0eLbTk5OQgPD4eDgwMUCgWCgoKQkZFRrnUQEb2KPDw8cODAAaSnp4tB7eDggI4dO2LhwoV48ODBv77eyckJXl5e4uN5mjVrhiZNmmDatGkAgPPnz+PkyZNISEjQCfmEhAQcPXoUly5d0quOikTvkHZ2dsZnn32GevXq4ejRo0hOTsbEiRNRo0aN8qhPdOLECSxduhS+vr467ZGRkdixYwc2bdqEAwcO4NatW+jVq1e51kJE9Kpyd3dHQkICMjMzERAQIJ4yVVBQgKZNm2Ljxo24ePEikpOT8d133+HSpUswNTXV+31Gjx6NpUuX4ubNm1ixYgWaNWuG1q1bo169euKjdevWePPNN8UJZOVRx6tO75Devn07bty4gTlz5qBp06blUVMR2dnZ6N+/P7755htUrlxZbFer1VixYgVmz56N9u3bo0mTJoiLi8ORI0fKfaueiOhV9cYbbyAhIQF37txBQEAAHB0dcebMGfj7+yMqKgoNGjRA06ZNsWDBAowdOxZTp07V+z0CAwOhUqkwbdo0fPfddwgKCiq2X1BQENasWYP8/HzUqFHD4HW86mRCKU48KygoQEJCAq5evYoPPvgAtra2uHXrFuzs7IpMJjCEkJAQ2NvbY86cOWjbti0aNmyIuXPnYt++fejQoQPu3buHSpUqif09PDwwevRoREZGFjtebm6uzgQEjUYDd3d3qNXqEp37LZWzAARIoxDZJGNX8FhprsRkaJJZN4z/VQB4uVfieh5DrRc5OTlITU2FSqWCpaWlQcakiquk65PeFz
P566+/EBgYiLS0NOTm5qJjx46wtbXFjBkzkJubi9jY2DIV/qwNGzbg9OnTOHHiRJFl6enpsLCw0Alo4PEu+fT09OeOGRMTg8mTJxu0TiIiIkPTe3d3REQEmjZtinv37unMtuvZsyfi4+MNWtz169cRERGBtWvXGvQv16ioKKjVavFx/fp1g41NRERkKHpvSR86dAhHjhwpciUZT09P3Lx502CFAcCpU6eQmZmJxo0bi22FhYU4ePAgFi5ciJ9//hl5eXnIysrS2ZrOyMiAi4vLc8eVy+WQy+UGrZWIiMjQ9A5prVaLwsLCIu03btyAra2tQYp6okOHDjh//rxO28CBA1G7dm2MGzcO7u7uMDc3R3x8vDgpITk5GWlpafDz8zNoLURERC+b3iHdqVMnzJ07F8uWLQPw+Fqq2dnZiI6ORpcuXQxanK2tLerVq6fTZmNjAwcHB7F90KBBGDNmDOzt7WFnZ4eRI0fCz88PLVq0MGgtREREL5veIT1r1iwEBATAx8cHOTk5+OCDD5CSkoIqVapg/fr15VHjv5ozZw5MTEwQFBSE3NxcBAQEYPHixS+9DiKqGLRarbFLoNdASdejUp+CtWHDBpw7dw7Z2dlo3Lgx+vfv/8petk2j0UCpVPIUrFLiKVj/I5l1w/hfBYDX6xQsrVaLlJQUmJqawtHRERYWFhXyrkxUNoIgIC8vD3///TcKCwvh7e0NE5Pnz+HWe0saAMzMzMQ7mRARVQQmJiZQqVS4ffs2bt26Zexy6BVnbW2NatWq/WtAAyUM6e3bt6Nz584wNzfH9u3b/7Vvt27dSl4lEdErxMLCAtWqVUNBQUGxE2iJSsLU1BRmZmYl2hNTopDu0aMH0tPT4eTkhB49ejy3n0wm44pLRK81mUwGc3NzmJubG7sUqgBKFNJPH+DmpAkiIqKXQ68rjuXn56NDhw5ISUkpr3qIiIjo/+kV0ubm5jh37lx51UJERERP0fva3cHBweK9P4mIiKj86H0KVkFBAVauXIlff/0VTZo0gY2Njc7y2bNnG6w4IiKiikzvkL5w4YJ4w4vLly8bvCAiIiJ6TO+Q3r9/f3nUQURERM/Q+5h0WFgY7t+/X6T9wYMHCAsLM0hRREREVIqQXr16NR49elSk/dGjR1izZo1BiiIiIiI9dndrNBoIggBBEHD//n1YWlqKywoLC/HTTz/BycmpXIokIiKqiEoc0pUqVYJMJoNMJkPNmjWLLJfJZJg8ebJBiyMiIqrIShzS+/fvhyAIaN++PTZv3gx7e3txmYWFBTw8PODm5lYuRRIREVVEJQ7pNm3aAABSU1NRrVo13keViIionOk9cczDwwOHDx9GcHAwWrZsiZs3bwIAvv32Wxw+fNjgBRIREVVUeof05s2bERAQACsrK5w+fRq5ubkAALVajenTpxu8QCIioopK75D+4osvEBsbi2+++UbnfqqtWrXC6dOnDVocERFRRaZ3SCcnJ6N169ZF2pVKJbKysgxRExEREaEUIe3i4oIrV64UaT98+DCqV69ukKKIiIioFCE9ePBgREREIDExETKZDLdu3cLatWsxduxYDBs2rDxqJCIiqpD0vsHG+PHjodVq0aFDBzx8+BCtW7eGXC7H2LFjMXLkyPKokYiIqEKSCYIglOaFeXl5uHLlCrKzs+Hj4wOFQoFHjx7BysrK0DWWO41GA6VSCbVaDTs7uxf2l8op4gKkUYhskrEreEyILtWqbFCSWTeM/1UAAGSTjf+FSGG9ICotvXd3P2FhYQEfHx80a9YM5ubmmD17NlQqlSFrIyIiqtBKHNK5ubmIiopC06ZN0bJlS2zduhUAEBcXB5VKhTlz5iAyMrK86iQiIqpwSnxMeuLEiVi6dCn8/f1x5MgR9O7dGwMHDsSxY8cwe/Zs9O7dG6ampuVZKxERUYVS4pDetGkT1qxZg27duuHChQvw9fVFQUEBzp49y+t4ExERlYMS7+6+ceMGmjRpAgCoV68e5HI5Ii
MjGdBERETlpMQhXVhYCAsLC/G5mZkZFApFuRRFREREeuzuFgQBoaGhkMvlAICcnBwMHToUNjY2Ov1+/PFHw1ZIRERUQZU4pENCQnSeBwcHG7wYIiIi+p8Sh3RcXFx51kFERETPKPXFTIiIiKh8MaSJiIgkiiFNREQkUQxpIiIiiZJ0SMfExODNN9+Era0tnJyc0KNHDyQnJ+v0ycnJQXh4OBwcHKBQKBAUFISMjAwjVUxERGQ4kg7pAwcOIDw8HMeOHcPevXuRn5+PTp064cGDB2KfyMhI7NixA5s2bcKBAwdw69Yt9OrVy4hVExERGUap7ydtDH///TecnJxw4MABtG7dGmq1Go6Ojli3bh3ee+89AMClS5dQp04dHD16FC1atCjRuLyfdNnwftL/I5l1w/hfBQDeT5qorCS9Jf0stVoNALC3twcAnDp1Cvn5+fD39xf71K5dG9WqVcPRo0efO05ubi40Go3Og4iISGpemZDWarUYPXo0WrVqhXr16gEA0tPTYWFhgUqVKun0dXZ2Rnp6+nPHiomJgVKpFB/u7u7lWToREVGpvDIhHR4ejgsXLmDDhg1lHisqKgpqtVp8XL9+3QAVEhERGVaJLwtqTCNGjMDOnTtx8OBBvPHGG2K7i4sL8vLykJWVpbM1nZGRARcXl+eOJ5fLxRuFEBERSZWkt6QFQcCIESOwZcsW7Nu3DyqVSmd5kyZNYG5ujvj4eLEtOTkZaWlp8PPze9nlEhERGZSkt6TDw8Oxbt06bNu2Dba2tuJxZqVSCSsrKyiVSgwaNAhjxoyBvb097OzsMHLkSPj5+ZV4ZjcREZFUSTqklyxZAgBo27atTntcXBxCQ0MBAHPmzIGJiQmCgoKQm5uLgIAALF68+CVXSkREZHiSDumSnMJtaWmJRYsWYdGiRS+hIiIiopdH0sekiYiIKjKGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUQxpImIiCSKIU1ERCRRDGkiIiKJYkgTERFJ1GsT0osWLYKnpycsLS3RvHlzHD9+3NglERERlclrEdIbN27EmDFjEB0djdOnT6NBgwYICAhAZmamsUsjIiIqNTNjF2AIs2fPxuDBgzFw4EAAQGxsLHbt2oWVK1di/PjxRfrn5uYiNzdXfK5WqwEAGo3m5RRsIJKpNsfYBTz2qv1+5UkyX4UE1o3SrBe2traQyWTlUA2RfmSCIAjGLqIs8vLyYG1tjR9++AE9evQQ20NCQpCVlYVt27YVec2kSZMwefLkl1glEb1K1Go17OzsjF0G0au/JX3nzh0UFhbC2dlZp93Z2RmXLl0q9jVRUVEYM2aM+Fyr1eLu3btwcHCoUH89azQauLu74/r16/wPiXRU9HXD1tbW2CUQAXgNQro05HI55HK5TlulSpWMU4wE2NnZVcj/iOnFuG4QGdcrP3GsSpUqMDU1RUZGhk57RkYGXFxcjFQVERFR2b3yIW1hYYEmTZogPj5ebNNqtYiPj4efn58RKyMiIiqb12J395gxYxASEoKmTZuiWbNmmDt3Lh48eCDO9qbiyeVyREdHF9n1T8R1g0gaXvnZ3U8sXLgQX331FdLT09GwYUPMnz8fzZs3N3ZZREREpfbahDQREdHr5pU/Jk1ERPS6YkgTERFJFEOaiIhIohjSZBBt27bF6NGjDT7uqlWrKvSFZqQsNDRU51K8hpKQkACZTIasrCyDj030qmFIl4PnBZaUAqe8QpUMIzQ0FDKZDDKZDBYWFv
Dy8sKUKVNQUFBQpjHLI1SJqPy8FudJU8nl5eXBwsJCsuPR/wQGBiIuLg65ubn46aefEB4eDnNzc0RFRek1TmFhoUGvSW/o8Yjo+bglbSRPtmq+/vpruLq6wsHBAeHh4cjPzxf7eHp6YurUqejXrx9sbGxQtWpVLFq0SGecrKwsfPjhh3B0dISdnR3at2+Ps2fPissnTZqEhg0bYvny5VCpVLC0tERoaCgOHDiAefPmiVtr165dK3ZLf+vWrTr/IRc33hMFBQUYMWIElEolqlSpggkTJuDpM/xyc3MxduxYVK1aFTY2NmjevDkSEhJ03m/VqlWoVq0arK2t0bNnT/zzzz9l+ZpfaXK5HC4uLvDw8MCwYcPg7++P7du3v/B7fPI7bt++HT4+PpDL5QgLC8Pq1auxbds28TdPSEgodtdyUlKSuE48b7y0tDSx/+TJk8X1b+jQocjLyxOXabVaxMTEQKVSwcrKCg0aNMAPP/yg8zl/+ukn1KxZE1ZWVmjXrp34vkTELWmj2r9/P1xdXbF//35cuXIFffv2RcOGDTF48GCxz1dffYX//ve/mDx5Mn7++WdERESgZs2a6NixIwCgd+/esLKywu7du6FUKrF06VJ06NABly9fhr29PQDgypUr2Lx5M3788UeYmprCw8MDly9fRr169TBlyhQAgKOjY4nrfna8J1avXo1Bgwbh+PHjOHnyJIYMGYJq1aqJn2fEiBH4448/sGHDBri5uWHLli0IDAzE+fPn4e3tjcTERAwaNAgxMTHo0aMH9uzZg+jo6DJ/z68LKysr/PPPPy/8HgHg4cOHmDFjBpYvXw4HBwe4urri0aNH0Gg0iIuLAwDY29vjyJEjJXrvZ8dzcnICAMTHx8PS0hIJCQm4du0aBg4cCAcHB0ybNg0AEBMTg++++w6xsbHw9vbGwYMHERwcDEdHR7Rp0wbXr19Hr169EB4ejiFDhuDkyZP4+OOPy+HbI3pFCWRwbdq0ESIiIoq0x8XFCUqlUhAEQQgJCRE8PDyEgoICcXnv3r2Fvn37is89PDyEwMBAnTH69u0rdO7cWRAEQTh06JBgZ2cn5OTk6PSpUaOGsHTpUkEQBCE6OlowNzcXMjMzX1jj0/U9sWXLFuHp1eTfxqtTp46g1WrFtnHjxgl16tQRBEEQ/vrrL8HU1FS4efOmzus6dOggREVFCYIgCP369RO6dOlS5PM+W1NFEBISInTv3l0QBEHQarXC3r17BblcLoSGhr7we4yLixMACElJSc8d84n9+/cLAIR79+6JbWfOnBEACKmpqS8cz97eXnjw4IHYtmTJEkGhUAiFhYVCTk6OYG1tLRw5ckTndYMGDRL69esnCIIgREVFCT4+PjrLx40bV6QmooqKW9JGVLduXZ0tUVdXV5w/f16nz7M3CfHz88PcuXMBAGfPnkV2djYcHBx0+jx69AhXr14Vn3t4eOi1pfwizxuvRYsWOrvG/fz8MGvWLBQWFuL8+fMoLCxEzZo1dV6Tm5sr1n/x4kX07NlTZ7mfnx/27NljsNpfJTt37oRCoUB+fj60Wi0++OADvPfee1i1atW/fo/A4xvP+Pr6GqyW543XoEEDWFtbi8/9/PyQnZ2N69evIzs7Gw8fPhT3+jyRl5eHRo0aAXj8mz97+V7eGIfofxjS5cDOzg5qtbpIe1ZWFpRKpfjc3NxcZ7lMJoNWqy3x+2RnZ8PV1bXIcV1A9/7YNjY2JRrPxMRE5xgyAJ1j5PqO97Ts7GyYmpri1KlTOn+YAIBCodB7vIqgXbt2WLJkCSwsLODm5gYzMzNs3LixRN+jlZVViSZ3mZg8npby9O9e3G9e0vGelp2dDQDYtWsXqlatqrOMN+4gKhmGdDmoVasWfvnllyLtp0+fLrIF9CLHjh0r8rxOnToAgMaNGyM9PR1mZmbw9PTUa1wLCwsUFhbqtDk6OuL+/ft48OCBGMRJSUklHjMxMbFIrd7e3jA1NUWjRo1QWFiIzMxMvP
3228W+vk6dOsWOUVHZ2NjAy8tLp60k3+PzPO83B4Dbt2+jcuXKAPT7zc+ePYtHjx7BysoKwOPfS6FQwN3dHfb29uIkszZt2hT7+jp16mD79u06bRX5Nyd6Fmd3l4Nhw4bh8uXLGDVqFM6dO4fk5GTMnj0b69ev13tSzG+//YaZM2fi8uXLWLRoETZt2oSIiAgAgL+/P/z8/NCjRw/88ssvuHbtGo4cOYLPPvsMJ0+e/NdxPT09kZiYiGvXruHOnTvQarVo3rw5rK2t8d///hdXr17FunXrsGrVqhLXmpaWhjFjxiA5ORnr16/HggULxFpr1qyJ/v37Y8CAAfjxxx+RmpqK48ePIyYmBrt27QIAjBo1Cnv27MHXX3+NlJQULFy4sMLu6n6eknyPz+Pp6Smuj3fu3EF+fj68vLzg7u6OSZMmISUlBbt27cKsWbNKXE9eXh4GDRqEP/74Az/99BOio6MxYsQImJiYwNbWFmPHjkVkZCRWr16Nq1ev4vTp01iwYAFWr14NABg6dChSUlLwySefIDk5We91jui1Z+yD4q+r48ePCx07dhQcHR0FpVIpNG/eXNiyZYu4vLhJPBEREUKbNm3E5x4eHsLkyZOF3r17C9bW1oKLi4swb948nddoNBph5MiRgpubm2Bubi64u7sL/fv3F9LS0gRBeDzRq0GDBkXqS05OFlq0aCFYWVnpTBLasmWL4OXlJVhZWQnvvvuusGzZsiITx4obr02bNsLw4cOFoUOHCnZ2dkLlypWF//73vzoTyfLy8oSJEycKnp6egrm5ueDq6ir07NlTOHfunNhnxYoVwhtvvCFYWVkJXbt2Fb7++usKP3HsWS/6HoubACgIgpCZmSl07NhRUCgUAgBh//79giAIwuHDh4X69esLlpaWwttvvy1s2rSpyMSx4sZ7UuPEiRMFBwcHQaFQCIMHD9aZyKjVaoW5c+cKtWrVEszNzQVHR0chICBAOHDggNhnx44dgpeXlyCXy4W3335bWLlyJSeOEf0/3qpSwjw9PTF69GheGYyIqILi7m4iIiKJYkgTERFJFHd3ExERSRS3pImIiCSKIU1ERCRRDGkiIiKJYkgTERFJFEOaiIhIohjSREREEsWQJiIikiiGNBERkUT9H+bJJwhmAghsAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 500x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"categories = ['Unperturbed', 'Perturbed']\n",
"full_text_index_rag_percentage = [full_text_index_based_total_correct_retrieval_percentage, full_text_index_based_total_correct_retrieval_perturbed_percentage]\n",
"neo4j_rag_percentage = [neo4j_rag_total_correct_retrieval_percentage, neo4j_rag_total_correct_retrieval_perturbed_percentage]\n",
"kg_rag_percentage = [kg_rag_total_correct_retrieval_percentage, kg_rag_total_correct_retrieval_perturbed_percentage]\n",
"\n",
"full_text_index_color = 'blue'\n",
"neo4j_color = 'red'\n",
"kg_rag_color = 'green'\n",
"\n",
"fig, ax = plt.subplots(figsize=(5, 3))\n",
"bar_width = 0.25\n",
"index = np.arange(len(categories))\n",
"\n",
"ax.bar(index - bar_width, full_text_index_rag_percentage, bar_width, color=full_text_index_color, label='Full-Text Index')\n",
"ax.bar(index, neo4j_rag_percentage, bar_width, color=neo4j_color, label='Cypher-RAG')\n",
"ax.bar(index + bar_width, kg_rag_percentage, bar_width, color=kg_rag_color, label='KG-RAG')\n",
"\n",
"ax.set_ylabel('Retrieval accuracy')\n",
"ax.set_xticks(index)\n",
"ax.set_xticklabels(categories)\n",
"ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))\n",
"\n",
"sns.despine()\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"fig_path = '../data/results/figures'\n",
"os.makedirs(fig_path, exist_ok=True)\n",
"fig.savefig(os.path.join(fig_path, 'retrieval_accuracy_three_way_comparison.svg'), format='svg', bbox_inches='tight')\n"
]
},
{
"cell_type": "markdown",
"id": "11a596c4-597f-48c9-87d1-8e74b5522389",
"metadata": {},
"source": [
"## Token usage plot for Full-Text Index, Cypher-RAG, and KG-RAG"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "799e6fd9-7fd7-4884-bda9-350a2f7a050d",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEgCAYAAACwxdQWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCGElEQVR4nO3dd1iT1xcH8G8CsgQEB8uBIgqyhNqqVMUBiopaq617170XKnUVt9RtrVpr3VoX7lW0ooC4B6KIoCCKgJstK+f3B81bIvqr0QwI5/M876O57004SSAnd7z3ioiIwBhjjP1DrO4AGGOMlSycGBhjjMngxMAYY0wGJwbGGGMyODEwxhiTwYmBMcaYDE4MjDHGZHBiYIwxJoMTg4IQEdLS0sDXCzLGSjtODAqSnp6OChUqID09Xd2hMMbYZ+HEwBhjTAYnBsYYYzI4MTDGGJPBiYExxpgMTgyMMcZkcGJgjDEmQ1vdATD1SkpKQlJSktz3s7S0hKWlpRIiYoypGyeGMm79+vXw9/eX+36zZ8/GTz/9pPiAGGNqJ+KtPRUjLS0NFSpUQGpqKoyNjdUdzkd7X4shOzsbTZs2BQCEhoZCX1+/2P24xcCY5uLEoCClNTG8T2ZmJgwNDQEAGRkZKF++vJojYoypEg8+M8YYk8GJgTHGmAxODIwxxmRwYmCMMSaDEwNjjDEZnBgYY4zJ+KzE8PbtW0XFwRhjrISQOzFIJBLMnTsXVatWhaGhIR4+fAgAmDlzJjZu3KjwABljjKmW3Ilh3rx52Lx5MwICAqCjoyOUOzk54ffff1docIwxxlRP7sSwdetW/Pbbb+jduze0tLSE8vr16+PevXsKDY4xxpjqyZ0YEhMTYWtrW6xcIpEgLy9PIUExxhhTH7kTg4ODA0JCQoqV79u3D25ubgoJijHGmPrIvez2rFmz0L9/fyQmJkIikSAwMBDR0dHYunUrjh49qowYNZ5IpO4IPuyftfRKHF76kTHlkbvF8M033+DIkSM4ffo0ypcvj1mzZiEqKgpHjhxB69atlREjY4wxFeJltxXkc5bdLnkthkwA0qZCBoCSt+w2/9Yypjx85TNjjDEZco8xmJqaQvSer7gikQh6enqwtbXFgAEDMHDgQIUEyBhjTLXkbjHMmjULYrEYPj4+8Pf3h7+/P3x8fCAWizFq1CjUrVsXI0aMwIYNG/7zsc6fP4+OHTvCysoKIpEIBw8elDlPRJg1axYsLS2hr68PLy8vxMTEyNR59eoVevfuDWNjY5iYmOCHH35ARkaGTJ2IiAg0a9YMenp6qF69OgICAorFsnfvXtjb20NPTw/Ozs44fvy4vC8NY4xpBpJTly5daO3atcXK161bR126dCEiolWrVpGTk9N/Ptbx48dp+vTpFBgYSADowIEDMucXLVpEFSpUoIMHD9KtW7eoU6dOVKtWLcrOzhbqtG3blurXr08XL16kkJAQsrW1pZ49ewrnU1NTydzcnHr37k2RkZG0a9cu0tfXp/Xr1wt1wsLCSEtLiwICAuju3bs0Y8YMKleuHN2+ffujX5fU1FQCQKmpqR99H6nCHvOSdGQQgH+OjBIQT/GDMaY8cv+JlS9fnmJiYoqVx8TEUPny5YmIKDY2lgwMDOQL5J3EIJFIyMLCgn7++Weh7M2bN6Srq0u7du0iIqK7d+8SALpy5YpQ58SJEyQSiSgxMZGIiH799VcyNTWlnJwcoc7UqVPJzs5OuN2tWzfy8fGRiadRo0Y0bNiwj46fEwMnBsY0hdxdSRUrVsSRI0eKlR85cgQVK1YEULiZvJGR0Se3YgAgLi4OycnJ8PLyEsoqVKiARo0aITw8HAAQHh4OExMTfPnll0IdLy8viMViXLp0Sajj4eEhs66Tt7c3oqOj8fr1a6FO0Z8jrSP9Oe+Tk5ODtLQ0mYMxxjSB3IPPM2fOxIgRI3D27Fk0bNgQAHDlyhUcP34c69atAwAEBQWhefPmnxVYcn
IyAMDc3Fym3NzcXDiXnJwMMzMzmfPa2tqoWLGiTJ1atWoVewzpOVNTUyQnJ//fn/M+CxcuhL+//yc8M8YYK9nkTgxDhgyBg4MDfvnlFwQGBgIA7OzscO7cOXz99dcAgEmTJik2yhLIz88PEydOFG6npaWhevXqaoyIMcYUQ+7EAABNmjRBkyZNFB2LDAsLCwBASkoKLC0thfKUlBS4uroKdZ49eyZzv/z8fLx69Uq4v4WFBVJSUmTqSG//Vx3p+ffR1dWFrq7uJzwzxhgr2T57Bzdl9bPXqlULFhYWOHPmjFCWlpaGS5cuwd3dHQDg7u6ON2/e4Nq1a0Kdv//+GxKJBI0aNRLqnD9/Xmbl16CgINjZ2cHU1FSoU/TnSOtIfw5jjJUp8o5WZ2Zm0qhRo6hKlSokFouLHfJIT0+nGzdu0I0bNwgALVu2jG7cuEGPHj0iosLpqiYmJnTo0CGKiIigb7755r3TVd3c3OjSpUsUGhpKderUkZmu+ubNGzI3N6e+fftSZGQk/fnnn2RgYFBsuqq2tjYtWbKEoqKiaPbs2TxdlWclMVZmyf0nNnLkSKpXrx7t27eP9PX16Y8//qC5c+dStWrVaPv27XI91tmzZ4t8AP179O/fn4gKp6zOnDmTzM3NSVdXlzw9PSk6OlrmMV6+fEk9e/YkQ0NDMjY2poEDB1J6erpMnVu3blHTpk1JV1eXqlatSosWLSoWy549e6hu3bqko6NDjo6OdOzYMbmeS+lNDE8JuPbOEVrk/Qh9z/lr/9yPEwNjmkjuRfRq1KiBrVu3okWLFjA2Nsb169dha2uLbdu2YdeuXWX2iuHSu4jeTwA+ZXbV7H/uqx7y/dYyxuQh9+Dzq1evYGNjAwAwNjbGq1evAABNmzbFiBEjFBsdU4FhADp9wv0s/7sKY6xUkjsx2NjYIC4uDjVq1IC9vT327NmDhg0b4siRIzAxMVFCiEy5LMEf8oyxouSelTRw4EDcunULADBt2jSsWbMGenp6mDBhAnx9fRUeIGOMMdX67I16Hj16hGvXrsHW1hYuLi6KiqvUKb1jDKUTjzEwpjyfdIFbUdbW1qhQoQJ3IzHGmIaQuytp8eLF2L17t3C7W7duqFSpEqpWrSp0MTHGGCu95E4M69atE9YECgoKQlBQEE6cOIF27drxGANjjGkAubuSkpOThcRw9OhRdOvWDW3atEHNmjWFZSgYY4yVXnK3GExNTfH48WMAwMmTJ4V9DIgIBQUFio2OMcaYysndYujSpQt69eqFOnXq4OXLl2jXrh0A4MaNG7C1tVV4gIwxxlRL7sSwfPly1KxZE48fP0ZAQAAMDQ0BAElJSRg5cqTCA2SMMaZan30dAyvE1zGoFv/WMqY8crcYtm7d+n/P9+vX75ODYYwxpn5ytxikm9tI5eXlISsrCzo6OjAwMBAW1StruMWgWtxiYEx55J6V9Pr1a5kjIyMD0dHRaNq0KXbt2qWMGBljjKmQwsYYrl69ij59+uDevXuKeLhSh1sMqsUtBsaU57P2fC5KW1sbT58+VdTDMcYYUxO5B58PHz4sc5uIkJSUhF9++QVNmjRRWGCMMcbUQ+7E0LlzZ5nbIpEIVapUQatWrbB06VJFxcUYY0xN5E4MEolEGXEwxhgrIRQ2xsAYY0wzfPZGPYyxz3Pz5k3cuXNH7vs5OjrC1dVV8QGxMo8TA2Nqni88HsC5T7hfcwDBCo1EDjxfWKNxYmBMzVYAkL+9ADgqOA7GpDgxMKZmrv8cjJUUn5QY3rx5g8uXL+PZs2fFZinxInqMMVbKkZwOHz5MRkZGJBKJqEKFCmRiYiIcpqam8j7cf7K2tiYAxY6RI0cSEVHz5s2LnRs2bJjMYzx69Ijat29P+vr6VKVKFZo8eTLl5eXJ1Dl79iy5ubmRjo4O1a5dmzZt2iRXnKmpqQSAUlNT5X6OhR22fMhzKJS6n0xpPJhGk7vFMGnSJAwaNAgLFiyAgYGB4jLUB1y5ckVmy9
DIyEi0bt0a33//vVA2ZMgQzJkzR7hdNK6CggL4+PjAwsICFy5cQFJSEvr164dy5cphwYIFAIC4uDj4+Phg+PDh2LFjB86cOYPBgwfD0tIS3t7eSn+OjDFWosibSQwMDOjBgwfKSFIfZdy4cVS7dm2SSCREVNhiGDdu3AfrHz9+nMRiMSUnJwtla9euJWNjY8rJySEioilTppCjo6PM/bp3707e3t4fHRe3GErxF1Z1P5nSeDCNJvcFbt7e3rh69aoSUtR/y83Nxfbt2zFo0CCIikwx3LFjBypXrgwnJyf4+fkhKytLOBceHg5nZ2eYm5sLZd7e3khLSxPmjoeHh8PLy0vmZ3l7eyM8PPyDseTk5CAtLU3mYIwxTSB3V5KPjw98fX1x9+5dODs7o1y5cjLnO3XqpLDg3nXw4EG8efMGAwYMEMp69eoFa2trWFlZISIiAlOnTkV0dDQCAwMBAMnJyTJJAYBwOzk5+f/WSUtLQ3Z2NvT19YvFsnDhQvj7+yvy6THGWIkgd2IYMmQIAMj06UuJRCKZ8QBF27hxI9q1awcrKyuhbOjQocL/nZ2dYWlpCU9PTzx48AC1a9dWWix+fn6YOHGicDstLQ3Vq1dX2s9jjDFVKTWL6D169AinT58WWgIf0qhRIwBAbGwsateuDQsLC1y+fFmmTkpKCgDAwsJC+FdaVrSOsbHxe1sLAKCrqwtdXd1Pei6MMVaSfdYiem/fvlVUHP9p06ZNMDMzg4+Pz/+td/PmTQCApaUlAMDd3R23b9/Gs2fPhDpBQUEwNjaGg4ODUOfMmTMyjxMUFAR3d3cFPgPGGCsl5B2tzs/Ppzlz5pCVlRVpaWkJM5RmzJhBv//+u8JHx4mICgoKqEaNGjR16lSZ8tjYWJozZw5dvXqV4uLi6NChQ2RjY0MeHh4y8To5OVGbNm3o5s2bdPLkSapSpQr5+fkJdR4+fEgGBgbk6+tLUVFRtGbNGtLS0qKTJ09+dIw8K6kUT4pR95MpjQfTaHK/w/7+/mRjY0Pbt28nfX19ITH8+eef1LhxY4UHSER06tQpAkDR0dEy5QkJCeTh4UEVK1YkXV1dsrW1JV9f32IfzvHx8dSuXTvS19enypUr06RJk957gZurqyvp6OiQjY0NX+BWwg+FUveTKY0H02giIiJ5Whi2trZYv349PD09YWRkhFu3bsHGxgb37t2Du7s7Xr9+rZSWTUmXlpaGChUqIDU1FcbGxnLdV82Le5ZK8v3W/gd+A+Sn0DeAlTRyjzEkJibC1ta2WLlEIkFeXp5CgmKMMaY+cicGBwcHhISEFCvft28f3NzcFBIUY4wx9ZF7uuqsWbPQv39/JCYmQiKRIDAwENHR0di6dSuOHj2qjBgZY4ypkNwthm+++QZHjhzB6dOnUb58ecyaNQtRUVE4cuQIWrdurYwYGWOMqZDcg89PnjxBtWrV3nvu4sWLaNy4sUICK2148Fm1ePBZzXjwWaPJ3WJo06YNXr16Vaw8LCwMbdu2VUhQjDHG1EfuxNC4cWO0adMG6enpQtn58+fRvn17zJ49W6HBMcYYUz25E8Pvv/+OGjVqoGPHjsjJycHZs2fh4+ODOXPmYMKECcqIkTHGmArJPcYAFO6L4OPjg6ysLERERGDhwoUYPXq0MuIrNXiMQbV4jEHNeIxBo31UYoiIiChWlp6ejp49e8LHxwcjRowQyl1cXBQbYSnBiUG1ODGoGScGjfZRiUEsFkMkEqFo1aK3pf9X9n4MJRknBtXixKBmnBg02kdd4BYXF6fsOBhjjJUQH5UYrK2tlR0HY4yxEkLuJTEA4MGDB1ixYgWioqIAFK6fNG7cOKVupckYY0w15J6ueurUKTg4OODy5ctwcXGBi4sLLl26BEdHRwQFBSkjRsYYYyok93RVNzc3eHt7Y9GiRTLl06ZNw19//YXr168rNMDSggefVYsHn9WMB581mtyJQU9PD7dv30adOnVkyu/fvw8XFxeV7gNdknBiUC
1ODGrGiUGjyd2VVKVKFdy8ebNY+c2bN2FmZqaImBhjjKmR3IPPQ4YMwdChQ/Hw4UN8/fXXAAoX0Fu8eDEmTpyo8AAZY4ypltxdSUSEFStWYOnSpXj69CkAwMrKCr6+vhg7dixEZbRZzl1JqsVdSWrGXUka7ZPWSpKSrrBqZGSksIBKK04MqsWJQc04MWg0uccYWrVqhTdv3gAoTAjSpJCWloZWrVopNDjGGGOqJ3diCA4ORm5ubrHyt2/fIiQkRCFBMcYYU5+PHnwuusLq3bt3kZycLNwuKCjAyZMnUbVqVcVGxxhjTOU+OjG4urpCJBJBJBK9t8tIX18fq1evVmhwjDHGVO+ju5Li4uLw4MEDEBEuX76MuLg44UhMTERaWhoGDRqk0OB++uknIRlJD3t7e+H827dvMWrUKFSqVAmGhobo2rUrUlJSZB4jISEBPj4+MDAwgJmZGXx9fZGfny9TJzg4GF988QV0dXVha2uLzZs3K/R5MMZYafLRLQbpCqsSiURpwbyPo6MjTp8+LdzW1v435AkTJuDYsWPYu3cvKlSogNGjR6NLly4ICwsDUNjF5ePjAwsLC1y4cAFJSUno168fypUrhwULFgAoTHg+Pj4YPnw4duzYgTNnzmDw4MGwtLSEt7e3Sp8rY4yVCFSCzZ49m+rXr//ec2/evKFy5crR3r17hbKoqCgCQOHh4UREdPz4cRKLxZScnCzUWbt2LRkbG1NOTg4REU2ZMoUcHR1lHrt79+7k7e0tV6ypqakEgFJTU+W6HxFR4dw/PuQ5FErdT6Y0HkyjyT0rSdViYmJgZWUFGxsb9O7dGwkJCQCAa9euIS8vD15eXkJde3t71KhRA+Hh4QCA8PBwODs7w9zcXKjj7e2NtLQ03LlzR6hT9DGkdaSPwRhjZc0n7cegKo0aNcLmzZthZ2eHpKQk+Pv7o1mzZoiMjERycjJ0dHRgYmIicx9zc3NhxlRycrJMUpCel577f3XS0tKQnZ0NfX3998aWk5ODnJwc4XZaWtpnPVfGGCspSnRiaNeunfB/FxcXNGrUCNbW1tizZ88HP7BVZeHChfD391drDIwxpgyf1JX05s0b/P777/Dz88OrV68AANevX0diYqJCg3uXiYkJ6tati9jYWFhYWCA3N1e4ClsqJSUFFhYWAAALC4tis5Skt/+rjrGx8f9NPn5+fkhNTRWOx48ff+7TY4yxEkHuxBAREYG6deti8eLFWLJkifDBHBgYCD8/P0XHJyMjIwMPHjyApaUlGjRogHLlyuHMmTPC+ejoaCQkJMDd3R0A4O7ujtu3b+PZs2dCnaCgIBgbG8PBwUGoU/QxpHWkj/Ehurq6MDY2ljkYY0wjyDta7enpSb6+vkREZGhoSA8ePCAiorCwMLK2tlboyPikSZMoODiY4uLiKCwsjLy8vKhy5cr07NkzIiIaPnw41ahRg/7++2+6evUqubu7k7u7u3D//Px8cnJyojZt2tDNmzfp5MmTVKVKFfLz8xPqPHz4kAwMDMjX15eioqJozZo1pKWlRSdPnpQrVp6VVIonxaj7yZTGg2k0ud9hY2Njio2NJSLZxBAfH0+6uroKDa579+5kaWlJOjo6VLVqVerevbvws4mIsrOzaeTIkWRqakoGBgb07bffUlJSksxjxMfHU7t27UhfX58qV65MkyZNory8PJk6Z8+eJVdXV9LR0SEbGxvatGmT3LFyYijFn0vqfjKl8WAaTe5lt83MzHDq1Cm4ubnByMgIt27dgo2NDYKCgjBo0KAy29fOy26rlny/tf+B3wD5KfQNYCWN3GMMnTp1wpw5c5CXlwcAEIlESEhIwNSpU9G1a1eFB8gYY0y15E4MS5cuRUZGBszMzJCdnY3mzZvD1tYWRkZGmD9/vjJiZIwxpkKfvINbaGgoIiIikJGRgS+++KLY1cNlDXclqRZ3JakZdyVptM/a2pP9ixODanFiUDP+2NBocl/5vGrVqveWi0Qi6OnpwdbWFh
4eHtDS0vrs4BhjjKme3C2GWrVq4fnz58jKyoKpqSkA4PXr1zAwMIChoSGePXsGGxsbnD17FtWrV1dK0CURtxhUi1sMasYtBo0m9+DzggUL8NVXXyEmJgYvX77Ey5cvcf/+fTRq1AgrV65EQkICLCwsMGHCBGXEyxhjTMnkbjHUrl0b+/fvh6urq0z5jRs30LVrVzx8+BAXLlxA165dkZSUpMhYSzRuMagWtxjUjFsMGk3uMYakpKRiW2MCQH5+vrCUtZWVFdLT0z8/OsYYU7KkpKRP+hJraWkJS0tLJUSkfnInhpYtW2LYsGH4/fff4ebmBqCwtTBixAi0atUKAHD79m3UqlVLsZEyxpgSrF+//pOW0J89ezZ++uknxQdUAsjdlZScnIy+ffvizJkzKFeuHIDC1oKnpye2bdsGc3NznD17Fnl5eWjTpo1Sgi6JuCtJtbgrSc00qCvpfS2G7OxsNG3aFEDhNVvvW4Jfk1sMn3wdw71793D//n0AgJ2dHezs7BQaWGnDiUG1ODGomQYlhvfJzMyEoaEhgMLl/suXL6/miFTrk3dws7e3h729vSJjYYwxVgJ8UmJ48uQJDh8+jISEBOTm5sqcW7ZsmUICY4yVDSL/EthiK/KxZrjAENBRXygfQrOV12qTOzGcOXMGnTp1go2NDe7duwcnJyfEx8eDiPDFF18oI0bGGGMqJPcFbn5+fpg8eTJu374NPT097N+/H48fP0bz5s3x/fffKyNGxhhjKiR3iyEqKgq7du0qvLO2NrKzs2FoaIg5c+bgm2++wYgRIxQeJGOMKU36P0dRRS/VSsb7PymN/jk0kNyJoXz58sK4gqWlJR48eABHR0cAwIsXLxQbHWOMKdtVAOf+z/k/PlDeHEBLxYdTEsidGBo3bozQ0FDUq1cP7du3x6RJk3D79m0EBgaicePGyoiRMcaU50sAnzLbXkNbC8AnJIZly5YhIyMDAODv74+MjAzs3r0bderU4RlJjLHSR4O7hD6VXImhoKAAT548gYuLC4DCbqV169YpJTDGGGPqIdesJC0tLbRp0wavX79WVjyMMcbUTO7pqk5OTnj48KEyYmGMMVYCyJ0Y5s2bh8mTJ+Po0aNISkpCWlqazMEYY6x0k3sRPbH431wiKrL4GBFBJBKhoKBAcdGVIryInmrxInpqpsA3oEQuiVEKlKglMc6ePauMOBhjjJUQcnclNW/e/P8eirRw4UJ89dVXMDIygpmZGTp37ozo6GiZOi1atIBIJJI5hg8fLlMnISEBPj4+MDAwgJmZGXx9fYvtQhccHIwvvvgCurq6sLW1xebNmxX6XBhjrLSQOzEAQEhICPr06YOvv/4aiYmJAIBt27YhNDRUocGdO3cOo0aNwsWLFxEUFCRs/pOZmSlTb8iQIcJmG0lJSQgICBDOFRQUwMfHB7m5ubhw4QK2bNmCzZs3Y9asWUKduLg4+Pj4oGXLlrh58ybGjx+PwYMH49SpUwp9PowxVhrI3ZW0f/9+9O3bF71798b169eRk5MDAEhNTcWCBQtw/PhxhQV38uRJmdubN2+GmZkZrl27Bg8PD6HcwMAAFhYW732Mv/76C3fv3sXp06dhbm4OV1dXzJ07F1OnTsVPP/0EHR0drFu3DrVq1cLSpUsBAPXq1UNoaCiWL18Ob29vhT0fxhgrDT5pVtK6deuwYcMGYWtPAGjSpAmuX7+u0ODelZqaCgCoWLGiTPmOHTtQuXJlODk5wc/PD1lZWcK58PBwODs7w9zcXCjz9vZGWloa7ty5I9Tx8vKSeUxvb2+Eh4d/MJacnByekcUY00hytxiio6Nlvq1LVahQAW/evFFETO8lkUgwfvx4NGnSBE5OTkJ5r169YG1tDSsrK0RERGDq1KmIjo5GYGAggMI9qosmBQDC7eTk5P9bJy0tDdnZ2e/d73XhwoWftIE4Y4yVdHInBgsLC8TGxqJmzZoy5aGhobCxsVFUXMWMGjUKkZGRxcYxhg4dKv
zf2dkZlpaW8PT0xIMHD1C7dm2lxePn54eJEycKt9PS0lC9enWl/TzGGFMVubuShgwZgnHjxuHSpUsQiUR4+vQpduzYgcmTJyttL4bRo0fj6NGjOHv2LKpVq/Z/6zZq1AgAEBsbC6AwkaWkpMjUkd6Wjkt8qI6xsfF7WwsAoKurC2NjY5mDMcY0gdwthmnTpkEikcDT0xNZWVnw8PCArq4uJk+ejDFjxig0OCLCmDFjcODAAQQHB6NWrVr/eZ+bN28CKNwrAgDc3d0xf/58PHv2DGZmZgCAoKAgGBsbw8HBQajz7qB5UFAQ3N3dFfhsGGOsdJD7ymep3NxcxMbGIiMjAw4ODjA0NFR0bBg5ciR27tyJQ4cOwc7u3wXTK1SoAH19fTx48AA7d+5E+/btUalSJURERGDChAmoVq0azp0r3HmjoKAArq6usLKyQkBAAJKTk9G3b18MHjwYCxYsAFA4XdXJyQmjRo3CoEGD8Pfff2Ps2LE4duzYR89K4iufVYuvfFYzvvJZ7ZR55bPciWH79u3o0qULDAwMlBWTQPSBP9hNmzZhwIABePz4Mfr06YPIyEhkZmaievXq+PbbbzFjxgyZD+dHjx5hxIgRCA4ORvny5dG/f38sWrQI2tr/NpiCg4MxYcIE3L17F9WqVcPMmTMxYMCAj46VE4NqcWJQM04MaleiEkOVKlWQnZ2NTp06oU+fPvD29oaWlpay4is1ODGoFicGNePEoHbKTAxyDz4nJSXhzz//hEgkQrdu3WBpaYlRo0bhwoULyoiPMcaYismdGLS1tdGhQwfs2LEDz549w/LlyxEfH4+WLVsqdXooY4wx1ZB7VlJRBgYG8Pb2xuvXr/Ho0SNERUUpKi7GGGNq8kmL6GVlZWHHjh1o3749qlatihUrVuDbb78VlphgjDFWesndYujRoweOHj0KAwMDdOvWDTNnzuT5/owxpkHkTgxaWlrYs2fPe2cjRUZGyqxjxBhjrPSROzHs2LFD5nZ6ejp27dqF33//HdeuXSuzW3syxpim+KQxBgA4f/48+vfvD0tLSyxZsgStWrXCxYsXFRkbY4wxNZCrxZCcnIzNmzdj48aNSEtLQ7du3ZCTk4ODBw8K6w4xxhgr3T66xdCxY0fY2dkhIiICK1aswNOnT7F69WplxsYYY0wNPrrFcOLECYwdOxYjRoxAnTp1lBkTY4wxNfroFkNoaCjS09PRoEEDNGrUCL/88gtevHihzNgYY4ypwUcnhsaNG2PDhg1ISkrCsGHD8Oeff8LKygoSiQRBQUFIT09XZpyMMcZU5JP3YwAK93/euHEjtm3bhjdv3qB169Y4fPiwIuMrNXh1VdXi1VXVjFdXVbsStbpqUXZ2dggICMCTJ0+wa9cuRcXEGGNMjT6rxcD+xS0G1eIWg5pxi0HtSmyLgTHGmObhxMAYY0wGJwbGGGMyODEwxhiTwYmBMcaYDE4MjDHGZHBiYIwxJoMTA2OMMRmcGBhjjMngxMAYY0wGJ4Z3rFmzBjVr1oSenh4aNWqEy5cvqzskxhhTKU4MRezevRsTJ07E7Nmzcf36ddSvXx/e3t549uyZukNjjDGV4cRQxLJlyzBkyBAMHDgQDg4OWLduHQwMDPDHH3+oOzTGGFOZj97aU9Pl5ubi2rVr8PPzE8rEYjG8vLwQHh5erH5OTg5ycnKE26mpqQAKV1llyscvs5op8g14q7iHKks+9bPGyMgIov9YUZgTwz9evHiBgoICmJuby5Sbm5vj3r17xeovXLgQ/v7+xcqrV6+utBjZvypUUHcEZRy/AWpXYdGnvQcfszUAJ4ZP5Ofnh4kTJwq3JRIJXr16hUqVKv1nNi4N0tLSUL16dTx+/Fju/SWYYvB7oF6a+vobGRn9Zx1ODP+oXLkytLS0kJKSIlOekpICCwuLYvV1dXWhq6srU2ZiYqLMENXC2NhYo/4oSiN+D9SrLL7+PPj8Dx0dHTRo0ABnzpwRyiQSCc6cOQN3d3c1Rs
YYY6rFLYYiJk6ciP79++PLL79Ew4YNsWLFCmRmZmLgwIHqDo0xxlSGE0MR3bt3x/PnzzFr1iwkJyfD1dUVJ0+eLDYgXRbo6upi9uzZxbrLmOrwe6BeZfn1FxEpdFt1xhhjpRyPMTDGGJPBiYExxpgMTgyMMcZkcGJgjDEmgxMDY4wxGZwYyhjpJDSJRKLmSBhPCGQlFSeGMkYkEuHMmTPo2bOnukMpk16+fImkpCSkpqZqxJpaTDPxBW5l0K1bt3jzITU4ePAg9uzZA4lEgr59+8LHxwdAYcuBk4TyJSYm4ty5c8jOzoa3tzeqVaum7pBKLG4xlAHvdlnY2dnhzp07ePz4sZoiKnv++OMPDBs2DB4eHhg+fLiQFKQtB+7aU647d+7Ax8cHx48fx/379zkp/AduMZQBIpEIV69ehYGBAczMzFC+fHmYmZmpO6wy4+DBgxg/fjw2bNiA7t27C+WDBw/GnTt3sHv3btSoUQMSiQRiMX9XU7Q7d+6gWbNmGDVqFCZPnowK/+wlcfToUYhEIiFJs3/xkhhlwLNnz/DVV19BLBbjzZs3aNCgAf7++298//33GDRoEExNTWFvbw9tbW0YGBioO1yNIZFIkJmZiQEDBsDe3h5z5syBlpYWAKBr164IDQ2Fra0tdHV1sXnzZk4OSvDq1St07twZ9evXx+rVq4XygIAATJs2DZ6enhg/fjwnh3fwb2AZULlyZVy/fh1hYWHYvn07xowZAyMjIxw6dAjTpk1DixYt4OLigo4dOyI3N1fd4WoMsViMgoIChISEoHbt2kJSOHPmDHJzc3H58mX4+flBT08PvXr1QkJCAicFBUtKSsLTp0/RtWtXobtu7dq1mDFjBlatWiXcPnr0qDrDLHG4K0kDSQczHz16BIlEgvT0dLi4uAAArKysABR+Y7W3t8fEiRMRExODFy9ewMLCAjo6OuoMXeO8ePECaWlpqFKlilDm6emJhg0bwsjICNbW1sjIyMCYMWNw7tw59O3bV43Rap5r167h0aNHaNGihVDm4+MDR0dHeHh4wMPDAxMmTMC8efNgY2MDBwcH9QVbgvDXEw0jTQoHDx5Ehw4d0LFjR7Ru3RojRoxAfHy8UM/S0hIHDx6ESCRCvXr10KxZM9SpU0d9gWsoCwsLODs749dff0VycrJQbmhoiIKCAgBAvXr14OTkxK+/EtSsWRNaWlo4ePAggMK/jxo1asDDwwMSiQQuLi7o1q0bJBIJKlWqpN5gSxBODBpGep1Cnz59MGbMGISEhGD16tVYv349bt68KTSn7e3t8erVK6F7gymW9HU2NDREhw4dcO7cOaxZs0aYJiwSiaClpYXMzEzMmDEDxsbGaNiwoTpD1kg1a9aEkZERtm7dioSEBJlpwdJuu/v376NWrVo8vlYUsVItLS2tWNnUqVNp9OjRREQUGxtLtra2NGTIEJk6oaGhVLt2bXr27BlJJBKVxKrpjhw5QqtWrSIiovz8fJlz3333HZUrV44GDhxIN27coOTkZPrrr7+oZcuW5OjoSLm5uUREVFBQoPK4NUVqaiq9evWKiIgkEonwe717927S0dGh/v370927d4X6aWlp5OvrSxUrVqTIyEi1xFxScWIoxdatW0d2dnaUkJAglBUUFJCnpyctXLiQ3r59S1WrVqWhQ4cKfyQrV66kkydPUlpaGj19+lRdoWuchw8fkkgkIm1tbWrXrh3NmDGD7t+/L1Nn+PDhZG1tTWKxmAwNDal+/frUqVMnISnk5eWpI3SN8ODBA6pVqxa1bt2a9uzZI/OFKTs7m1avXk1aWlrk4OBAgwcPphEjRlDHjh3JwsKCrl+/rsbISyaerlqKxcfHw8vLCxYWFvjzzz+Fi3bWrl2L3bt3IyoqCt999x1Wr14tzJCRTk9dunQpdyMpUHp6OqZNm4bWrVsjMzMTf/31Fw4cOIDJkyfD0dERXbt2BQDExMTg1q1byM/Ph729PVxcXCAWi5Gfnw9tbZ4L8q
n279+PadOmYezYsViyZAmaNm2K6tWrY/78+SAiaGtrIyQkBMuWLUNMTAxMTEzQpEkTDB48mMd23kfNiYl9ImmXQ2JiIjk6OpK7uzvFx8cTEdH58+epcePG5OzsTDdv3iSiwm9NP/74I1WrVo2io6PVFrcm+/HHH8nV1VXoRjp9+jQNGDCAxGIxDRw4kA4cOEA5OTnF7sfdR5/v9evXZGNjQ/v27aMXL17Qpk2byM3NjZo1a0ZTp06lqKgodYdYqnCLoZSSXgh1//59REZG4rvvvkO7du2wYcMGWFlZYfv27Vi9ejXevHmDmjVrAihcI+nEiRNwc3NTb/AagIqsb1T0ojQvLy+0bdsWkydPBgDUqVNHmBnz6tUrxMTE4PTp02jQoIHaYtc0BQUF0NLSwpYtW7B3715s3LgR5ubmAICqVasiLy8P6enpmDBhAmrXro0ffvgBAK9R9f/wrKRSSiwWIzAwEI0bN0ZYWBhatWqFy5cvo0uXLkhOTkafPn2wfPlyjBs3DtWqVYOPjw9CQ0M5KSiBtCsoPz8fbm5uuH//PgCgfv36MDc3x4kTJ3D48GGsWbMGw4YNQ/369dUcsWaRdok6Ozvj4cOHiImJAVC45IhEIkFYWBjWrl2LK1euYO7cuTIzw9j7cYuhlEpKSoK7uztGjhyJKVOmIC8vD3fv3sX333+PihUrIjAwULiYjSnWyZMncfLkSbx9+xYeHh7o1auXcC4uLg4uLi7Iy8uDu7s79u7di8qVKxd7DB5T+HSvXr1CSkoKDAwMULVqVZnX0dfXF9euXUPlypUREhKCo0ePCq2zp0+fQldXl69X+Bjq7Mdiny4hIYGsra3p3LlzMuXXrl0jU1NT+uabb+jhw4dqik5z/fbbb2RiYkK9e/emmjVrkrW1NS1fvpyI/p2i+uOPP5Kbmxs9ePBAjZFqptu3b9MXX3xBdnZ2pKOjQ7Nnz6YnT54I569fv041a9Yke3t7ioiIUGOkpRt3JZVSFhYWKCgowKlTp2TK7ezsYG9vj8OHD2PYsGHC1bXs8/3xxx8YOXIktm3bhu3btyM0NBQSiQSnT58GEQldGu7u7nj48KHQZcFLaitGREQE3N3d0aJFC2zatAkjRozAokWLcPHiRaGOm5sb3NzcUKVKFTg7OwPgnfI+ibozE/tv0msQsrKyZC6cWrRoEbm4uNBvv/0mU3/kyJF0+vRpbjEo0Pnz50kkEtH48eNlyl1cXMjOzo7u3bsnM+Ood+/eVL9+fXrz5o2qQ9VIkZGRVL58eZoyZYpQFh8fT+XLl6f+/fvL1L1x4wZZW1vTzp07VRyl5uAWQwlH/8ycOHbsGPr16wcvLy/s27cPqamp6N+/Pxo3bowVK1Zg4sSJOHToEMaMGYO9e/eiXr16qFWrlrrD1xgWFhbw8PBATEwMDh8+DAD47rvvEBMTg5o1a6J3795o27YtunbtivDwcBgZGaFZs2YwNjZWc+SaYc+ePcjKysLXX3+NnJwcAMCWLVuQlZWFrKwszJkzB8HBwbh79y5cXV1hYmKCCxcuIC8vT82Rl048+FwKhISEwNvbGwMHDkRcXBxu3LiBvn37YurUqSgoKMDOnTuxatUq6OnpQUtLC9u2bYOrq6u6w9YY0umo0dHRGDduHEQiEV69eoXs7GycOHECZmZmePHiBcLCwrB+/Xrcv38fX375Jfbs2QMtLS3eY0FBhg4dip07d+LAgQO4fPkyfv75Z8ycORMVK1ZESEgIIiMjkZiYiA4dOiAjIwMzZsxAvXr11B126aTmFgv7AOlFT0+fPqVZs2bRihUrhHMrV64kOzs7mjRpkrCsRX5+Pj179oxSU1PVEq+mk3bnRUVFUdu2bcnU1JTWrFnz3rphYWHC+8cXr32+ot2nAwcOJJFIROXLl6fjx4/L1IuNjaW9e/eSl5eXcLEn+zScGEoI6QdIZmamUHbnzh1h5svatWtl6q9cuZLq1q1LU6ZM4as6laDowoLSBdmk71
FsbCx5e3sL6/JIvXtVMycFxSmaHCZMmEBisZgCAwMpKytLKJe+Z+8uYMjkx+3bEkIsFuPx48cYMmSI0Ift4OCA7t27Izk5GdeuXcPz58+F+mPHjsWYMWOwadMm7Ny5E/n5+eoKXeNIJBKZi59EIhGICGKxGPHx8ahduzZWrlwJsViMDRs2YP/+/QBQbJMj7j5SHCrS471s2TL07dsXffv2xeHDh4UxB+l7xq+7Aqg7M7F/Xb16lVxcXKhz58505MgRoVy6xtGSJUvo2bNnMvdZt24dxcbGqjpUjVW0pTB69Gj64YcfhLI9e/ZQ1apVhRbavXv3qF27duTm5kZ///23WuItC6QtgAcPHtDkyZOF8sGDB5OJiQlt3ryZ3r59q67wNBInhhLm0qVL5OHhQR06dJBJDr6+vlSjRg1asmQJPX/+XI0Raq6iSSEsLIxcXFzo/PnzREQUGBhIenp69MsvvxDRv91Et2/fpgkTJnC3kQIkJibSvn37aPv27ZSUlERE/74n8fHxVLVqVerZs6ewTDkRUffu3alq1arv3ZeEfTqelVQCXbp0CVOmTIGxsTGGDRuGDh06AACmTJmCwMBA9O/fHyNHjuRL+5Vk3759CAwMROXKlbFq1Sq8efMG8+fPR506dTB06FCh3ruzjXj20ae7c+cO+vXrh7p166J69eoICAgQzqWnp6Np06Zo3Lgx1q1bB5FIJCycBxQuD2Npaamu0DWTujMTe7+LFy++t+UwcuRIcnZ2ppcvX6oxOs2VkpJCHTt2pIoVK1KXLl2Ecn69lScyMpJMTU1p5syZwg5sREQnTpygs2fPUm5uLh08eLDYoLL0Nu9AqHicGNToQ90P0l/0ot1KR48eFc6npKSoJL6y4H3vwY0bN6hXr15UuXJlmavKubtI8Z4/f07u7u40ZswYmfLFixeTlpYWtW3blsLCwoRyfg9Ug9u9KiRdMyc7OxtA4eyJ2NjYYvWks2AaNmyIn3/+GVlZWVi8eDFOnjwJAKhSpYrqgtZgRbt+4uLiEB0djYyMDLi6usLf3x+tW7fGli1bsH37dgCF7xeve6RY8fHxePHiBXr06CG8titXrsSsWbOwYMECZGVlYenSpfj7778B8IwjlVF3Zipr4uPjafDgwXT//n0KDAwkkUj0wesQpC2H0NBQ8vHxkdnbmX2eot0Ps2bNIicnJ7KxsaHq1avTmjVrKCcnhyIjI6lXr17UtGlT2r59uxqj1TzSweL169eTvr6+zLnIyEg6e/YsERX+7jdt2pRatGhBMTExqg6zzOLEoGInTpwgJycnatasGenp6dG2bduI6MP9pNLy7OxslcVYlsyfP5/MzMzoxIkTRETUrl07srKyotu3bxMRUUREBPXp04fq1q1LJ0+eVGeoGiMqKoo8PT3p+fPndOLECdLT06O//vqLiIpfWEhEtGTJEmrUqBGP86gQt8tUrG3btujZsydCQ0NRv359YTcvaffRu6QX7ejq6qo0zrIgMzMTZ8+eRUBAANq2bYtjx47hwoULmDlzJpycnJCfnw9nZ2eMHz8evXr1gpeXl7pD1ggXL15EVlYWKleujBo1akBbWxs7duxAUlLSe3dVS0xMhK2tLf8NqBAnBhWRfujn5OTAwsIC06dPh1gsxvz58xEeHg7gw8lBeo4pDhEhOzsbjx49gqenJ4KDg9GjRw8sWrQIw4cPR3Z2NlauXIm4uDg0aNAAs2fPhpaWFu9voQBJSUnIzc1FQUEBHBwcsHTpUmzfvh0LFy7EgwcPhHpZWVmYOnUqtm7dih9//BHly5dXY9RlC+8tqCIikQjBwcFYsmQJDhw4gHLlyqFhw4aYN2+esLxCo0aNIBKJcPXqVXz55ZfqDlmj0Dsbv4tEIlSuXBn29vbo2bMnbty4gdWrV2PgwIEACrePPHjwICwsLGSWL5fOnWfyefv2LfT09AAUbmtqamoKLS0tEBH69euHly9fYvr06QgPD0fz5s0hkUgQHx+PsL
AwBAUFwcHBQc3PoGzhFoMKxcbGIikpCeXKlQMAdOzYETNmzEB8fDyWL1+Ow4cPY+7cuWjYsCFevHih5mg1R9GkkJCQgMTEROFcnz59kJKSgsaNGwtJISMjA0OHDoW2tjZ69Oihlpg1SWJiIvr164egoCAAQEFBgczFmXp6evDz88Phw4dhamqK/fv3IyQkBNbW1ggJCYGbm5u6Qi+zuMWgRO9+S3VycsKjR49w79492NnZQSQSoWPHjtDS0sKyZcvg6+uLvLw8XL58+b0byDP5/Pbbb3B3dxe2ePTz88OxY8eQkJCAIUOG4IcffkCXLl1w79497Ny5E66urrC1tUViYiKys7Nx5coVofuIWwqfLicnB0+ePMHy5ctRqVIl5ObmCgsOFv376NChg7CXgqGhIV9Jrka8JIaSXbhwAdra2qhUqRKSk5MxaNAgHDhwoFjTOC4uDpmZmahYsSKsrKzUFK3mCA8PR7NmzTBs2DBMnjwZly9fhq+vLxYvXozk5GSsXr0aX375JWbOnIl69erh4sWL2LFjB/T09FC9enWMHTsW2trayM/Ph7Y2f3/6XLGxsRg9ejTKly+PR48egYjg5OQEsVgMsViMnJwciMViZGRkoHr16li+fDlEIhGPrakJJwYlev36NZo3b47MzEwkJyejUaNGCA4ORosWLTBy5EgYGRmhfv36KFeuHK97pAT79u3DxIkT0bNnT4jFYtjb26N///4AgODgYIwbNw5169bFlClT8NVXXxW7P7cUFCs6OhoTJkxASEgIdHV18f333+Phw4cQiUQwNDREfn4+8vLysGjRIqGVx9SDE4OSvX37FllZWYiOjsbbt28xaNAgpKSkwNnZGVFRUTAxMUGVKlUQFhYmDM6xz1O0C2/37t2YPHkynj9/jrlz58LX11eoFxwcjPHjx6NevXoYPHgwPD091RVymREbG4vx48cjNzcXS5cu5QRQQnFiUADpB5G02yE2NhZv375FamoqmjRpIlN31KhRMDU1xbx585CUlITk5GSYmJjIzHxhn076XhT9tn/kyBH88MMPcHd3x8KFC2W68c6dO4cePXpgyJAhmDNnjrrCLlPu37+PsWPHAgCmT5+OZs2aCefeHZdjaqLSy+k0jHRBr6IrQgYGBpKdnR05OTmRpaUlffvtt3T37l3h/IoVK8jZ2VlmS0KmGEUXWJO+vtKrZ//880+qWrUqjRw5stgSJNevX+ftIFXs/v371KFDB2rcuDFdvHhR3eGwd/CQ/yeSzpi4efMm7OzsEBsbi7Nnz6Jfv37w9fXF5cuXsX37dhw8eBA3b94UFgirVasWXr9+XWwbSPZ56J+tNwHg559/RteuXdG5c2fMmDEDOTk56N69OwICAnDo0CGsXr0a0dHRwn3d3Nz44jUVq1OnDn7++WdUq1aN91IoidSdmUoj6TfTmzdvkoGBAU2dOpWIiObNm0dDhw4lIqKYmBiqXbu2cFsqMjKS7O3thR2q2Ocrur5OQEAAGRkZ0YwZM6hHjx7k7OxMDg4OlJGRQUREO3fuJGtra+rZsyc9evRIXSGzf+Tk5Kg7BPYePA9PTtKWQnR0NJo2bYoxY8Zg0aJFkEgkuHz5MurUqYOcnBy0bNkS7du3x7p16wAAa9asQcWKFdG1a1cEBwfD3Nxczc9Ec0j7pK9cuYJbt25h165d8PHxAQBcv34dw4YNQ/PmzREeHo6ePXsiPz8fgYGBqFatmjrDZgC3nEso7kqSQ9HuI3d3d2RmZqJ9+/bIy8uDWCxGjx49EB4ejqpVq6Jjx45Yv349gMJujlu3biE4OBgikYiTgoJQkXkT+/btw+DBg3HhwgWZ60Dq16+PJUuWIDs7G0eOHAEA9O3bFwcOHOD9FRj7AE4MH6loUmjSpAkmTJiArl274rvvvkNQUBAkEgmcnJygp6cHMzMzdO/eHUDh8gozZ87EsWPHMHHiRGE5DPb5pC2FzMxMNGnSBDY2Nnjy5AkCAwOFOlpaWq
hfvz4yMzPx6NGjYo/BV9YyVhx3JX0ksViM+Ph4eHp6YuTIkZg5cyYAoFOnTujfvz+2bduGtm3bYurUqZg3bx4GDhwIMzMzGBgY4N69ezh27Bjs7OzU/Cw0z8qVKxEdHY1ff/0Va9asgZaWFk6dOoUaNWpgyJAhAAqXLDc2NuYrmBn7SHwdgxwiIiIQGRmJXr16ySyV0KlTJ1y4cAHbt29H27ZtcffuXdy+fRsXLlyAq6srmjdvDhsbGzVHr5nWrVuHiRMn4saNG7Czs0NiYiJGjx6NyMhINGzYEC4uLrh48SLu3LmDu3fvcnJg7CNwYvgI9J6LbogIubm5wuYhRZNDmzZtuItCCYq+D9L/p6SkoE+fPvD29sbEiRMhFouRlJSEcePGITAwEG3atIGXlxcmTpwIgJe5YOxj8KfX/3H37l28ffsWIpFIZpAyPz8fIpFI5gPm8OHD+PrrrzFo0CAcOXIE+fn56ghZo0mTQlZWlvB/c3Nz2NvbY+PGjUIytrS0xOrVq9GpUydhAUMpTtiM/Tf+K/mAvXv3wtvbG/v378fbt28hFotBRCgoKIC2tjbi4uLg4uKCpKQk4cKow4cPo27duvD19UVOTo6an4HmuHXrFg4dOoSbN2/iwIED6NChA0JDQ/Hq1SsAwOLFi0FEWLx4MYDCiQLm5uZYtWoVtLS0sGXLFmGGGC+3wNh/48TwAd999x1cXFywdOlSHDhwQGg5aGlp4dGjR/Dw8MCXX34JCwsLmatmg4ODcebMGd6GUEF27NiBAQMGYOPGjdi7dy+eP38OCwsLdOzYEYMHD8bKlStBRGjZsiXu378PoLBVkJ+fj2rVqmHNmjUACtdLSk1NVedTYazU4DGGdxw9ehSVK1dG48aNQUTo3Lkz4uLi4Ofnh86dO0NfXx+dO3eGlZUV1qxZI/MNlPuvFWvr1q0YPnw4/vjjD7Rp0wYVK1YUzp05cwbnz5/H6tWr4eXlhfT0dJw6dQonTpyAt7c3gH+nGEtbdXxBG2MfSfUXW5dckZGRZGNjQ3379qWrV68K5d988w05OzvTzp07iahwuYuiC7YxxYuMjCRHR0fasGGDTPm7i90lJyfTzJkzqXv37iQSiahXr16Unp4uLJPB7xNj8uOupCIcHR3h7++P6Oho/PLLL7h69SoA4ODBg6hVqxbmzZuH3bt3o1q1asKYA1OOxMREZGVlwcPDQ+Z1lrbIpGXm5uaYNWsWduzYgYCAAJw+fRovXrwQWnI82MyY/Piv5h/SMYI+ffpg7NixuH37NtasWSMkh0OHDqFOnTqYP38+AgMDhTEHphzXrl1Deno66tatC5FIVCwJi0QiREVF4fz589DW1oaWlhYmT54Ma2trbNy4UU1RM6YZODH8o+g3y969e783OUhbDkuWLMGuXbt45pES2draIjMzE3/99ReA988m2rp1K7Zv3w4AwnRifX19voiNsc/EieEfIpEI4eHh+PHHHwEA/fr1+2DLoUKFCti4cSMnBiVq0KABdHR08NtvvyEhIUEol7Yc0tLSEBMTI2wNKRaLceXKFYSEhODbb79VS8yMaQy1jnCUILm5uTRt2jSqU6cOzZw5UyjfsmULNWjQgAYMGCAzIP348WN1hFmm7Nq1i3R1dalXr150/fp1oTwxMZHatWtHTZo0oby8PJn78D4XjH0+nq5aRHJyMn799VccPXoU7dq1w/z58wEUdln8+uuvqFatGqZPnw43Nzc1R1o2FBQUYNOmTRg5ciTMzc3h5OQEiUSC1NRUSCQShIWFoVy5cjxNmDEFK1NdSe9be//x48fC/y0sLDBq1Ci0bdsWJ0+eFFZQ7devHwYNGoSXL1/yXgoqpKWlhcGDB+Py5cv49ttvIZFIUL16dfTt2xfh4eEoV64c8vPzOSkwpmBlpsUgvdjp+vXrOHbsGGbOnIk7d+5g0KBB6NWrF8aNGyfUTUpKwsKFC3HgwAGMHj0aU6dOBQCkpqaiQoUK6noK7B3cUm
BMOcpEi0GaFCIiIvDVV18JSyPo6uqidu3a2LdvH9auXSvUt7S0xIQJE5Cfn4+AgAD4+/sDAIyNjdUSP8N7rxnhpMCYcmh8YpAmhVu3bqFx48aYNm0alixZAqBwSuTPP/+MunXrYuvWrTLJQUdHB02bNsXYsWMxcOBAALwAmzrxa8+Y6pSJrqTY2Fg4Oztj8uTJmDt3rrCW/5YtW/DFF1/A2NgYc+bMwZ07d9ChQwcMHDgQa9asQWRkJDZv3iyzRg9jjGm6MtFi+OOPP2BkZCSsyy8SiTBv3jxMnjwZWVlZsLa2xvTp09GyZUusXr0aX3/9NbZs2QJ/f39OCoyxMqdMtBiePn2KgIAAXLx4EQMGDEBaWhqWLFmCLVu2oF27dkILIjMzE0lJSYiLi4OjoyOsrKzUHTpjjKlcmUgMQOE1CvPnz0dQUBAePHiAU6dOoVWrVjyzhTHG3qHxXUlSFhYWmDFjBry9veHg4IAbN24AgMwmO4wxxoAytdqYubk5/Pz8IJFIsHfvXuTn52Pq1KnQ0tISZi8xxlhZV2a6koqSdivduHEDnp6ewnUKjDHGylBXUlEWFhaYPn066tSpgwsXLuDly5fqDokxxkqMMtlikEpJSQEAXv+IMcaKKNOJgTHGWHFlsiuJMcbYh3FiYIwxJoMTA2OMMRmcGBhjjMngxMAYY0wGJwbGGGMyODEwxhiTwYmBMcaYDE4MjDHGZHBiYIwxJoMTA2OMMRn/AwW5atV5V7tgAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 400x300 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"There is 53.9% reduction in token usage for KG-RAG compared to Cypher-RAG\n",
"There is 65.1% reduction in token usage for KG-RAG compared compared to Full-Text Index\n"
]
}
],
"source": [
"\n",
"data.loc[:, 'token_usage_combined'] = 0.5*(data.token_usage + data.token_usage_after_perturbation)\n",
"neo4j_rag.loc[:, 'token_usage'] = 0.5*(neo4j_rag.total_tokens_used + neo4j_rag.total_tokens_used_perturbed)\n",
"kg_rag.loc[:, 'token_usage'] = 0.5*(kg_rag.total_tokens_used + kg_rag.total_tokens_used_perturbed)\n",
"\n",
"neo4j_avg = neo4j_rag['token_usage'].mean()\n",
"neo4j_sem = neo4j_rag['token_usage'].sem()\n",
"\n",
"kg_avg = kg_rag['token_usage'].mean()\n",
"kg_sem = kg_rag['token_usage'].sem()\n",
"\n",
"full_text_avg = data['token_usage_combined'].mean()\n",
"full_text_sem = data['token_usage_combined'].sem()\n",
"\n",
"\n",
"fig = plt.figure(figsize=(4, 3))\n",
"plt.bar(0, full_text_avg, yerr=full_text_sem, color='blue', ecolor='black', capsize=5, label='Full-Text Index')\n",
"plt.bar(1, neo4j_avg, yerr=neo4j_sem, color='red', ecolor='black', capsize=5, label='Cypher-RAG')\n",
"plt.bar(2, kg_avg, yerr=kg_sem, color='green', ecolor='black', capsize=5, label='KG-RAG')\n",
"\n",
"plt.ylabel('Average token usage')\n",
"plt.xticks([0, 1, 2], ['Full-Text Index', 'Cypher-RAG', 'KG-RAG'], rotation=45, ha='right')\n",
"plt.tight_layout()\n",
"sns.despine()\n",
"# plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))\n",
"plt.show()\n",
"\n",
"percentage_of_reduction_cypher_to_kg = round(100*(neo4j_avg-kg_avg)/neo4j_avg,1)\n",
"percentage_of_reduction_fulltext_to_kg = round(100*(full_text_avg-kg_avg)/full_text_avg,1)\n",
"\n",
"print(f'There is {percentage_of_reduction_cypher_to_kg}% reduction in token usage for KG-RAG compared to Cypher-RAG')\n",
"print(f'There is {percentage_of_reduction_fulltext_to_kg}% reduction in token usage for KG-RAG compared compared to Full-Text Index')\n",
"\n",
"fig_path = '../data/results/figures'\n",
"os.makedirs(fig_path, exist_ok=True)\n",
"fig.savefig(os.path.join(fig_path, 'token_usage_three_way_comparison.svg'), format='svg', bbox_inches='tight')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "ccb52e21-9bb5-4867-bd60-fea79f11130d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10590.367088607594"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_text_avg"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e2627b4-9f6a-431d-af05-a17ccdc37b36",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (cypher_rag)",
"language": "python",
"name": "cypher_rag"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|