In [1]:
import os

# Remember the directory the kernel started in, so that re-running this cell
# always lands in the same parent directory instead of climbing one more
# level on every execution (which would break the relative 'data/...' paths
# used further down).
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))


In [2]:
from kg_rag.utility import *
from tqdm import tqdm


  from .autonotebook import tqdm as notebook_tqdm


## Choose the LLM

In [3]:
# Name of the chat model used throughout this notebook. The configuration
# cell assigns it to CHAT_MODEL_ID (and, through that, CHAT_DEPLOYMENT_ID).
LLM_MODEL = 'gpt-4-32k'


## Configure KG-RAG

In [4]:
# Prompt and retrieval settings. `system_prompts` and `config_data` are not
# defined in this notebook; presumably they arrive through the star import
# from kg_rag.utility -- confirm against that module.
SYSTEM_PROMPT = system_prompts["KG_RAG_BASED_TEXT_GENERATION"]
CONTEXT_VOLUME = int(config_data["CONTEXT_VOLUME"])
QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD = float(config_data["QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD"])
QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY = float(config_data["QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY"])
VECTOR_DB_PATH = config_data["VECTOR_DB_PATH"]
NODE_CONTEXT_PATH = config_data["NODE_CONTEXT_PATH"]
SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL = config_data["SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL"]
SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL = config_data["SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL"]
# Cast explicitly, like the other numeric settings above, so the value sent
# to the chat completion call is a number even if the config stores it as text.
TEMPERATURE = float(config_data["LLM_TEMPERATURE"])

CHAT_MODEL_ID = LLM_MODEL
# Passed as the last argument to retrieve_context in the run cells.
EDGE_EVIDENCE = True

# The deployment id simply mirrors the model id; both are forwarded to the
# chat completion call.
CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID


# Resources shared by every question in the run cells: the vector store, the
# sentence-embedding function used for context retrieval, and the node
# context table read from NODE_CONTEXT_PATH.
vectorstore = load_chroma(VECTOR_DB_PATH, SENTENCE_EMBEDDING_MODEL_FOR_NODE_RETRIEVAL)
embedding_function_for_context_retrieval = load_sentence_transformer(SENTENCE_EMBEDDING_MODEL_FOR_CONTEXT_RETRIEVAL)
node_context_df = pd.read_csv(NODE_CONTEXT_PATH)


## Load test data

In [5]:
data = pd.read_csv('data/rag_comparison_data.csv')
# Fail early, before any model call is made, if the column that the run
# cells read from every row is missing.
assert 'question' in data.columns, "data/rag_comparison_data.csv must contain a 'question' column"


## Function for chat completion with token usage tracking

In [6]:

def chat_completion_with_token_usage(instruction, system_prompt, chat_model_id, chat_deployment_id, temperature):
    """Run one chat completion and report how many tokens it consumed.

    Args:
        instruction: Text sent as the "user" message.
        system_prompt: Text sent as the "system" message.
        chat_model_id: Value forwarded as the `model` argument.
        chat_deployment_id: Value forwarded as the `deployment_id` argument.
        temperature: Value forwarded as the `temperature` argument.

    Returns:
        A `(content, total_tokens)` tuple: the message content of the first
        choice in the response, and the response's `usage.total_tokens`.
    """
    # System message first, then the user's instruction.
    conversation = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=instruction),
    ]
    completion = openai.ChatCompletion.create(
        model=chat_model_id,
        deployment_id=chat_deployment_id,
        temperature=temperature,
        messages=conversation,
    )
    answer_text = completion['choices'][0]['message']['content']
    tokens_spent = completion.usage.total_tokens
    return answer_text, tokens_spent


## Run on test data

In [7]:
%%time

kg_rag_answer = []
total_tokens_used = []

for index, row in tqdm(data.iterrows()):
    question = row['question']
    context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, EDGE_EVIDENCE)
    enriched_prompt = "Context: "+ context + "\n" + "Question: " + question
    output, token_usage = chat_completion_with_token_usage(enriched_prompt, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
    kg_rag_answer.append(output)
    total_tokens_used.append(token_usage)
    
data.loc[:,'kg_rag_answer'] = kg_rag_answer
data.loc[:, 'total_tokens_used'] = total_tokens_used


100it [11:13,  6.74s/it]

CPU times: user 3min 37s, sys: 9.86 s, total: 3min 47s
Wall time: 11min 13s





## Run on perturbed test data

In [8]:
%%time

kg_rag_answer = []
total_tokens_used = []

for index, row in tqdm(data.iterrows()):
    question = row['question']
    context = retrieve_context(question, vectorstore, embedding_function_for_context_retrieval, node_context_df, CONTEXT_VOLUME, QUESTION_VS_CONTEXT_SIMILARITY_PERCENTILE_THRESHOLD, QUESTION_VS_CONTEXT_MINIMUM_SIMILARITY, EDGE_EVIDENCE)
    enriched_prompt = "Context: "+ context + "\n" + "Question: " + question
    output, token_usage = chat_completion_with_token_usage(enriched_prompt, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
    kg_rag_answer.append(output)
    total_tokens_used.append(token_usage)
    
data.loc[:,'kg_rag_answer_perturbed'] = kg_rag_answer
data.loc[:, 'total_tokens_used_perturbed'] = total_tokens_used


100it [09:49,  5.90s/it]

CPU times: user 3min 36s, sys: 9.04 s, total: 3min 45s
Wall time: 9min 49s





## Save the result

In [9]:
# Write the test frame, including the answer and token-usage columns added by
# the run cells, to a CSV file under the results directory.
save_path = 'data/results'
output_file = os.path.join(save_path, 'kg_rag_output.csv')

# Create the results directory on first use; do nothing if it already exists.
os.makedirs(save_path, exist_ok=True)
data.to_csv(output_file, index=False)

