diff options
Diffstat (limited to 'kg_rag/prompt_based_generation')
7 files changed, 244 insertions, 0 deletions
# kg_rag/prompt_based_generation/GPT/run_mcq_qa.py
#
# Prompt-based (no KG retrieval) multiple-choice QA with a GPT model.
# Usage: python run_mcq_qa.py <chat_model_id>
#
# Reads the MCQ benchmark CSV, sends each question to the model with the
# MCQ system prompt, and writes (question, correct_answer, llm_answer) rows
# to a results CSV under SAVE_RESULTS_PATH.
from kg_rag.utility import *
import sys
from tqdm import tqdm

CHAT_MODEL_ID = sys.argv[1]

QUESTION_PATH = config_data["MCQ_PATH"]
SYSTEM_PROMPT = system_prompts["MCQ_QUESTION_PROMPT_BASED"]
SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
TEMPERATURE = config_data["LLM_TEMPERATURE"]

# Deployment id mirrors the model id (Azure-style deployments share the name).
CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID

# Number of questions evaluated per run; was a bare magic number (50) inline.
SAMPLE_SIZE = 50

save_name = "_".join(CHAT_MODEL_ID.split("-")) + "_prompt_based_response_for_two_hop_mcq_from_monarch_and_robokop.csv"


def main():
    """Run the first SAMPLE_SIZE MCQ questions through the model and save a CSV.

    Fix: the progress bar previously used a hard-coded ``total=50`` which is
    wrong whenever the CSV holds fewer rows; it now reflects the actual
    sample length. The unused ``index`` loop variable is discarded.
    """
    start_time = time.time()
    question_df = pd.read_csv(QUESTION_PATH)
    sample_df = question_df.head(SAMPLE_SIZE)
    answer_list = []
    for _, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
        question = "Question: " + row["text"]
        output = get_GPT_response(question, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
        answer_list.append((row["text"], row["correct_node"], output))
    answer_df = pd.DataFrame(answer_list, columns=["question", "correct_answer", "llm_answer"])
    answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)
    print("Completed in {} min".format((time.time() - start_time) / 60))


if __name__ == "__main__":
    main()
# --- kg_rag/prompt_based_generation/GPT/run_true_false_generation.py ---
#
# Prompt-based (no KG retrieval) true/false QA with a GPT model.
# Usage: python run_true_false_generation.py <chat_model_id>
from kg_rag.utility import *
import sys

CHAT_MODEL_ID = sys.argv[1]

QUESTION_PATH = config_data["TRUE_FALSE_PATH"]
SYSTEM_PROMPT = system_prompts["TRUE_FALSE_QUESTION_PROMPT_BASED"]
SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
TEMPERATURE = config_data["LLM_TEMPERATURE"]

# Deployment id mirrors the model id (Azure-style deployments share the name).
CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID

save_name = "_".join(CHAT_MODEL_ID.split("-")) + "_prompt_based_one_hop_true_false_binary_response.csv"


def main():
    """Answer every true/false question with the GPT model and save a CSV.

    Writes (question, label, llm_answer) rows to SAVE_PATH/save_name.
    """
    start_time = time.time()
    question_df = pd.read_csv(QUESTION_PATH)
    answer_list = []
    for _, row in question_df.iterrows():  # index unused; discarded
        question = "Question: " + row["text"]
        output = get_GPT_response(question, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
        answer_list.append((row["text"], row["label"], output))
    answer_df = pd.DataFrame(answer_list, columns=["question", "label", "llm_answer"])
    answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)
    print("Completed in {} min".format((time.time() - start_time) / 60))


if __name__ == "__main__":
    main()


# --- kg_rag/prompt_based_generation/GPT/text_generation.py ---
#
# Interactive prompt-based text generation with a GPT model (single question).
from kg_rag.utility import *
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-g', type=str, default='gpt-35-turbo', help='GPT model selection')
args = parser.parse_args()

CHAT_MODEL_ID = args.g

SYSTEM_PROMPT = system_prompts["PROMPT_BASED_TEXT_GENERATION"]
TEMPERATURE = config_data["LLM_TEMPERATURE"]

# Deployment id mirrors the model id (Azure-style deployments share the name).
CHAT_DEPLOYMENT_ID = CHAT_MODEL_ID


def main():
    """Prompt for a single question and stream the model's answer to stdout."""
    print(" ")
    question = input("Enter your question : ")
    print("Here is the prompt-based answer:")
    print("")
    output = get_GPT_response(question, SYSTEM_PROMPT, CHAT_MODEL_ID, CHAT_DEPLOYMENT_ID, temperature=TEMPERATURE)
    stream_out(output)


if __name__ == "__main__":
    main()


# --- kg_rag/prompt_based_generation/Llama/run_mcq_qa.py ---
#
# Prompt-based (no KG retrieval) multiple-choice QA with a Llama model.
# NOTE(review): `from langchain import PromptTemplate, LLMChain` is a
# deprecated import path in newer langchain releases — confirm the pinned
# version before upgrading (new path: langchain.prompts / langchain.chains).
from langchain import PromptTemplate, LLMChain
from kg_rag.utility import *

QUESTION_PATH = config_data["MCQ_PATH"]
SYSTEM_PROMPT = system_prompts["MCQ_QUESTION_PROMPT_BASED"]
SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
MODEL_NAME = config_data["LLAMA_MODEL_NAME"]
BRANCH_NAME = config_data["LLAMA_MODEL_BRANCH"]
CACHE_DIR = config_data["LLM_CACHE_DIR"]

save_name = "_".join(MODEL_NAME.split("/")[-1].split("-")) + "_prompt_based_two_hop_mcq_from_monarch_and_robokop_response.csv"

# The user turn wraps the raw question text; combined with SYSTEM_PROMPT
# by get_prompt into the full chat template.
INSTRUCTION = "Question: {question}"


def main():
    """Answer every MCQ question with the Llama model and save a CSV.

    Writes (question, correct_answer, llm_answer) rows to SAVE_PATH/save_name.
    """
    start_time = time.time()
    llm = llama_model(MODEL_NAME, BRANCH_NAME, CACHE_DIR)
    template = get_prompt(INSTRUCTION, SYSTEM_PROMPT)
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    question_df = pd.read_csv(QUESTION_PATH)
    answer_list = []
    for _, row in question_df.iterrows():  # index unused; discarded
        output = llm_chain.run(row["text"])
        answer_list.append((row["text"], row["correct_node"], output))
    answer_df = pd.DataFrame(answer_list, columns=["question", "correct_answer", "llm_answer"])
    answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)
    print("Completed in {} min".format((time.time() - start_time) / 60))


if __name__ == "__main__":
    main()
# --- kg_rag/prompt_based_generation/Llama/run_mcq_qa_medgpt.py ---
#
# Same MCQ pipeline as run_mcq_qa.py, but pinned to the BioMedGPT-LM-7B
# checkpoint so the biomedical baseline can be evaluated without editing
# the config file.
# NOTE(review): near-duplicate of run_mcq_qa.py — consider folding the model
# choice into a CLI flag on that script instead of keeping two copies.
from langchain import PromptTemplate, LLMChain
from kg_rag.utility import *

QUESTION_PATH = config_data["MCQ_PATH"]
SYSTEM_PROMPT = system_prompts["MCQ_QUESTION_PROMPT_BASED"]
SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
# Hard-coded on purpose: this script exists to benchmark this exact model.
MODEL_NAME = 'PharMolix/BioMedGPT-LM-7B'
BRANCH_NAME = 'main'
CACHE_DIR = config_data["LLM_CACHE_DIR"]

save_name = "_".join(MODEL_NAME.split("/")[-1].split("-")) + "_prompt_based_mcq_from_monarch_and_robokop_response.csv"

# The user turn wraps the raw question text; combined with SYSTEM_PROMPT
# by get_prompt into the full chat template.
INSTRUCTION = "Question: {question}"


def main():
    """Answer every MCQ question with BioMedGPT and save a CSV of responses."""
    start_time = time.time()
    llm = llama_model(MODEL_NAME, BRANCH_NAME, CACHE_DIR)
    template = get_prompt(INSTRUCTION, SYSTEM_PROMPT)
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    question_df = pd.read_csv(QUESTION_PATH)
    answer_list = []
    for _, row in question_df.iterrows():  # index unused; discarded
        output = llm_chain.run(row["text"])
        answer_list.append((row["text"], row["correct_node"], output))
    answer_df = pd.DataFrame(answer_list, columns=["question", "correct_answer", "llm_answer"])
    answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)
    print("Completed in {} min".format((time.time() - start_time) / 60))


if __name__ == "__main__":
    main()


# --- kg_rag/prompt_based_generation/Llama/run_true_false_generation.py ---
#
# Prompt-based (no KG retrieval) true/false QA with a Llama model.
from langchain import PromptTemplate, LLMChain
from kg_rag.utility import *

QUESTION_PATH = config_data["TRUE_FALSE_PATH"]
SYSTEM_PROMPT = system_prompts["TRUE_FALSE_QUESTION_PROMPT_BASED"]
SAVE_PATH = config_data["SAVE_RESULTS_PATH"]
MODEL_NAME = config_data["LLAMA_MODEL_NAME"]
BRANCH_NAME = config_data["LLAMA_MODEL_BRANCH"]
CACHE_DIR = config_data["LLM_CACHE_DIR"]

INSTRUCTION = "Question: {question}"

save_name = "_".join(MODEL_NAME.split("/")[-1].split("-")) + "_prompt_based_one_hop_true_false_binary_response.csv"


def main():
    """Answer every true/false question with the Llama model and save a CSV.

    Writes (question, label, llm_answer) rows to SAVE_PATH/save_name.
    """
    start_time = time.time()
    llm = llama_model(MODEL_NAME, BRANCH_NAME, CACHE_DIR)
    template = get_prompt(INSTRUCTION, SYSTEM_PROMPT)
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    question_df = pd.read_csv(QUESTION_PATH)
    answer_list = []
    for _, row in question_df.iterrows():  # index unused; discarded
        output = llm_chain.run(row["text"])
        answer_list.append((row["text"], row["label"], output))
    answer_df = pd.DataFrame(answer_list, columns=["question", "label", "llm_answer"])
    answer_df.to_csv(os.path.join(SAVE_PATH, save_name), index=False, header=True)
    print("Completed in {} min".format((time.time() - start_time) / 60))


if __name__ == "__main__":
    main()


# --- kg_rag/prompt_based_generation/Llama/text_generation.py ---
#
# Interactive prompt-based text generation with a Llama model (single question).
from langchain import PromptTemplate, LLMChain
from kg_rag.utility import *
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-m', type=str, default='method-1', help='Method to choose for Llama model')
args = parser.parse_args()

METHOD = args.m

SYSTEM_PROMPT = system_prompts["PROMPT_BASED_TEXT_GENERATION"]
MODEL_NAME = config_data["LLAMA_MODEL_NAME"]
BRANCH_NAME = config_data["LLAMA_MODEL_BRANCH"]
CACHE_DIR = config_data["LLM_CACHE_DIR"]

INSTRUCTION = "Question: {question}"


def main():
    """Prompt for one question; the model's answer is streamed to stdout.

    Fix: the chain's return value was bound to an ``output`` variable that
    was never read or printed; the dead assignment is removed. The answer
    presumably reaches stdout via the stream=True callback wired up inside
    llama_model — TODO confirm against kg_rag.utility.
    """
    llm = llama_model(MODEL_NAME, BRANCH_NAME, CACHE_DIR, stream=True, method=METHOD)
    template = get_prompt(INSTRUCTION, SYSTEM_PROMPT)
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    print(" ")
    question = input("Enter your question : ")
    print("Here is the prompt-based answer:")
    print("")
    llm_chain.run(question)


if __name__ == "__main__":
    main()
