Sean Pedrick-Case committed on
Commit d2296bf · unverified · 2 Parent(s): ae0ac42 d3a9db3

Merge pull request #8 from seanpedrick-case/dev


Changed the embedding model to mixedbread-ai/mxbai-embed-xsmall-v1 and optimised the related functions. Updated Gemini model references.
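For reference, a minimal sketch of how the new embedding model can be loaded and used to build a FAISS index with the LangChain wrappers that appear in this diff (the repo's own helpers in tools/ingest.py may wrap these calls differently):

# Minimal sketch, assuming langchain-huggingface, langchain-community and faiss-cpu are installed.
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document

# Load the mixedbread xsmall sentence-embedding model referenced in this commit.
embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-xsmall-v1")

# Build a tiny in-memory index and run a similarity search against it.
docs = [Document(page_content="Example passage from the borough plan.")]
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
print(vectorstore.similarity_search_with_score("borough plan", k=1))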

.dockerignore CHANGED
@@ -11,4 +11,5 @@ dist/*
  test/*
  config/*
  output/*
- input/*
+ input/*
+ borough_plan/*
.gitignore CHANGED
@@ -11,4 +11,5 @@ dist/*
  test/*
  config/*
  output/*
- input/*
+ input/*
+ borough_plan/*
app.py CHANGED
@@ -1,6 +1,6 @@
  import os
  from typing import Type
- from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+ #from langchain_huggingface.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
  import gradio as gr
  import pandas as pd
@@ -8,17 +8,15 @@ from torch import float16, float32
  from llama_cpp import Llama
  from huggingface_hub import hf_hub_download
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
- import zipfile
 
- from chatfuncs.ingest import embed_faiss_save_to_zip
-
- from chatfuncs.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
- from chatfuncs.aws_functions import upload_file_to_s3
- from chatfuncs.auth import authenticate_user
- from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES, RUN_GEMINI_MODELS, LOAD_LARGE_MODEL
- from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
- import chatfuncs.chatfuncs as chatf
- import chatfuncs.ingest as ing
+ from tools.ingest import embed_faiss_save_to_zip, load_embeddings_model, get_faiss_store
+ from tools.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
+ from tools.aws_functions import upload_file_to_s3
+ from tools.auth import authenticate_user
+ from tools.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES, RUN_GEMINI_MODELS, LOAD_LARGE_MODEL
+ from tools.model_load import torch_device, gpu_config, cpu_config, context_length
+ import tools.chatfuncs as chatf
+ import tools.ingest as ing
 
  PandasDataFrame = Type[pd.DataFrame]
 
@@ -34,41 +32,14 @@ if isinstance(DEFAULT_EXAMPLES, str): default_examples_set = eval(DEFAULT_EXAMPL
  if isinstance(DEFAULT_MODEL_CHOICES, str): default_model_choices = eval(DEFAULT_MODEL_CHOICES)
 
  # Disable cuda devices if necessary
- #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-
+ #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 
  ###
  # Load preset embeddings, vectorstore, and model
  ###
-
- def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
-
-     embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
-
-     #global embeddings
-
-     #embeddings = embeddings_func
-
-     return embeddings_func
-
- def get_faiss_store(faiss_vstore_folder:str, embeddings_model:object):
-
-     with zipfile.ZipFile(faiss_vstore_folder + '/' + faiss_vstore_folder + '.zip', 'r') as zip_ref:
-         zip_ref.extractall(faiss_vstore_folder)
-
-     faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings_model, allow_dangerous_deserialization=True)
-     os.remove(faiss_vstore_folder + "/index.faiss")
-     os.remove(faiss_vstore_folder + "/index.pkl")
-
-     #global vectorstore
-
-     #vectorstore = faiss_vstore
-
-     return faiss_vstore #vectorstore
-
  # Load in default embeddings and embeddings model name
  embeddings_model = load_embeddings_model(EMBEDDINGS_MODEL_NAME)
- vectorstore = get_faiss_store(faiss_vstore_folder=DEFAULT_EMBEDDINGS_LOCATION,embeddings_model=embeddings_model)#globals()["embeddings"])
+ vectorstore = get_faiss_store(zip_file_path=DEFAULT_EMBEDDINGS_LOCATION,embeddings_model=embeddings_model)#globals()["embeddings"])
 
  chatf.embeddings = embeddings_model
  chatf.vectorstore = vectorstore
@@ -87,7 +58,6 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_mod
 
      return out_message, vectorstore_func
 
-
  def create_hf_model(model_name:str, hf_token=HF_TOKEN):
      if torch_device == "cuda":
          if "flan" in model_name:
@@ -167,12 +137,11 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c
 
      return model_type, load_confirmation, model_type#model, tokenizer, model_type
 
-
  ###
  # RUN UI
  ###
 
- app = gr.Blocks(theme = gr.themes.Base(), fill_width=True)#css=".gradio-container {background-color: black}")
+ app = gr.Blocks(theme = gr.themes.Default(primary_hue="blue"), fill_width=True)#css=".gradio-container {background-color: black}")
 
  with app:
      model_type = SMALL_MODEL_NAME
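The load_embeddings_model and get_faiss_store helpers removed from app.py above are now imported from tools/ingest.py, and get_faiss_store takes a zip_file_path rather than a folder name. A rough sketch of what the relocated helpers are expected to do, reconstructed from the removed app.py code (the actual implementations in tools/ingest.py may differ):

# Sketch only: approximates the helpers now imported from tools.ingest,
# reconstructed from the code removed from app.py in this commit.
import os
import zipfile
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

def load_embeddings_model(embeddings_model: str) -> HuggingFaceEmbeddings:
    # Wrap the chosen sentence-transformer model for use with LangChain.
    return HuggingFaceEmbeddings(model_name=embeddings_model)

def get_faiss_store(zip_file_path: str, embeddings_model: object) -> FAISS:
    # Unzip the saved index next to the archive, load it, then tidy up the extracted files.
    extract_dir = os.path.dirname(zip_file_path) or "."
    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)
    faiss_vstore = FAISS.load_local(folder_path=extract_dir, embeddings=embeddings_model,
                                    allow_dangerous_deserialization=True)
    os.remove(os.path.join(extract_dir, "index.faiss"))
    os.remove(os.path.join(extract_dir, "index.pkl"))
    return faiss_vstore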
app_save_docstore.py DELETED
@@ -1,307 +0,0 @@
1
- # Load in packages
2
-
3
- import os
4
-
5
- from typing import Type
6
- from langchain_community.embeddings import HuggingFaceEmbeddings#, HuggingFaceInstructEmbeddings
7
- from langchain_community.vectorstores import FAISS
8
- import gradio as gr
9
- import pandas as pd
10
-
11
- from transformers import AutoTokenizer
12
- import torch
13
-
14
- from llama_cpp import Llama
15
- from huggingface_hub import hf_hub_download
16
-
17
- PandasDataFrame = Type[pd.DataFrame]
18
-
19
- # Disable cuda devices if necessary
20
- #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
21
-
22
- #from chatfuncs.chatfuncs import *
23
- import chatfuncs.ingest as ing
24
-
25
- ## Load preset embeddings, vectorstore, and model
26
-
27
- embeddings_name = "BAAI/bge-base-en-v1.5"
28
-
29
- def load_embeddings(embeddings_name = embeddings_name):
30
-
31
- embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_name)
32
-
33
- global embeddings
34
-
35
- embeddings = embeddings_func
36
-
37
- return embeddings
38
-
39
- def get_faiss_store(faiss_vstore_folder,embeddings):
40
- import zipfile
41
- with zipfile.ZipFile(faiss_vstore_folder + '/' + faiss_vstore_folder + '.zip', 'r') as zip_ref:
42
- zip_ref.extractall(faiss_vstore_folder)
43
-
44
- faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings, allow_dangerous_deserialization=True)
45
- os.remove(faiss_vstore_folder + "/index.faiss")
46
- os.remove(faiss_vstore_folder + "/index.pkl")
47
-
48
- global vectorstore
49
-
50
- vectorstore = faiss_vstore
51
-
52
- return vectorstore
53
-
54
- import chatfuncs.chatfuncs as chatf
55
-
56
- chatf.embeddings = load_embeddings(embeddings_name)
57
- chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
58
-
59
-
60
- def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
61
- print("Loading model")
62
-
63
- # Default values inside the function
64
- if gpu_config is None:
65
- gpu_config = chatf.gpu_config
66
- if cpu_config is None:
67
- cpu_config = chatf.cpu_config
68
- if torch_device is None:
69
- torch_device = chatf.torch_device
70
-
71
- if model_type == "Phi 3 Mini (larger, slow)":
72
- if torch_device == "cuda":
73
- gpu_config.update_gpu(gpu_layers)
74
- print("Loading with", gpu_config.n_gpu_layers, "model layers sent to GPU.")
75
- else:
76
- gpu_config.update_gpu(gpu_layers)
77
- cpu_config.update_gpu(gpu_layers)
78
-
79
- print("Loading with", cpu_config.n_gpu_layers, "model layers sent to GPU.")
80
-
81
- print(vars(gpu_config))
82
- print(vars(cpu_config))
83
-
84
- try:
85
- model = Llama(
86
- model_path=hf_hub_download(
87
- repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF"),# "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
88
- filename=os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
89
- ),
90
- **vars(gpu_config) # change n_gpu_layers if you have more or less VRAM
91
- )
92
-
93
- except Exception as e:
94
- print("GPU load failed")
95
- print(e)
96
- model = Llama(
97
- model_path=hf_hub_download(
98
- repo_id=os.environ.get("REPO_ID", "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #"QuantFactory/Phi-3-mini-128k-instruct-GGUF"), #, "microsoft/Phi-3-mini-4k-instruct-gguf"),#"QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
99
- filename=os.environ.get("MODEL_FILE", "Phi-3-mini-128k-instruct.Q4_K_M.gguf"), # "Phi-3-mini-128k-instruct.Q4_K_M.gguf") # , #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf"),#"mistral-7b-openorca.Q4_K_M.gguf"),
100
- ),
101
- **vars(cpu_config)
102
- )
103
-
104
- tokenizer = []
105
-
106
- if model_type == "Flan Alpaca (small, fast)":
107
- # Huggingface chat model
108
- hf_checkpoint = 'declare-lab/flan-alpaca-large'#'declare-lab/flan-alpaca-base' # # #
109
-
110
- def create_hf_model(model_name):
111
-
112
- from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
113
-
114
- if torch_device == "cuda":
115
- if "flan" in model_name:
116
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
117
- else:
118
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
119
- else:
120
- if "flan" in model_name:
121
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float16)
122
- else:
123
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16)
124
-
125
- tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)
126
-
127
- return model, tokenizer, model_type
128
-
129
- model, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)
130
-
131
- chatf.model = model
132
- chatf.tokenizer = tokenizer
133
- chatf.model_type = model_type
134
-
135
- load_confirmation = "Finished loading model: " + model_type
136
-
137
- print(load_confirmation)
138
- return model_type, load_confirmation, model_type
139
-
140
- # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
141
- model_type = "Phi 3 Mini (larger, slow)"
142
- load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
143
-
144
- model_type = "Flan Alpaca (small, fast)"
145
- load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
146
-
147
- def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
148
-
149
- print(f"> Total split documents: {len(docs_out)}")
150
-
151
- print(docs_out)
152
-
153
- vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
154
-
155
-
156
- chatf.vectorstore = vectorstore_func
157
-
158
- out_message = "Document processing complete"
159
-
160
- return out_message, vectorstore_func, out_file
161
-
162
- # Gradio chat
163
-
164
- block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")
165
-
166
- with block:
167
- ingest_text = gr.State()
168
- ingest_metadata = gr.State()
169
- ingest_docs = gr.State()
170
-
171
- model_type_state = gr.State(model_type)
172
- embeddings_state = gr.State(chatf.embeddings)#globals()["embeddings"])
173
- vectorstore_state = gr.State(chatf.vectorstore)#globals()["vectorstore"])
174
-
175
- model_state = gr.State() # chatf.model (gives error)
176
- tokenizer_state = gr.State() # chatf.tokenizer (gives error)
177
-
178
- chat_history_state = gr.State()
179
- instruction_prompt_out = gr.State()
180
-
181
- gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
182
-
183
- gr.Markdown("Chat with PDF, web page or (new) csv/Excel documents. The default is a small model (Flan Alpaca), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative (Phi 3 Mini (larger, slow)), can reason a little better, but is much slower (See Advanced tab).\n\nBy default the Lambeth Borough Plan '[Lambeth 2030 : Our Future, Our Lambeth](https://www.lambeth.gov.uk/better-fairer-lambeth/projects/lambeth-2030-our-future-our-lambeth)' is loaded. If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.")
184
-
185
- with gr.Row():
186
- current_source = gr.Textbox(label="Current data source(s)", value="Lambeth_2030-Our_Future_Our_Lambeth.pdf", scale = 10)
187
- current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)
188
-
189
- with gr.Tab("Chatbot"):
190
-
191
- with gr.Row():
192
- #chat_height = 500
193
- chatbot = gr.Chatbot(avatar_images=('user.jfif', 'bot.jpg'),bubble_full_width = False, scale = 1) # , height=chat_height
194
- with gr.Accordion("Open this tab to see the source paragraphs used to generate the answer", open = False):
195
- sources = gr.HTML(value = "Source paragraphs with the most relevant text will appear here") # , height=chat_height
196
-
197
- with gr.Row():
198
- message = gr.Textbox(
199
- label="Enter your question here",
200
- lines=1,
201
- )
202
- with gr.Row():
203
- submit = gr.Button(value="Send message", variant="secondary", scale = 1)
204
- clear = gr.Button(value="Clear chat", variant="secondary", scale=0)
205
- stop = gr.Button(value="Stop generating", variant="secondary", scale=0)
206
-
207
- examples_set = gr.Radio(label="Examples for the Lambeth Borough Plan",
208
- #value = "What were the five pillars of the previous borough plan?",
209
- choices=["What were the five pillars of the previous borough plan?",
210
- "What is the vision statement for Lambeth?",
211
- "What are the commitments for Lambeth?",
212
- "What are the 2030 outcomes for Lambeth?"])
213
-
214
-
215
- current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here")
216
-
217
-
218
- with gr.Tab("Load in a different file to chat with"):
219
- with gr.Accordion("PDF file", open = False):
220
- in_pdf = gr.File(label="Upload pdf", file_count="multiple", file_types=['.pdf'])
221
- load_pdf = gr.Button(value="Load in file", variant="secondary", scale=0)
222
-
223
- with gr.Accordion("Web page", open = False):
224
- with gr.Row():
225
- in_web = gr.Textbox(label="Enter web page url")
226
- in_div = gr.Textbox(label="(Advanced) Web page div for text extraction", value="p", placeholder="p")
227
- load_web = gr.Button(value="Load in webpage", variant="secondary", scale=0)
228
-
229
- with gr.Accordion("CSV/Excel file", open = False):
230
- in_csv = gr.File(label="Upload CSV/Excel file", file_count="multiple", file_types=['.csv', '.xlsx'])
231
- in_text_column = gr.Textbox(label="Enter column name where text is stored")
232
- load_csv = gr.Button(value="Load in CSV/Excel file", variant="secondary", scale=0)
233
-
234
- with gr.Row():
235
- ingest_embed_out = gr.Textbox(label="File/web page preparation progress")
236
- out_file_box = gr.File(count='single', filetype=['.zip'])
237
-
238
- with gr.Tab("Advanced features"):
239
- out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
240
- temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
241
- with gr.Row():
242
- model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Phi 3 Mini (larger, slow)"])
243
- change_model_button = gr.Button(value="Load model", scale=0)
244
- with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
245
- gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
246
-
247
- load_text = gr.Text(label="Load status")
248
-
249
-
250
- gr.HTML(
251
- "<center>This app is based on the models Flan Alpaca and Phi 3 Mini. It powered by Gradio, Transformers, and Llama.cpp.</a></center>"
252
- )
253
-
254
- examples_set.change(fn=chatf.update_message, inputs=[examples_set], outputs=[message])
255
-
256
- change_model_button.click(fn=chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
257
- then(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs = [model_type_state, load_text, current_model]).\
258
- then(lambda: chatf.restore_interactivity(), None, [message], queue=False).\
259
- then(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic]).\
260
- then(lambda: None, None, chatbot, queue=False)
261
-
262
- # Load in a pdf
263
- load_pdf_click = load_pdf.click(ing.parse_file, inputs=[in_pdf], outputs=[ingest_text, current_source]).\
264
- then(ing.text_to_docs, inputs=[ingest_text], outputs=[ingest_docs]).\
265
- then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
266
- then(chatf.hide_block, outputs = [examples_set])
267
-
268
- # Load in a webpage
269
- load_web_click = load_web.click(ing.parse_html, inputs=[in_web, in_div], outputs=[ingest_text, ingest_metadata, current_source]).\
270
- then(ing.html_text_to_docs, inputs=[ingest_text, ingest_metadata], outputs=[ingest_docs]).\
271
- then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
272
- then(chatf.hide_block, outputs = [examples_set])
273
-
274
- # Load in a csv/excel file
275
- load_csv_click = load_csv.click(ing.parse_csv_or_excel, inputs=[in_csv, in_text_column], outputs=[ingest_text, current_source]).\
276
- then(ing.csv_excel_text_to_docs, inputs=[ingest_text, in_text_column], outputs=[ingest_docs]).\
277
- then(docs_to_faiss_save, inputs=[ingest_docs], outputs=[ingest_embed_out, vectorstore_state, file_out_box]).\
278
- then(chatf.hide_block, outputs = [examples_set])
279
-
280
- # Load in a webpage
281
-
282
- # Click/enter to send message action
283
- response_click = submit.click(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False, api_name="retrieval").\
284
- then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
285
- then(chatf.produce_streaming_answer_chatbot, inputs=[chatbot, instruction_prompt_out, model_type_state, temp_slide], outputs=chatbot)
286
- response_click.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
287
- then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
288
- then(lambda: chatf.restore_interactivity(), None, [message], queue=False)
289
-
290
- response_enter = message.submit(chatf.create_full_prompt, inputs=[message, chat_history_state, current_topic, vectorstore_state, embeddings_state, model_type_state, out_passages], outputs=[chat_history_state, sources, instruction_prompt_out], queue=False).\
291
- then(chatf.turn_off_interactivity, inputs=[message, chatbot], outputs=[message, chatbot], queue=False).\
292
- then(chatf.produce_streaming_answer_chatbot, [chatbot, instruction_prompt_out, model_type_state, temp_slide], chatbot)
293
- response_enter.then(chatf.highlight_found_text, [chatbot, sources], [sources]).\
294
- then(chatf.add_inputs_answer_to_history,[message, chatbot, current_topic], [chat_history_state, current_topic]).\
295
- then(lambda: chatf.restore_interactivity(), None, [message], queue=False)
296
-
297
- # Stop box
298
- stop.click(fn=None, inputs=None, outputs=None, cancels=[response_click, response_enter])
299
-
300
- # Clear box
301
- clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
302
- clear.click(lambda: None, None, chatbot, queue=False)
303
-
304
- # Thumbs up or thumbs down voting function
305
- chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state], None)
306
-
307
- block.queue().launch(debug=True)

chatfuncs/ingest_borough_plan.py DELETED
@@ -1,14 +0,0 @@
- import ingest as ing
-
- borough_plan_text, file_names = ing.parse_file([open("Lambeth_2030-Our_Future_Our_Lambeth.pdf")])
- print("Borough plan text created")
-
- print(borough_plan_text)
-
- borough_plan_docs = ing.text_to_docs(borough_plan_text)
- print("Borough plan docs created")
-
- embedding_model = "BAAI/bge-base-en-v1.5" # "mixedbread-ai/mxbai-embed-xsmall-v1" #
-
- embeddings = ing.load_embeddings(model_name = embedding_model)
- ing.embed_faiss_save_to_zip(borough_plan_docs, save_to="faiss_embedding", model_name = embedding_model)

faiss_embedding/faiss_embedding.zip CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35bdbf12bc8094c1a4460f790dcd3cd78b4af883fb08f0d10afec7bcfa9eff5a
- size 421142
+ oid sha256:ca011e3baf4b92201d378f35ab5f2fe0b6d16ac2eaac4f0705b8c4e84e24a6ae
+ size 243109
ingest_borough_plan.py ADDED
@@ -0,0 +1,14 @@
+ from tools.ingest import parse_file, text_to_docs, load_embeddings_model, embed_faiss_save_to_zip
+
+ borough_plan_text, file_names = parse_file([open("Lambeth_2030-Our_Future_Our_Lambeth.pdf")])
+ print("Borough plan text created")
+
+ #print(borough_plan_text)
+
+ borough_plan_docs = text_to_docs(borough_plan_text)
+ print("Borough plan docs created")
+
+ embedding_model = "mixedbread-ai/mxbai-embed-xsmall-v1"
+
+ embeddings = load_embeddings_model(embeddings_model = embedding_model)
+ embed_faiss_save_to_zip(borough_plan_docs, save_folder="borough_plan", embeddings_model_object= embeddings, save_to="faiss_embedding", model_name = embedding_model)
requirements.txt CHANGED
@@ -7,7 +7,8 @@ pandas==2.2.3
  transformers==4.51.3
  # For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-#cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
  llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu # For linux if dependencies for below build command are not available in the environment
- #llama-cpp-python==0.3.8 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+ #llama-cpp-python==0.3.9 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS" # CPU
+ #llama-cpp-python==0.3.9 -C cmake.args="-DGGML_CUDA=on" # With CUDA
  torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu
  sentence_transformers==4.1.0
  faiss-cpu==1.10.0
test/test_module.py CHANGED
@@ -16,8 +16,8 @@
  # +
  import pytest
  import gradio as gr
- from ..chatfuncs.ingest import *
- from ..chatfuncs.chatfuncs import *
+ from ..tools.ingest import *
+ from ..tools.chatfuncs import *
 
  def test_read_docx():
      content = read_docx('sample.docx')
{chatfuncs → tools}/__init__.py RENAMED
File without changes
{chatfuncs → tools}/auth.py RENAMED
@@ -4,7 +4,7 @@ import boto3
  import hmac
  import hashlib
  import base64
- from chatfuncs.config import AWS_CLIENT_ID, AWS_CLIENT_SECRET, AWS_USER_POOL_ID, AWS_REGION
+ from tools.config import AWS_CLIENT_ID, AWS_CLIENT_SECRET, AWS_USER_POOL_ID, AWS_REGION
 
  def calculate_secret_hash(client_id:str, client_secret:str, username:str):
      message = username + client_id
{chatfuncs → tools}/aws_functions.py RENAMED
@@ -2,7 +2,7 @@ from typing import Type, List
  import pandas as pd
  import boto3
  import os
- from chatfuncs.config import AWS_REGION, RUN_AWS_FUNCTIONS, QA_CHATBOT_BUCKET
+ from tools.config import AWS_REGION, RUN_AWS_FUNCTIONS, QA_CHATBOT_BUCKET
 
  PandasDataFrame = Type[pd.DataFrame]
 
{chatfuncs → tools}/chatfuncs.py RENAMED
@@ -14,6 +14,7 @@ from nltk.corpus import stopwords
14
  from nltk.tokenize import RegexpTokenizer
15
  from nltk.stem import WordNetLemmatizer
16
  from keybert import KeyBERT
 
17
 
18
  # For Name Entity Recognition model
19
  #from span_marker import SpanMarkerModel # Not currently used
@@ -32,9 +33,9 @@ from langchain_community.retrievers import SVMRetriever
32
  from langchain.text_splitter import RecursiveCharacterTextSplitter
33
  from langchain.docstore.document import Document
34
 
35
- from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma, instruction_prompt_template_gemini_aws
36
- from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
37
- from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS, FEEDBACK_LOGS_FOLDER
38
 
39
  model_object = [] # Define empty list for model functions to run
40
  tokenizer = [] # Define empty list for model functions to run
@@ -75,51 +76,6 @@ ner_model = []#SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base
75
  # Used to pull out keywords from chat history to add to user queries behind the scenes
76
  kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
77
 
78
- # Vectorstore funcs
79
-
80
- # def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
81
-
82
- # print(f"> Total split documents: {len(docs_out)}")
83
-
84
- # vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
85
-
86
- # '''
87
- # #with open("vectorstore.pkl", "wb") as f:
88
- # #pickle.dump(vectorstore, f)
89
- # '''
90
-
91
- # #if Path(save_to).exists():
92
- # # vectorstore_func.save_local(folder_path=save_to)
93
- # #else:
94
- # # os.mkdir(save_to)
95
- # # vectorstore_func.save_local(folder_path=save_to)
96
-
97
- # global vectorstore
98
-
99
- # vectorstore = vectorstore_func
100
-
101
- # out_message = "Document processing complete"
102
-
103
- # #print(out_message)
104
- # #print(f"> Saved to: {save_to}")
105
-
106
- # return out_message
107
-
108
- # def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_model):
109
-
110
- # print(f"> Total split documents: {len(docs_out)}")
111
-
112
- # print(docs_out)
113
-
114
- # vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings_model)
115
-
116
- # vectorstore = vectorstore_func
117
-
118
- # out_message = "Document processing complete"
119
-
120
- # return out_message, vectorstore_func
121
-
122
- # Prompt functions
123
 
124
  def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):
125
 
@@ -141,7 +97,6 @@ def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):
141
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_phi3, input_variables=['question', 'summaries'])
142
  else:
143
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_gemini_aws, input_variables=['question', 'summaries'])
144
-
145
 
146
  return INSTRUCTION_PROMPT, CONTENT_PROMPT
147
 
@@ -149,14 +104,44 @@ def write_out_metadata_as_string(metadata_in:str):
149
  metadata_string = [f"{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}" for d in metadata_in] # ['metadata']
150
  return metadata_string
151
 
152
- def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, content_prompt:str, extracted_memory:list, vectorstore:object, embeddings:object, relevant_flag:bool = True, out_passages:int = 2, total_output_passage_chunks_size:int=5): # ,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  question = inputs["question"]
155
  chat_history = inputs["chat_history"]
156
 
157
  if relevant_flag == True:
158
  new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,
159
- docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val = 25, out_passages = out_passages, vec_score_cut_off = 0.85, vec_weight = 1, bm25_weight = 1, svm_weight = 1)
160
  else:
161
  new_question_kworded = question
162
  doc_df = pd.DataFrame()
@@ -164,7 +149,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, con
164
  docs_keep_out = []
165
 
166
  if (not docs_keep_as_doc) | (doc_df.empty):
167
- sorry_prompt = """Say 'Sorry, there is no relevant information to answer this question.'"""
168
  return sorry_prompt, "No relevant sources found.", new_question_kworded
169
 
170
  # Expand the found passages to the neighbouring context
@@ -198,7 +183,7 @@ def create_full_prompt(user_input:str,
198
  history:list[dict],
199
  extracted_memory:str,
200
  vectorstore:object,
201
- embeddings:object,
202
  model_type:str,
203
  out_passages:list[str],
204
  api_key:str="",
@@ -213,7 +198,7 @@ def create_full_prompt(user_input:str,
213
  print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
214
 
215
  history = history or []
216
-
217
  # Create instruction prompt
218
  instruction_prompt, content_prompt = base_prompt_templates(model_type=model_type)
219
 
@@ -225,7 +210,7 @@ def create_full_prompt(user_input:str,
225
 
226
  instruction_prompt_out, docs_content_string, new_question_kworded =\
227
  generate_expanded_prompt({"question": user_input, "chat_history": history}, #vectorstore,
228
- instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings, relevant_flag, out_passages)
229
 
230
  history.append({"metadata":None, "options":None, "role": 'user', "content": user_input})
231
 
@@ -259,8 +244,6 @@ def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tok
259
  ],
260
  }
261
 
262
- print("prompt_config:", prompt_config)
263
-
264
  body = json.dumps(prompt_config)
265
 
266
  modelId = model_choice
@@ -367,8 +350,6 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, c
367
  elif "claude" in model_choice:
368
  try:
369
  print("Calling AWS Claude model")
370
- print("prompt:", prompt)
371
- print("system_prompt:", system_prompt)
372
  response = call_aws_claude(prompt, system_prompt, temperature, max_tokens, model_choice)
373
  except Exception as e:
374
  # If fails, try again after x seconds in case there is a throttle limit
@@ -420,9 +401,8 @@ def process_requests(prompts: List[str], system_prompt_with_table: str, conversa
420
 
421
  response, conversation_history = send_request(prompts[0], conversation_history, model=model, config=config, model_choice=model_choice, system_prompt=system_prompt_with_table, temperature=temperature)
422
 
423
- print(response.text)
424
- #"Okay, I'm ready. What source are we discussing, and what's your question about it? Please provide as much context as possible so I can give you the best answer."]
425
- print(response.usage_metadata)
426
  responses.append(response)
427
 
428
  # Create conversation txt object
@@ -464,8 +444,6 @@ def produce_streaming_answer_chatbot(
464
 
465
  history = chat_history
466
 
467
- print("history at start of streaming function:", history)
468
-
469
  if relevant_query_bool == False:
470
  history.append({"metadata":None, "options":None, "role": "assistant", "content": 'No relevant query found. Please retry your question'})
471
 
@@ -557,8 +535,6 @@ def produce_streaming_answer_chatbot(
557
  elif "claude" in model_type:
558
  system_prompt = "You are answering questions from the user based on source material. Make sure to fully answer the questions with all required detail."
559
 
560
- print("full_prompt:", full_prompt)
561
-
562
  if isinstance(full_prompt, str):
563
  full_prompt = [full_prompt]
564
 
@@ -622,7 +598,7 @@ def produce_streaming_answer_chatbot(
622
  history[-1]['content'] += char
623
  yield history
624
 
625
- print("history at end of function:", history)
626
 
627
  # Chat helper functions
628
 
@@ -691,164 +667,188 @@ def create_doc_df(docs_keep_out):
691
 
692
  return doc_df
693
 
694
- def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_passages,
695
- vec_score_cut_off, vec_weight, bm25_weight, svm_weight): # ,vectorstore, embeddings
 
 
 
 
 
 
 
 
 
 
 
696
 
697
- #vectorstore=globals()["vectorstore"]
698
- #embeddings=globals()["embeddings"]
699
- doc_df = pd.DataFrame()
 
 
 
 
 
 
 
700
 
 
 
 
 
 
 
701
 
702
- docs = vectorstore.similarity_search_with_score(new_question_kworded, k=k_val)
703
 
704
- # Keep only documents with a certain score
705
- docs_len = [len(x[0].page_content) for x in docs]
706
- docs_scores = [x[1] for x in docs]
707
 
708
- # Only keep sources that are sufficiently relevant (i.e. similarity search score below threshold below)
709
- score_more_limit = pd.Series(docs_scores) < vec_score_cut_off
710
- docs_keep = list(compress(docs, score_more_limit))
711
 
712
- if not docs_keep:
713
- return [], pd.DataFrame(), []
 
714
 
715
- # Only keep sources that are at least 100 characters long
716
- length_more_limit = pd.Series(docs_len) >= 100
717
- docs_keep = list(compress(docs_keep, length_more_limit))
718
 
719
- if not docs_keep:
720
- return [], pd.DataFrame(), []
 
721
 
722
- docs_keep_as_doc = [x[0] for x in docs_keep]
723
- docs_keep_length = len(docs_keep_as_doc)
724
 
 
 
 
 
725
 
726
-
727
- if docs_keep_length == 1:
 
 
 
 
 
 
728
 
729
- content=[]
730
- meta_url=[]
731
- score=[]
732
-
733
- for item in docs_keep:
734
- content.append(item[0].page_content)
735
- meta_url.append(item[0].metadata['source'])
736
- score.append(item[1])
737
 
738
- # Create df from 'winning' passages
 
739
 
740
- doc_df = pd.DataFrame(list(zip(content, meta_url, score)),
741
- columns =['page_content', 'meta_url', 'score'])
742
 
743
- docs_content = doc_df['page_content'].astype(str)
744
- docs_url = doc_df['meta_url']
 
 
 
 
 
 
 
745
 
746
- return docs_keep_as_doc, doc_df, docs_content, docs_url
747
-
748
- # Check for if more docs are removed than the desired output
749
- if out_passages > docs_keep_length:
750
- out_passages = docs_keep_length
751
- k_val = docs_keep_length
752
-
753
- vec_rank = [*range(1, docs_keep_length+1)]
754
- vec_score = [(docs_keep_length/x)*vec_weight for x in vec_rank]
755
-
756
- print("Number of documents remaining: ", docs_keep_length)
757
-
758
- # 2nd level check using BM25s package to do keyword search on retrieved passages.
759
-
760
- content_keep=[]
761
- for item in docs_keep:
762
- content_keep.append(item[0].page_content)
763
-
764
- # Prepare Corpus (Tokenized & Optional Stemming)
765
- corpus = [doc.lower() for doc in content_keep]
766
- #stemmer = SnowballStemmer("english", ignore_stopwords=True) # NLTK stemming not compatible
767
- stemmer = Stemmer.Stemmer("english")
768
- corpus_tokens = bm25s.tokenize(corpus, stopwords="en", stemmer=stemmer)
769
-
770
- # Create and Index with BM25s
771
- retriever = bm25s.BM25()
772
- retriever.index(corpus_tokens)
773
-
774
- # Query Processing (Stemming applied consistently if used above)
775
- query_tokens = bm25s.tokenize(new_question_kworded.lower(), stemmer=stemmer)
776
- results, scores = retriever.retrieve(query_tokens, corpus=corpus, k=len(corpus)) # Retrieve all docs
777
-
778
- for i in range(results.shape[1]):
779
- doc, score = results[0, i], scores[0, i]
780
- print(f"Rank {i+1} (score: {score:.2f}): {doc}")
781
-
782
- #print("BM25 results:", results)
783
- #print("BM25 scores:", scores)
784
-
785
- # Rank Calculation (Custom Logic for Your BM25 Score)
786
- bm25_rank = list(range(1, len(results[0]) + 1))
787
- #bm25_rank = results[0]#.tolist()[0] # Since you have a single query
788
- bm25_score = [(docs_keep_length / (rank + 1)) * bm25_weight for rank in bm25_rank]
789
- # +1 to avoid division by 0 for rank 0
790
-
791
- # Result Ordering (Using the calculated ranks)
792
- pairs = list(zip(bm25_rank, docs_keep_as_doc))
793
- pairs.sort()
794
- bm25_result = [value for rank, value in pairs]
795
-
796
 
797
- # 3rd level check on retrieved docs with SVM retriever
798
- # Check the type of the embeddings object
799
- embeddings_type = type(embeddings)
800
 
801
 
802
- #hf_embeddings = HuggingFaceEmbeddings(**embeddings)
803
- hf_embeddings = embeddings
804
-
805
- svm_retriever = SVMRetriever.from_texts(content_keep, hf_embeddings, k = k_val)
806
- svm_result = svm_retriever.invoke(new_question_kworded)
807
 
808
-
809
- svm_rank=[]
810
- svm_score = []
811
 
812
- for vec_item in docs_keep:
813
- x = 0
814
- for svm_item in svm_result:
815
- x = x + 1
816
- if svm_item.page_content == vec_item[0].page_content:
817
- svm_rank.append(x)
818
- svm_score.append((docs_keep_length/x)*svm_weight)
819
 
820
-
821
- ## Calculate final score based on three ranking methods
822
- final_score = [a + b + c for a, b, c in zip(vec_score, bm25_score, svm_score)]
823
- final_rank = [sorted(final_score, reverse=True).index(x)+1 for x in final_score]
824
- # Force final_rank to increment by 1 each time
825
- final_rank = list(pd.Series(final_rank).rank(method='first'))
826
 
827
- #print("final rank: " + str(final_rank))
828
- #print("out_passages: " + str(out_passages))
 
 
 
829
 
830
- best_rank_index_pos = []
 
831
 
832
- for x in range(1,out_passages+1):
833
- try:
834
- best_rank_index_pos.append(final_rank.index(x))
835
- except IndexError: # catch the error
836
- pass
837
 
838
- # Adjust best_rank_index_pos to
 
 
 
 
839
 
840
- best_rank_pos_series = pd.Series(best_rank_index_pos)
841
 
 
842
 
843
- docs_keep_out = [docs_keep[i] for i in best_rank_index_pos]
844
-
845
- # Keep only 'best' options
846
- docs_keep_as_doc = [x[0] for x in docs_keep_out]
847
-
848
- # Make df of best options
849
- doc_df = create_doc_df(docs_keep_out)
850
 
851
- return docs_keep_as_doc, doc_df, docs_keep_out
 
 
 
 
 
 
 
 
852
 
853
  def get_expanded_passages(vectorstore, docs, width):
854
 
 
14
  from nltk.tokenize import RegexpTokenizer
15
  from nltk.stem import WordNetLemmatizer
16
  from keybert import KeyBERT
17
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
18
 
19
  # For Name Entity Recognition model
20
  #from span_marker import SpanMarkerModel # Not currently used
 
33
  from langchain.text_splitter import RecursiveCharacterTextSplitter
34
  from langchain.docstore.document import Document
35
 
36
+ from tools.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma, instruction_prompt_template_gemini_aws
37
+ from tools.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
38
+ from tools.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS, FEEDBACK_LOGS_FOLDER
39
 
40
  model_object = [] # Define empty list for model functions to run
41
  tokenizer = [] # Define empty list for model functions to run
 
76
  # Used to pull out keywords from chat history to add to user queries behind the scenes
77
  kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
78

79
 
80
  def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):
81
 
 
97
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_phi3, input_variables=['question', 'summaries'])
98
  else:
99
  INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_gemini_aws, input_variables=['question', 'summaries'])
 
100
 
101
  return INSTRUCTION_PROMPT, CONTENT_PROMPT
102
 
 
104
  metadata_string = [f"{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}" for d in metadata_in] # ['metadata']
105
  return metadata_string
106
 
107
+ def generate_expanded_prompt(
108
+ inputs: Dict[str, str],
109
+ instruction_prompt: str,
110
+ content_prompt: str,
111
+ extracted_memory: list,
112
+ vectorstore: object,
113
+ embeddings_model: object,
114
+ relevant_flag: bool = True,
115
+ out_passages: int = 2,
116
+ total_output_passage_chunks_size: int = 5
117
+ ):
118
+ """
119
+ Generate an expanded prompt for a language model by retrieving and formatting relevant document passages.
120
+
121
+ Args:
122
+ inputs (Dict[str, str]): Dictionary containing the user's question and chat history.
123
+ instruction_prompt (str): The instruction prompt template to use for the model.
124
+ content_prompt (str): The content prompt template for formatting passages.
125
+ extracted_memory (list): List of previous conversation memory or context.
126
+ vectorstore (object): The vector store object used for document retrieval.
127
+ embeddings_model (object): The embeddings model used for vector search.
128
+ relevant_flag (bool, optional): Whether to perform relevant document retrieval. Defaults to True.
129
+ out_passages (int, optional): Number of passages to retrieve. Defaults to 2.
130
+ total_output_passage_chunks_size (int, optional): Number of neighboring chunks to expand for context. Defaults to 5.
131
+
132
+ Returns:
133
+ tuple: (instruction_prompt_out, sources_docs_content_string, new_question_kworded)
134
+ instruction_prompt_out (str): The fully formatted instruction prompt for the model.
135
+ sources_docs_content_string (str): The formatted string of source passages and metadata for user display.
136
+ new_question_kworded (str): The (possibly keyword-adapted) user question.
137
+ """
138
 
139
  question = inputs["question"]
140
  chat_history = inputs["chat_history"]
141
 
142
  if relevant_flag == True:
143
  new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,
144
+ docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings_model, k_val = 25, out_passages = out_passages, vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)
145
  else:
146
  new_question_kworded = question
147
  doc_df = pd.DataFrame()
 
149
  docs_keep_out = []
150
 
151
  if (not docs_keep_as_doc) | (doc_df.empty):
152
+ sorry_prompt = """Respond 'Sorry, there is no relevant information to answer this question.'"""
153
  return sorry_prompt, "No relevant sources found.", new_question_kworded
154
 
155
  # Expand the found passages to the neighbouring context
 
183
  history:list[dict],
184
  extracted_memory:str,
185
  vectorstore:object,
186
+ embeddings_model:object,
187
  model_type:str,
188
  out_passages:list[str],
189
  api_key:str="",
 
198
  print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
199
 
200
  history = history or []
201
+
202
  # Create instruction prompt
203
  instruction_prompt, content_prompt = base_prompt_templates(model_type=model_type)
204
 
 
210
 
211
  instruction_prompt_out, docs_content_string, new_question_kworded =\
212
  generate_expanded_prompt({"question": user_input, "chat_history": history}, #vectorstore,
213
+ instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings_model, relevant_flag, out_passages)
214
 
215
  history.append({"metadata":None, "options":None, "role": 'user', "content": user_input})
216
 
 
244
  ],
245
  }
246
 
 
 
247
  body = json.dumps(prompt_config)
248
 
249
  modelId = model_choice
 
350
  elif "claude" in model_choice:
351
  try:
352
  print("Calling AWS Claude model")
 
 
353
  response = call_aws_claude(prompt, system_prompt, temperature, max_tokens, model_choice)
354
  except Exception as e:
355
  # If fails, try again after x seconds in case there is a throttle limit
 
401
 
402
  response, conversation_history = send_request(prompts[0], conversation_history, model=model, config=config, model_choice=model_choice, system_prompt=system_prompt_with_table, temperature=temperature)
403
 
404
+ #print(response.text)
405
+ #print(response.usage_metadata)
 
406
  responses.append(response)
407
 
408
  # Create conversation txt object
 
444
 
445
  history = chat_history
446
 
 
 
447
  if relevant_query_bool == False:
448
  history.append({"metadata":None, "options":None, "role": "assistant", "content": 'No relevant query found. Please retry your question'})
449
 
 
535
  elif "claude" in model_type:
536
  system_prompt = "You are answering questions from the user based on source material. Make sure to fully answer the questions with all required detail."
537
 
 
 
538
  if isinstance(full_prompt, str):
539
  full_prompt = [full_prompt]
540
 
 
598
  history[-1]['content'] += char
599
  yield history
600
 
601
+ #print("history at end of function:", history)
602
 
603
  # Chat helper functions
604
 
 
667
 
668
  return doc_df
669
 
670
+ def hybrid_retrieval(
671
+ new_question_kworded: str,
672
+ vectorstore:FAISS,
673
+ embeddings_model:HuggingFaceEmbeddings,
674
+ k_val: int,
675
+ out_passages: int,
676
+ vec_score_cut_off: float,
677
+ vec_weight: float,
678
+ bm25_weight: float,
679
+ svm_weight: float
680
+ ) -> tuple:
681
+ """
682
+ Perform hybrid retrieval of relevant documents based on a query using vector similarity, BM25, and SVM weights.
683
 
684
+ Args:
685
+ new_question_kworded (str): The keyword-adapted user query.
686
+ vectorstore: The vectorstore object for similarity search.
687
+ embeddings_model: The embeddings model used for vector search.
688
+ k_val (int): Number of top documents to retrieve.
689
+ out_passages (int): Number of passages to output.
690
+ vec_score_cut_off (float): Similarity score threshold for filtering.
691
+ vec_weight (float): Weight for vector similarity.
692
+ bm25_weight (float): Weight for BM25 retrieval.
693
+ svm_weight (float): Weight for SVM retrieval.
694
 
695
+ Returns:
696
+ tuple: (docs_keep_as_doc, doc_df, docs_keep_out)
697
+ docs_keep_as_doc: List of kept document objects.
698
+ doc_df: DataFrame of kept documents and metadata.
699
+ docs_keep_out: List of kept (document, score) tuples.
700
+ """
701
 
702
+ doc_df = pd.DataFrame()
703
 
704
+ docs = vectorstore.similarity_search_with_score(new_question_kworded, k=k_val)
 
 
705
 
706
+ # Keep only documents with a certain score
707
+ docs_len = [len(x[0].page_content) for x in docs]
708
+ docs_scores = [x[1] for x in docs]
709
 
710
+ # Only keep sources that are sufficiently relevant (i.e. similarity search score above threshold below)
711
+ score_more_limit = pd.Series(docs_scores) > vec_score_cut_off
712
+ docs_keep = list(compress(docs, score_more_limit))
713
 
714
+ if not docs_keep:
715
+ return [], pd.DataFrame(), []
 
716
 
717
+ # Only keep sources that are at least 100 characters long
718
+ length_more_limit = pd.Series(docs_len) >= 100
719
+ docs_keep = list(compress(docs_keep, length_more_limit))
720
 
721
+ if not docs_keep:
722
+ return [], pd.DataFrame(), []
723
 
724
+ docs_keep_as_doc = [x[0] for x in docs_keep]
725
+ docs_keep_length = len(docs_keep_as_doc)
726
+
727
+ if docs_keep_length == 1:
728
 
729
+ content=[]
730
+ meta_url=[]
731
+ score=[]
732
+
733
+ for item in docs_keep:
734
+ content.append(item[0].page_content)
735
+ meta_url.append(item[0].metadata['source'])
736
+ score.append(item[1])
737
 
738
+ # Create df from 'winning' passages
 
 
 
 
 
 
 
739
 
740
+ doc_df = pd.DataFrame(list(zip(content, meta_url, score)),
741
+ columns =['page_content', 'meta_url', 'score'])
742
 
743
+ docs_content = doc_df['page_content'].astype(str)
744
+ docs_url = doc_df['meta_url']
745
 
746
+ return docs_keep_as_doc, doc_df, docs_content, docs_url
747
+
748
+ # Check for if more docs are removed than the desired output
749
+ if out_passages > docs_keep_length:
750
+ out_passages = docs_keep_length
751
+ k_val = docs_keep_length
752
+
753
+ vec_rank = [*range(1, docs_keep_length+1)]
754
+ vec_score = [(docs_keep_length/x)*vec_weight for x in vec_rank]
755
 
756
+ print("Number of documents remaining: ", docs_keep_length)
757
+
758
+ # 2nd level check using BM25s package to do keyword search on retrieved passages.
759
+
760
+ content_keep=[]
761
+ for item in docs_keep:
762
+ content_keep.append(item[0].page_content)
763
+
764
+ # Prepare Corpus (Tokenized & Optional Stemming)
765
+ corpus = [doc.lower() for doc in content_keep]
766
+ #stemmer = SnowballStemmer("english", ignore_stopwords=True) # NLTK stemming not compatible
767
+ stemmer = Stemmer.Stemmer("english")
768
+ corpus_tokens = bm25s.tokenize(corpus, stopwords="en", stemmer=stemmer)
769
+
770
+ # Create and Index with BM25s
771
+ retriever = bm25s.BM25()
772
+ retriever.index(corpus_tokens)
773
+
774
+ # Query Processing (Stemming applied consistently if used above)
775
+ query_tokens = bm25s.tokenize(new_question_kworded.lower(), stemmer=stemmer)
776
+ results, scores = retriever.retrieve(query_tokens, corpus=corpus, k=len(corpus)) # Retrieve all docs
777
+
778
+ for i in range(results.shape[1]):
779
+ doc, score = results[0, i], scores[0, i]
780
+ print(f"Rank {i+1} (score: {score:.2f}): {doc}")
781
+
782
+ #print("BM25 results:", results)
783
+ #print("BM25 scores:", scores)
784
+
785
+ # Rank Calculation (Custom Logic for Your BM25 Score)
786
+ bm25_rank = list(range(1, len(results[0]) + 1))
787
+ #bm25_rank = results[0]#.tolist()[0] # Since you have a single query
788
+ bm25_score = [(docs_keep_length / (rank + 1)) * bm25_weight for rank in bm25_rank]
789
+ # +1 to avoid division by 0 for rank 0
790
+
791
+ # Result Ordering (Using the calculated ranks)
792
+ pairs = list(zip(bm25_rank, docs_keep_as_doc))
793
+ pairs.sort()
794
+ bm25_result = [value for rank, value in pairs]
795
+
 
 
 
 
 
 
 
 
 
 
796
 
797
+ # 3rd level check on retrieved docs with SVM retriever
798
+ # Check the type of the embeddings_model object
799
+ embeddings_type = type(embeddings_model)
800
 
801
 
802
+ #hf_embeddings = HuggingFaceEmbeddings(**embeddings)
803
+ hf_embeddings = embeddings_model
804
+
805
+ svm_retriever = SVMRetriever.from_texts(content_keep, hf_embeddings, k = k_val)
806
+ svm_result = svm_retriever.invoke(new_question_kworded)
807
 
808
+
809
+ svm_rank=[]
810
+ svm_score = []
811
 
812
+ for vec_item in docs_keep:
813
+ x = 0
814
+ for svm_item in svm_result:
815
+ x = x + 1
816
+ if svm_item.page_content == vec_item[0].page_content:
817
+ svm_rank.append(x)
818
+ svm_score.append((docs_keep_length/x)*svm_weight)
819
 
 
 
 
 
 
 
820
 
821
+ ## Calculate final score based on three ranking methods
822
+ final_score = [a + b + c for a, b, c in zip(vec_score, bm25_score, svm_score)]
823
+ final_rank = [sorted(final_score, reverse=True).index(x)+1 for x in final_score]
824
+ # Force final_rank to increment by 1 each time
825
+ final_rank = list(pd.Series(final_rank).rank(method='first'))
826
 
827
+ #print("final rank: " + str(final_rank))
828
+ #print("out_passages: " + str(out_passages))
829
 
830
+ best_rank_index_pos = []
 
 
 
 
831
 
832
+ for x in range(1,out_passages+1):
833
+ try:
834
+ best_rank_index_pos.append(final_rank.index(x))
835
+ except IndexError: # catch the error
836
+ pass
837
 
838
+ # Adjust best_rank_index_pos to
839
 
840
+ best_rank_pos_series = pd.Series(best_rank_index_pos)
841
 
 
 
 
 
 
 
 
842
 
843
+ docs_keep_out = [docs_keep[i] for i in best_rank_index_pos]
844
+
845
+ # Keep only 'best' options
846
+ docs_keep_as_doc = [x[0] for x in docs_keep_out]
847
+
848
+ # Make df of best options
849
+ doc_df = create_doc_df(docs_keep_out)
850
+
851
+ return docs_keep_as_doc, doc_df, docs_keep_out
852
 
853
  def get_expanded_passages(vectorstore, docs, width):
854
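For context, a hedged usage sketch of the refactored retrieval path: argument values mirror the call made in generate_expanded_prompt above, and note that this commit flips the vec_score_cut_off filter from keeping scores below the threshold (old value 0.85) to keeping scores above it (new value 1). Module paths assume the new tools package layout.

# Usage sketch only: mirrors how generate_expanded_prompt() calls hybrid_retrieval()
# after this refactor; not a definitive API reference.
from tools.ingest import load_embeddings_model, get_faiss_store
from tools.chatfuncs import hybrid_retrieval

embeddings_model = load_embeddings_model("mixedbread-ai/mxbai-embed-xsmall-v1")
vectorstore = get_faiss_store(zip_file_path="faiss_embedding/faiss_embedding.zip",
                              embeddings_model=embeddings_model)

docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(
    "What are the 2030 outcomes for Lambeth?",
    vectorstore,
    embeddings_model,
    k_val=25,               # candidate passages from the vector search
    out_passages=2,         # passages returned after BM25/SVM re-ranking
    vec_score_cut_off=1,    # similarity scores above this threshold are kept
    vec_weight=1,
    bm25_weight=1,
    svm_weight=1,
)
print(doc_df)  # DataFrame of the kept passages and their metadata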
 
{chatfuncs → tools}/config.py RENAMED
@@ -200,17 +200,18 @@ if LOAD_LARGE_MODEL == "1":
      default_model_choices.append(LARGE_MODEL_NAME)
 
  if RUN_AWS_FUNCTIONS == "1":
-     default_model_choices.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"])
+     default_model_choices.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-7-sonnet-20250219-v1:0"])
 
  if RUN_GEMINI_MODELS == "1":
-     default_model_choices.extend(["gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "models/gemini-2.5-pro-exp-03-25"])
+     GEMINI_MODELS = ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"]
+     default_model_choices.extend(GEMINI_MODELS)
 
 
  DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", str(default_model_choices))
 
- EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "BAAI/bge-base-en-v1.5") #"mixedbread-ai/mxbai-embed-xsmall-v1"
+ EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "mixedbread-ai/mxbai-embed-xsmall-v1")
 
- DEFAULT_EMBEDDINGS_LOCATION = get_or_create_env_var('DEFAULT_EMBEDDINGS_LOCATION', "faiss_embedding")
+ DEFAULT_EMBEDDINGS_LOCATION = get_or_create_env_var('DEFAULT_EMBEDDINGS_LOCATION', "faiss_embedding/faiss_embedding.zip")
 
  DEFAULT_DATA_SOURCE_NAME = get_or_create_env_var('DEFAULT_DATA_SOURCE_NAME', "Document redaction app documentation")
 
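A small, self-contained illustration of how these config lines compose. The helper below mirrors the repo's get_or_create_env_var in spirit only (the real one lives in tools/helper_functions.py and may differ in detail), and the local model name is a placeholder.

import os

def get_or_create_env_var(var_name: str, default_value: str) -> str:
    # Use the environment variable if it is already set, otherwise fall back to the default
    value = os.environ.get(var_name)
    if value is None:
        os.environ[var_name] = default_value
        value = default_value
    return value

default_model_choices = ["local-small-model"]                      # placeholder for SMALL_MODEL_NAME
if get_or_create_env_var("RUN_GEMINI_MODELS", "1") == "1":
    GEMINI_MODELS = ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"]
    default_model_choices.extend(GEMINI_MODELS)

# Exporting DEFAULT_MODEL_CHOICES before start-up overrides the assembled list entirely
DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", str(default_model_choices))
print(DEFAULT_MODEL_CHOICES)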
{chatfuncs β†’ tools}/helper_functions.py RENAMED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import pandas as pd
4
  import boto3
5
  from botocore.exceptions import ClientError
6
- from chatfuncs.config import CUSTOM_HEADER_VALUE, CUSTOM_HEADER, OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, AWS_USER_POOL_ID
7
 
8
  def get_or_create_env_var(var_name, default_value):
9
  # Get the environment variable if it exists
 
3
  import pandas as pd
4
  import boto3
5
  from botocore.exceptions import ClientError
6
+ from tools.config import CUSTOM_HEADER_VALUE, CUSTOM_HEADER, OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, AWS_USER_POOL_ID
7
 
8
  def get_or_create_env_var(var_name, default_value):
9
  # Get the environment variable if it exists
{chatfuncs β†’ tools}/ingest.py RENAMED
@@ -6,19 +6,31 @@ import re
6
  import requests
7
  import pandas as pd
8
  import dateutil.parser
9
- from typing import Type, List
10
  import shutil
 
 
 
 
 
11
 
 
12
  #from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
13
  from langchain_community.vectorstores.faiss import FAISS
14
  #from langchain_community.vectorstores import Chroma
15
  from langchain.text_splitter import RecursiveCharacterTextSplitter
16
  from langchain.docstore.document import Document
17
  #from chatfuncs.config import EMBEDDINGS_MODEL_NAME
18
-
 
 
 
19
  from bs4 import BeautifulSoup
20
  from docx import Document as Doc
21
  from pypdf import PdfReader
 
 
 
22
 
23
  PandasDataFrame = Type[pd.DataFrame]
24
 
@@ -558,22 +570,130 @@ def docs_elements_from_csv_save(docs_path="documents.csv"):
558
 
559
  # ## Create embeddings and save faiss vector store to the path specified in `save_to`
560
 
561
- # def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
 
 
 
562
 
563
- # embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
 
 
564
 
565
- # #global embeddings
 
566
 
567
- # #embeddings = embeddings_func
 
568
 
569
- # return embeddings_func
570
 
571
- def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save_to="faiss_embeddings", model_name="BAAI/bge-base-en-v1.5"):
572
- #load_embeddings(model_name=model_name)
 
 
 
 
 
 
 
 
573
 
574
  print(f"> Total split documents: {len(docs_out)}")
575
 
576
- vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings_model_object)
 
 
577
 
578
  save_to_path = Path(save_folder, save_to)
579
  save_to_path.mkdir(parents=True, exist_ok=True)
@@ -603,14 +723,68 @@ def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save
603
  index_faiss.unlink(missing_ok=True)
604
  index_pkl.unlink(missing_ok=True)
605
 
606
- # Move ZIP inside the folder for easier reference
607
- #final_zip_path = save_to_path.with_suffix('.zip')
608
-
609
  print("> Archive complete")
610
  print(f"> Final ZIP path: {final_zip_path}")
611
 
612
- return "Document processing complete", vectorstore, final_zip_path
 
 
613
 
 
614
 
615
 
616
  # def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
 
6
  import requests
7
  import pandas as pd
8
  import dateutil.parser
9
+ from typing import Type, List, Tuple
10
  import shutil
11
+ import numpy as np
12
+ import gradio as gr
13
+ import zipfile
14
+ import tempfile
15
+ from pathlib import Path
16
 
17
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
18
  #from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
19
  from langchain_community.vectorstores.faiss import FAISS
20
  #from langchain_community.vectorstores import Chroma
21
  from langchain.text_splitter import RecursiveCharacterTextSplitter
22
  from langchain.docstore.document import Document
23
  #from chatfuncs.config import EMBEDDINGS_MODEL_NAME
24
+ from langchain_core.embeddings import Embeddings # Import Embeddings for type hinting
25
+ from tqdm import tqdm
26
+ from langchain_community.docstore.in_memory import InMemoryDocstore # To manually build the docstore
27
+ from uuid import uuid4 # To generate unique IDs for documents in the docstore
28
  from bs4 import BeautifulSoup
29
  from docx import Document as Doc
30
  from pypdf import PdfReader
31
+ import faiss # For directly creating the FAISS index
32
+
33
+ from tools.config import EMBEDDINGS_MODEL_NAME
34
 
35
  PandasDataFrame = Type[pd.DataFrame]
36
 
 
570
 
571
  # ## Create embeddings and save faiss vector store to the path specified in `save_to`
572
 
573
+ def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
574
+
575
+ embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
576
+
577
+ #global embeddings
578
+
579
+ #embeddings = embeddings_func
580
+
581
+ return embeddings_func
582
+
583
+ # def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save_to="faiss_embeddings", model_name="mixedbread-ai/mxbai-embed-xsmall-v1"):
584
+
585
+ # print(f"> Total split documents: {len(docs_out)}")
586
+
587
+ # vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings_model_object)
588
+
589
+ # save_to_path = Path(save_folder, save_to)
590
+ # save_to_path.mkdir(parents=True, exist_ok=True)
591
+
592
+ # vectorstore.save_local(folder_path=str(save_to_path))
593
+
594
+ # print("> FAISS index saved")
595
+ # print(f"> Saved to: {save_to}")
596
+
597
+ # # Ensure files are written before archiving
598
+ # index_faiss = save_to_path / "index.faiss"
599
+ # index_pkl = save_to_path / "index.pkl"
600
+
601
+ # if not index_faiss.exists() or not index_pkl.exists():
602
+ # raise FileNotFoundError("Expected FAISS index files not found before zipping.")
603
+
604
+ # # Flush file system writes by forcing a sync (works best on Unix)
605
+ # try:
606
+ # os.sync()
607
+ # except AttributeError:
608
+ # pass # os.sync() not available on Windows
609
+
610
+ # # Create ZIP archive
611
+ # final_zip_path = shutil.make_archive(str(save_to_path), 'zip', root_dir=str(save_to_path))
612
 
613
+ # # Remove individual index files to avoid leaking large raw files
614
+ # index_faiss.unlink(missing_ok=True)
615
+ # index_pkl.unlink(missing_ok=True)
616
 
617
+ # # Move ZIP inside the folder for easier reference
618
+ # #final_zip_path = save_to_path.with_suffix('.zip')
619
 
620
+ # print("> Archive complete")
621
+ # print(f"> Final ZIP path: {final_zip_path}")
622
 
623
+ # return "Document processing complete", vectorstore, final_zip_path
624
 
625
+
626
+
627
+ def embed_faiss_save_to_zip(
628
+ docs_out: List[Document],
629
+ save_folder: str,
630
+ embeddings_model_object: Embeddings, # Type hint for clarity
631
+ save_to: str = "faiss_embeddings",
632
+ model_name: str = "mixedbread-ai/mxbai-embed-xsmall-v1", # This is a descriptive name, not directly used in FAISS build
633
+ progress: gr.Progress = gr.Progress(track_tqdm=True)
634
+ ) -> Tuple[str, FAISS, Path]:
635
 
636
  print(f"> Total split documents: {len(docs_out)}")
637
 
638
+ # --- Progress Bar Integration Starts Here ---
639
+ print("Starting embedding generation and FAISS index construction...")
640
+
641
+ texts = []
642
+ metadatas = []
643
+ vectors = []
644
+ docstore = InMemoryDocstore()
645
+ index_to_docstore_id = {} # Maps FAISS index position to docstore ID
646
+
647
+ if not docs_out:
648
+ print("No documents provided. Skipping FAISS index creation.")
649
+ return "No documents to process", None, None # Or handle as an error
650
+
651
+ # 1. Generate Embeddings and Populate Data Structures with tqdm
652
+ # Wrap the iteration over docs_out with tqdm for a progress bar
653
+ for i, doc in tqdm(enumerate(docs_out), desc="Generating Embeddings", total=len(docs_out)):
654
+ # Store text and metadata
655
+ texts.append(doc.page_content)
656
+ metadatas.append(doc.metadata)
657
+
658
+ # Generate embedding for the current document
659
+ # embeddings_model_object.embed_documents expects a list of strings
660
+ # and returns a list of lists (embeddings). We take the first element.
661
+ vector = embeddings_model_object.embed_documents([doc.page_content])[0]
662
+ vectors.append(vector)
663
+
664
+ # Populate the internal docstore that FAISS uses
665
+ doc_id = str(uuid4()) # Generate a unique ID for each document
666
+ docstore.add({doc_id: doc}) # Add the full Document object to the docstore
667
+ index_to_docstore_id[i] = doc_id # Map FAISS index position (i) to its doc_id
668
+
669
+ print("\nEmbedding generation complete. Building FAISS index...")
670
+
671
+ # 2. Build the Raw FAISS Index
672
+ # Ensure all embeddings are numpy float32, which FAISS expects.
673
+ # BGE models (like bge-base-en-v1.5) typically produce L2-normalized embeddings,
674
+ # which are ideal for Inner Product (IP) similarity, equivalent to cosine similarity.
675
+ # If your model *does not* output normalized vectors and you want cosine similarity,
676
+ # you must normalize them here: `np.array([v / np.linalg.norm(v) for v in vectors]).astype("float32")`
677
+ # Otherwise, you might use IndexFlatL2 for Euclidean distance.
678
+ # For common embedding models and cosine similarity, `IndexFlatIP` with pre-normalized vectors is standard.
679
+ embeddings_np = np.array(vectors).astype("float32")
680
+ embedding_dimension = embeddings_np.shape[1]
681
+
682
+ # Create a raw FAISS index (e.g., IndexFlatIP for cosine similarity)
683
+ raw_faiss_index = faiss.IndexFlatIP(embedding_dimension)
684
+ raw_faiss_index.add(embeddings_np) # Add all vectors to the raw FAISS index
685
+
686
+ # 3. Create the LangChain FAISS Vectorstore from the components
687
+ # The `embedding_function` is used for subsequent queries to the vectorstore,
688
+ # not for building the initial index here (as we've already done that).
689
+ vectorstore = FAISS(
690
+ embedding_function=embeddings_model_object.embed_query,
691
+ index=raw_faiss_index,
692
+ docstore=docstore,
693
+ index_to_docstore_id=index_to_docstore_id
694
+ # distance_strategy can be set to DistanceStrategy.MAX_INNER_PRODUCT to match IndexFlatIP (the default is Euclidean distance)
695
+ )
696
+ # --- Progress Bar Integration Ends Here ---
697
 
698
  save_to_path = Path(save_folder, save_to)
699
  save_to_path.mkdir(parents=True, exist_ok=True)
 
723
  index_faiss.unlink(missing_ok=True)
724
  index_pkl.unlink(missing_ok=True)
725
 
 
 
 
726
  print("> Archive complete")
727
  print(f"> Final ZIP path: {final_zip_path}")
728
 
729
+ return "Document processing complete", vectorstore, final_zip_path # Return Path object for consistency
730
+
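If the chosen embedding model does not already L2-normalize its outputs, the normalization described in the comments above can be applied before adding vectors to IndexFlatIP, so that inner-product scores behave like cosine similarity. A toy, self-contained sketch with made-up vectors:

import numpy as np
import faiss

vectors = [[0.3, 0.4, 0.0], [1.0, 0.0, 0.0], [0.0, 2.0, 0.0]]     # toy embeddings
embeddings_np = np.array(vectors, dtype="float32")

# Scale each row to unit length; guard against zero-length vectors
norms = np.linalg.norm(embeddings_np, axis=1, keepdims=True)
embeddings_np = embeddings_np / np.clip(norms, 1e-12, None)

index = faiss.IndexFlatIP(embeddings_np.shape[1])
index.add(embeddings_np)

scores, ids = index.search(embeddings_np[:1], 3)                   # inner product == cosine here
print(scores, ids)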
731
+ def get_faiss_store(zip_file_path: str, embeddings_model: Embeddings) -> FAISS:
732
+ """
733
+ Loads a FAISS vector store from a ZIP archive.
734
+
735
+ Args:
736
+ zip_file_path: The string path pointing to the .zip archive containing
737
+ index.faiss and index.pkl. This should be the
738
+ final_zip_path returned by embed_faiss_save_to_zip.
739
+ embeddings_model: The embeddings model object (e.g., OpenAIEmbeddings, HuggingFaceEmbeddings)
740
+ used to create the index. This is crucial for proper deserialization.
741
+
742
+ Returns:
743
+ A FAISS vector store object.
744
+ """
745
+
746
+ zip_file_path = Path(zip_file_path)
747
+
748
+ if not zip_file_path.exists():
749
+ raise FileNotFoundError(f"ZIP archive not found at: {zip_file_path}")
750
+ if not zip_file_path.suffix == '.zip':
751
+ raise ValueError(f"Expected a .zip file, but got: {zip_file_path}")
752
+
753
+ # Create a temporary directory to extract the FAISS index files
754
+ # tempfile.TemporaryDirectory() handles cleanup automatically when the 'with' block exits.
755
+ with tempfile.TemporaryDirectory() as temp_dir_str:
756
+ temp_extract_path = Path(temp_dir_str)
757
+
758
+ print(f"> Extracting {zip_file_path} to temporary directory: {temp_extract_path}")
759
+ with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
760
+ # The zip file contains 'index.faiss' and 'index.pkl' directly at its root.
761
+ # So, extracting to temp_extract_path will place them as temp_extract_path/index.faiss
762
+ zip_ref.extractall(temp_extract_path)
763
+
764
+ # Verify that the files were extracted successfully
765
+ extracted_faiss_file = temp_extract_path / "index.faiss"
766
+ extracted_pkl_file = temp_extract_path / "index.pkl"
767
+
768
+ if not extracted_faiss_file.exists() or not extracted_pkl_file.exists():
769
+ raise FileNotFoundError(
770
+ f"Required FAISS index files (index.faiss, index.pkl) not found "
771
+ f"in extracted location: {temp_extract_path}. "
772
+ f"ZIP content might be structured unexpectedly."
773
+ )
774
+
775
+ print("> Loading FAISS index from extracted files...")
776
+ faiss_vstore = FAISS.load_local(
777
+ folder_path=str(temp_extract_path), # FAISS.load_local expects a string path
778
+ embeddings=embeddings_model,
779
+ allow_dangerous_deserialization=True
780
+ )
781
+ print("> FAISS index loaded successfully.")
782
+
783
+ # The temporary directory and its contents are automatically removed here
784
+ # when the `with tempfile.TemporaryDirectory()` block exits.
785
+ # No need for manual os.remove() calls for index.faiss and index.pkl.
786
 
787
+ return faiss_vstore
788
 
789
 
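A hedged end-to-end sketch of the save/load round trip defined above. The function names come from this file; the document texts, output folder and query are illustrative, and the snippet assumes it runs alongside the imports and helpers already defined here (Document, load_embeddings_model).

from langchain.docstore.document import Document

docs_out = [
    Document(page_content="Chapter 1: housing targets for the borough ...", metadata={"page": 1}),
    Document(page_content="Chapter 2: transport and infrastructure ...", metadata={"page": 2}),
]

embeddings_model = load_embeddings_model()          # defaults to EMBEDDINGS_MODEL_NAME
status, vectorstore, zip_path = embed_faiss_save_to_zip(docs_out, "output", embeddings_model)

# Later, or in a fresh process: rebuild the store straight from the ZIP archive
reloaded_store = get_faiss_store(zip_path, embeddings_model)
print(reloaded_store.similarity_search("what are the housing targets?", k=1))

Note that embed_faiss_save_to_zip embeds documents one at a time so the progress bar can advance per document; batching the embed_documents calls would be faster on large corpora, at the cost of coarser progress reporting.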
790
  # def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
{chatfuncs β†’ tools}/llm_api_call.py RENAMED
@@ -12,8 +12,9 @@ from gradio import Progress
12
  from typing import List, Tuple
13
  from io import StringIO
14
 
15
- from chatfuncs.prompts import prompt1, prompt2, prompt3, system_prompt, summarise_system_prompt, summarise_prompt
16
- from chatfuncs.helper_functions import output_folder, detect_file_type, get_file_path_end, read_file, get_or_create_env_var
 
17
 
18
  # ResponseObject class for AWS Bedrock calls
19
  class ResponseObject:
@@ -171,33 +172,6 @@ def construct_gemini_generative_model(in_api_key: str, temperature: float, model
171
  #model = ai.GenerativeModel.from_cached_content(cached_content=cache, generation_config=config)
172
  model = ai.GenerativeModel(model_name='models/' + model_choice, system_instruction=system_prompt, generation_config=config)
173
 
174
- # Upload CSV file (replace with your actual file path)
175
- #file_id = ai.upload_file(upload_file_path)
176
-
177
-
178
- # if file_type == 'xlsx':
179
- # print("Running through all xlsx sheets")
180
- # #anon_xlsx = pd.ExcelFile(upload_file_path)
181
- # if not in_excel_sheets:
182
- # out_message.append("No Excel sheets selected. Please select at least one to anonymise.")
183
- # continue
184
-
185
- # anon_xlsx = pd.ExcelFile(upload_file_path)
186
-
187
- # # Create xlsx file:
188
- # anon_xlsx_export_file_name = output_folder + out_file_part + "_redacted.xlsx"
189
-
190
-
191
- ### QUERYING LARGE LANGUAGE MODEL ###
192
- # Prompt caching the table and system prompt. See here: https://ai.google.dev/gemini-api/docs/caching?lang=python
193
- # Create a cache with a 5 minute TTL. ONLY FOR CACHES OF AT LEAST 32k TOKENS!
194
- # cache = ai.caching.CachedContent.create(
195
- # model='models/' + model_choice,
196
- # display_name=out_file_part, # used to identify the cache
197
- # system_instruction=system_prompt_with_table,
198
- # ttl=datetime.timedelta(minutes=5),
199
- # )
200
-
201
  return model, config
202
 
203
  def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, model_choice: str) -> ResponseObject:
@@ -276,7 +250,7 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, c
276
  #print("full_prompt:", full_prompt)
277
 
278
  # Generate the model's response
279
- if model_choice in ["gemini-1.5-flash-002", "gemini-1.5-pro-002"]:
280
  try:
281
  response = model.generate_content(contents=full_prompt, generation_config=config)
282
  except Exception as e:
@@ -701,7 +675,7 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
701
  #print("normalised_simple_markdown_table:", normalised_simple_markdown_table)
702
 
703
  # Prepare Gemini models before query
704
- if model_choice in ["gemini-1.5-flash-002", "gemini-1.5-pro-002"]:
705
  print("Using Gemini model:", model_choice)
706
  model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=summarise_system_prompt, max_tokens=max_tokens)
707
  else:
@@ -772,17 +746,12 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
772
 
773
  summary_prompt_list = [formatted_summary_prompt]
774
 
775
- print("master_summary_prompt_list:", summary_prompt_list[0])
776
-
777
  summary_conversation_history = []
778
  summary_whole_conversation = []
779
 
780
  # Process requests to large language model
781
  master_summary_response, summary_conversation_history, whole_summary_conversation, whole_conversation_metadata = process_requests(summary_prompt_list, summarise_system_prompt, summary_conversation_history, summary_whole_conversation, whole_conversation_metadata, model, config, model_choice, temperature, reported_batch_no, master = True)
782
 
783
- print("master_summary_response:", master_summary_response[-1].text)
784
- print("Whole conversation metadata:", whole_conversation_metadata)
785
-
786
  new_topic_table_out_path, new_reference_table_out_path, new_unique_topics_df_out_path, new_topic_df, new_markdown_table, new_reference_df, new_unique_topics_df, master_batch_out_file_part, is_error = write_llm_output_and_logs(master_summary_response, whole_summary_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=False)
787
 
788
  # If error in table parsing, leave function
@@ -832,7 +801,7 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
832
  #system_prompt_with_table = system_prompt + normalised_simple_markdown_table
833
 
834
  # Prepare Gemini models before query
835
- if model_choice in ["gemini-1.5-flash-002", "gemini-1.5-pro-002"]:
836
  print("Using Gemini model:", model_choice)
837
  model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=system_prompt, max_tokens=max_tokens)
838
  else:
@@ -857,9 +826,6 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
857
 
858
  #print("Whole conversation metadata before:", whole_conversation_metadata)
859
 
860
- print("responses:", responses[-1].text)
861
- print("Whole conversation metadata:", whole_conversation_metadata)
862
-
863
  topic_table_out_path, reference_table_out_path, unique_topics_df_out_path, topic_table_df, markdown_table, reference_df, new_unique_topics_df, batch_out_file_part, is_error = write_llm_output_and_logs(responses, whole_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=True)
864
 
865
  # If error in table parsing, leave function
@@ -879,8 +845,6 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
879
 
880
  new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]).drop_duplicates('Subtopic')
881
 
882
- print("new_unique_topics_df:", new_unique_topics_df)
883
-
884
  new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
885
  out_file_paths.append(unique_topics_df_out_path)
886
 
 
12
  from typing import List, Tuple
13
  from io import StringIO
14
 
15
+ from tools.prompts import prompt1, prompt2, prompt3, system_prompt, summarise_system_prompt, summarise_prompt
16
+ from tools.helper_functions import output_folder, detect_file_type, get_file_path_end, read_file, get_or_create_env_var
17
+ from tools.config import GEMINI_MODELS
18
 
19
  # ResponseObject class for AWS Bedrock calls
20
  class ResponseObject:
 
172
  #model = ai.GenerativeModel.from_cached_content(cached_content=cache, generation_config=config)
173
  model = ai.GenerativeModel(model_name='models/' + model_choice, system_instruction=system_prompt, generation_config=config)
174
 
 
 
175
  return model, config
176
 
177
  def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, model_choice: str) -> ResponseObject:
 
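For reference, the Gemini branch above relies on the standard google.generativeai pattern that construct_gemini_generative_model wraps. A minimal standalone sketch with a placeholder API key, model name and prompt; the exact generation settings the app uses are an assumption here.

import google.generativeai as ai

ai.configure(api_key="YOUR_GEMINI_API_KEY")                         # placeholder key
config = ai.GenerationConfig(temperature=0.1, max_output_tokens=4096)

model = ai.GenerativeModel(
    model_name="models/gemini-2.5-flash",                           # any entry from GEMINI_MODELS
    system_instruction="You extract topics from tables of responses.",
    generation_config=config,
)

response = model.generate_content(contents="List the main topics in: apples, pears, buses.",
                                  generation_config=config)
print(response.text)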
250
  #print("full_prompt:", full_prompt)
251
 
252
  # Generate the model's response
253
+ if model_choice in GEMINI_MODELS:
254
  try:
255
  response = model.generate_content(contents=full_prompt, generation_config=config)
256
  except Exception as e:
 
675
  #print("normalised_simple_markdown_table:", normalised_simple_markdown_table)
676
 
677
  # Prepare Gemini models before query
678
+ if model_choice in GEMINI_MODELS:
679
  print("Using Gemini model:", model_choice)
680
  model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=summarise_system_prompt, max_tokens=max_tokens)
681
  else:
 
746
 
747
  summary_prompt_list = [formatted_summary_prompt]
748
 
 
 
749
  summary_conversation_history = []
750
  summary_whole_conversation = []
751
 
752
  # Process requests to large language model
753
  master_summary_response, summary_conversation_history, whole_summary_conversation, whole_conversation_metadata = process_requests(summary_prompt_list, summarise_system_prompt, summary_conversation_history, summary_whole_conversation, whole_conversation_metadata, model, config, model_choice, temperature, reported_batch_no, master = True)
754
 
 
 
 
755
  new_topic_table_out_path, new_reference_table_out_path, new_unique_topics_df_out_path, new_topic_df, new_markdown_table, new_reference_df, new_unique_topics_df, master_batch_out_file_part, is_error = write_llm_output_and_logs(master_summary_response, whole_summary_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=False)
756
 
757
  # If error in table parsing, leave function
 
801
  #system_prompt_with_table = system_prompt + normalised_simple_markdown_table
802
 
803
  # Prepare Gemini models before query
804
+ if model_choice in GEMINI_MODELS:
805
  print("Using Gemini model:", model_choice)
806
  model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=system_prompt, max_tokens=max_tokens)
807
  else:
 
826
 
827
  #print("Whole conversation metadata before:", whole_conversation_metadata)
828
 
 
 
 
829
  topic_table_out_path, reference_table_out_path, unique_topics_df_out_path, topic_table_df, markdown_table, reference_df, new_unique_topics_df, batch_out_file_part, is_error = write_llm_output_and_logs(responses, whole_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=True)
830
 
831
  # If error in table parsing, leave function
 
845
 
846
  new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]).drop_duplicates('Subtopic')
847
 
 
 
848
  new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
849
  out_file_paths.append(unique_topics_df_out_path)
850
 
{chatfuncs β†’ tools}/model_load.py RENAMED
File without changes
{chatfuncs β†’ tools}/prompts.py RENAMED
File without changes