Merge pull request #8 from seanpedrick-case/dev
Changed embedding model to mixedbread xsmall (mixedbread-ai/mxbai-embed-xsmall-v1) and optimised the related functions in general. Updated Gemini model references.
- .dockerignore +2 -1
- .gitignore +2 -1
- app.py +12 -43
- app_save_docstore.py +0 -307
- chatfuncs/ingest_borough_plan.py +0 -14
- faiss_embedding/faiss_embedding.zip +2 -2
- ingest_borough_plan.py +14 -0
- requirements.txt +2 -1
- test/test_module.py +2 -2
- {chatfuncs → tools}/__init__.py +0 -0
- {chatfuncs → tools}/auth.py +1 -1
- {chatfuncs → tools}/aws_functions.py +1 -1
- {chatfuncs → tools}/chatfuncs.py +195 -195
- {chatfuncs → tools}/config.py +5 -4
- {chatfuncs → tools}/helper_functions.py +1 -1
- {chatfuncs → tools}/ingest.py +188 -14
- {chatfuncs → tools}/llm_api_call.py +6 -42
- {chatfuncs → tools}/model_load.py +0 -0
- {chatfuncs → tools}/prompts.py +0 -0
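The headline change is the switch to the mixedbread xsmall embedding model. As an illustration only (not part of the commit), loading it through the LangChain wrapper already used in this repo would look roughly like the sketch below; the project's own load_embeddings_model in tools/ingest.py may wrap this with extra arguments.

# Illustrative sketch of loading the new default embedding model; the value
# matches EMBEDDINGS_MODEL_NAME in tools/config.py.
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-xsmall-v1")
vector = embeddings.embed_query("What are the 2030 outcomes for Lambeth?")
print(len(vector))  # dimensionality of the xsmall embedding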
.dockerignore
CHANGED
@@ -11,4 +11,5 @@ dist/*
 test/*
 config/*
 output/*
-input/*
+input/*
+borough_plan/*
.gitignore
CHANGED
@@ -11,4 +11,5 @@ dist/*
 test/*
 config/*
 output/*
-input/*
+input/*
+borough_plan/*
app.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 from typing import Type
-from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+#from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 import gradio as gr
 import pandas as pd
@@ -8,17 +8,15 @@ from torch import float16, float32
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
-import zipfile
 
-from …
-
-from …
-from …
-from …
-from …
-
-import …
-import chatfuncs.ingest as ing
+from tools.ingest import embed_faiss_save_to_zip, load_embeddings_model, get_faiss_store
+from tools.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
+from tools.aws_functions import upload_file_to_s3
+from tools.auth import authenticate_user
+from tools.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES, RUN_GEMINI_MODELS, LOAD_LARGE_MODEL
+from tools.model_load import torch_device, gpu_config, cpu_config, context_length
+import tools.chatfuncs as chatf
+import tools.ingest as ing
 
 PandasDataFrame = Type[pd.DataFrame]
 
@@ -34,41 +32,14 @@ if isinstance(DEFAULT_EXAMPLES, str): default_examples_set = eval(DEFAULT_EXAMPLES)
 if isinstance(DEFAULT_MODEL_CHOICES, str): default_model_choices = eval(DEFAULT_MODEL_CHOICES)
 
 # Disable cuda devices if necessary
-#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
-
+#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 
 ###
 # Load preset embeddings, vectorstore, and model
 ###
-
-def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
-
-    embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
-
-    #global embeddings
-    #embeddings = embeddings_func
-
-    return embeddings_func
-
-def get_faiss_store(faiss_vstore_folder:str, embeddings_model:object):
-
-    with zipfile.ZipFile(faiss_vstore_folder + '/' + faiss_vstore_folder + '.zip', 'r') as zip_ref:
-        zip_ref.extractall(faiss_vstore_folder)
-
-    faiss_vstore = FAISS.load_local(folder_path=faiss_vstore_folder, embeddings=embeddings_model, allow_dangerous_deserialization=True)
-    os.remove(faiss_vstore_folder + "/index.faiss")
-    os.remove(faiss_vstore_folder + "/index.pkl")
-
-    #global vectorstore
-    #vectorstore = faiss_vstore
-
-    return faiss_vstore #vectorstore
-
 # Load in default embeddings and embeddings model name
 embeddings_model = load_embeddings_model(EMBEDDINGS_MODEL_NAME)
-vectorstore = get_faiss_store(…
+vectorstore = get_faiss_store(zip_file_path=DEFAULT_EMBEDDINGS_LOCATION, embeddings_model=embeddings_model)#globals()["embeddings"])
 
 chatf.embeddings = embeddings_model
 chatf.vectorstore = vectorstore
@@ -87,7 +58,6 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings_model=embeddings_model):
 
     return out_message, vectorstore_func
 
-
 def create_hf_model(model_name:str, hf_token=HF_TOKEN):
     if torch_device == "cuda":
         if "flan" in model_name:
@@ -167,12 +137,11 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_config=cpu_config):
 
     return model_type, load_confirmation, model_type#model, tokenizer, model_type
 
-
 ###
 # RUN UI
 ###
 
-app = gr.Blocks(theme = gr.themes.…
+app = gr.Blocks(theme = gr.themes.Default(primary_hue="blue"), fill_width=True)#css=".gradio-container {background-color: black}")
 
 with app:
     model_type = SMALL_MODEL_NAME
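The in-app get_faiss_store removed above now lives in tools/ingest.py and is called with a zip_file_path. A minimal sketch of the behaviour, assuming the new version keeps the unzip, load and clean-up logic of the old one (the actual implementation is not shown in this diff):

# Minimal sketch, assuming tools.ingest.get_faiss_store mirrors the removed
# in-app version: extract the zipped index, load it with the supplied
# embeddings model, then remove the unpacked index files.
import os, zipfile
from langchain_community.vectorstores import FAISS

def get_faiss_store_sketch(zip_file_path: str, embeddings_model) -> FAISS:
    folder = os.path.dirname(zip_file_path) or "."
    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(folder)
    store = FAISS.load_local(folder_path=folder, embeddings=embeddings_model,
                             allow_dangerous_deserialization=True)
    os.remove(os.path.join(folder, "index.faiss"))
    os.remove(os.path.join(folder, "index.pkl"))
    return store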
app_save_docstore.py
DELETED
@@ -1,307 +0,0 @@
-# Load in packages
-
-import os
-from typing import Type
-from langchain_community.embeddings import HuggingFaceEmbeddings#, HuggingFaceInstructEmbeddings
-from langchain_community.vectorstores import FAISS
-import gradio as gr
-import pandas as pd
-from transformers import AutoTokenizer
-import torch
-from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-
-PandasDataFrame = Type[pd.DataFrame]
-
-#from chatfuncs.chatfuncs import *
-import chatfuncs.ingest as ing
-
-## Load preset embeddings, vectorstore, and model
-embeddings_name = "BAAI/bge-base-en-v1.5"
-
-def load_embeddings(embeddings_name = embeddings_name): …
-def get_faiss_store(faiss_vstore_folder,embeddings): …
-
-import chatfuncs.chatfuncs as chatf
-chatf.embeddings = load_embeddings(embeddings_name)
-chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
-
-def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None): …
-    # (loaded "Phi 3 Mini (larger, slow)" via llama-cpp-python / hf_hub_download,
-    #  or "Flan Alpaca (small, fast)" via a local create_hf_model using transformers)
-
-# Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
-model_type = "Phi 3 Mini (larger, slow)"
-load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
-
-model_type = "Flan Alpaca (small, fast)"
-load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
-
-def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings): …
-
-# Gradio chat
-block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")
-
-with block:
-    # … roughly 140 further lines removed with this file: the chatbot tab,
-    # the "Load in a different file to chat with" tab (PDF / web page / CSV),
-    # the advanced model settings tab, and all of the event wiring for
-    # load/submit/clear/stop/vote handlers …
-
-block.queue().launch(debug=True)
chatfuncs/ingest_borough_plan.py
DELETED
@@ -1,14 +0,0 @@
-import ingest as ing
-
-borough_plan_text, file_names = ing.parse_file([open("Lambeth_2030-Our_Future_Our_Lambeth.pdf")])
-print("Borough plan text created")
-
-print(borough_plan_text)
-
-borough_plan_docs = ing.text_to_docs(borough_plan_text)
-print("Borough plan docs created")
-
-embedding_model = "BAAI/bge-base-en-v1.5" # "mixedbread-ai/mxbai-embed-xsmall-v1" #
-
-embeddings = ing.load_embeddings(model_name = embedding_model)
-ing.embed_faiss_save_to_zip(borough_plan_docs, save_to="faiss_embedding", model_name = embedding_model)
faiss_embedding/faiss_embedding.zip
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:ca011e3baf4b92201d378f35ab5f2fe0b6d16ac2eaac4f0705b8c4e84e24a6ae
+size 243109
ingest_borough_plan.py
ADDED
@@ -0,0 +1,14 @@
+from tools.ingest import parse_file, text_to_docs, load_embeddings_model, embed_faiss_save_to_zip
+
+borough_plan_text, file_names = parse_file([open("Lambeth_2030-Our_Future_Our_Lambeth.pdf")])
+print("Borough plan text created")
+
+#print(borough_plan_text)
+
+borough_plan_docs = text_to_docs(borough_plan_text)
+print("Borough plan docs created")
+
+embedding_model = "mixedbread-ai/mxbai-embed-xsmall-v1"
+
+embeddings = load_embeddings_model(embeddings_model = embedding_model)
+embed_faiss_save_to_zip(borough_plan_docs, save_folder="borough_plan", embeddings_model_object=embeddings, save_to="faiss_embedding", model_name=embedding_model)
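The embed_faiss_save_to_zip helper called above is defined in tools/ingest.py but its body is not visible in this diff, so the following is a hypothetical sketch only. It assumes the function embeds the documents into a FAISS index, saves the index locally and zips the result; the illustrative signature mirrors the call above.

# Hypothetical sketch: the real embed_faiss_save_to_zip is not shown in this
# diff. Assumed behaviour: build a FAISS index from the documents, save it to
# <save_folder>/<save_to>, and package it as <save_folder>/<save_to>.zip.
import os, shutil
from langchain_community.vectorstores.faiss import FAISS

def embed_faiss_save_to_zip_sketch(docs_out, save_folder, embeddings_model_object,
                                   save_to="faiss_embedding", model_name=""):
    vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings_model_object)
    out_dir = os.path.join(save_folder, save_to)
    vectorstore.save_local(folder_path=out_dir)               # writes index.faiss / index.pkl
    zip_path = shutil.make_archive(out_dir, "zip", out_dir)   # -> <save_folder>/<save_to>.zip
    return "Document processing complete", vectorstore, zip_path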
requirements.txt
CHANGED
@@ -7,7 +7,8 @@ pandas==2.2.3
 transformers==4.51.3
 # For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-#cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu # For linux if dependencies for below build command are not available in the environment
-#llama-cpp-python==0.3.…
+#llama-cpp-python==0.3.9 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS" # CPU
+#llama-cpp-python==0.3.9 -C cmake.args="-DGGML_CUDA=on" # With CUDA
 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu
 sentence_transformers==4.1.0
 faiss-cpu==1.10.0
test/test_module.py
CHANGED
@@ -16,8 +16,8 @@
 # +
 import pytest
 import gradio as gr
-from ..…
-from ..…
+from ..tools.ingest import *
+from ..tools.chatfuncs import *
 
 def test_read_docx():
     content = read_docx('sample.docx')
{chatfuncs → tools}/__init__.py
RENAMED
File without changes
{chatfuncs → tools}/auth.py
RENAMED
@@ -4,7 +4,7 @@ import boto3
 import hmac
 import hashlib
 import base64
-from …
+from tools.config import AWS_CLIENT_ID, AWS_CLIENT_SECRET, AWS_USER_POOL_ID, AWS_REGION
 
 def calculate_secret_hash(client_id:str, client_secret:str, username:str):
     message = username + client_id
{chatfuncs → tools}/aws_functions.py
RENAMED
@@ -2,7 +2,7 @@ from typing import Type, List
 import pandas as pd
 import boto3
 import os
-from …
+from tools.config import AWS_REGION, RUN_AWS_FUNCTIONS, QA_CHATBOT_BUCKET
 
 PandasDataFrame = Type[pd.DataFrame]
 
{chatfuncs → tools}/chatfuncs.py
RENAMED
@@ -14,6 +14,7 @@ from nltk.corpus import stopwords
 from nltk.tokenize import RegexpTokenizer
 from nltk.stem import WordNetLemmatizer
 from keybert import KeyBERT
+from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 
 # For Name Entity Recognition model
 #from span_marker import SpanMarkerModel # Not currently used
@@ -32,9 +33,9 @@ from langchain_community.retrievers import SVMRetriever
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.docstore.document import Document
 
-from …
-from …
-from …
+from tools.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma, instruction_prompt_template_gemini_aws
+from tools.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
+from tools.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS, FEEDBACK_LOGS_FOLDER
 
 model_object = [] # Define empty list for model functions to run
 tokenizer = [] # Define empty list for model functions to run
@@ -75,51 +76,6 @@ ner_model = []#SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base
 # Used to pull out keywords from chat history to add to user queries behind the scenes
 kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
 
-# Vectorstore funcs
-
-# … roughly 45 lines of commented-out docs_to_faiss_save draft code removed …
-
 # Prompt functions
 
 def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):
@@ -141,7 +97,6 @@ def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):
         INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_phi3, input_variables=['question', 'summaries'])
     else:
         INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_gemini_aws, input_variables=['question', 'summaries'])
-
 
     return INSTRUCTION_PROMPT, CONTENT_PROMPT
 
@@ -149,14 +104,44 @@ def write_out_metadata_as_string(metadata_in:str):
     metadata_string = [f"{' '.join(f'{k}: {v}' for k, v in d.items() if k != 'page_section')}" for d in metadata_in] # ['metadata']
     return metadata_string
 
-def generate_expanded_prompt(…
+def generate_expanded_prompt(
+    inputs: Dict[str, str],
+    instruction_prompt: str,
+    content_prompt: str,
+    extracted_memory: list,
+    vectorstore: object,
+    embeddings_model: object,
+    relevant_flag: bool = True,
+    out_passages: int = 2,
+    total_output_passage_chunks_size: int = 5
+):
+    """
+    Generate an expanded prompt for a language model by retrieving and formatting relevant document passages.
+
+    Args:
+        inputs (Dict[str, str]): Dictionary containing the user's question and chat history.
+        instruction_prompt (str): The instruction prompt template to use for the model.
+        content_prompt (str): The content prompt template for formatting passages.
+        extracted_memory (list): List of previous conversation memory or context.
+        vectorstore (object): The vector store object used for document retrieval.
+        embeddings_model (object): The embeddings model used for vector search.
+        relevant_flag (bool, optional): Whether to perform relevant document retrieval. Defaults to True.
+        out_passages (int, optional): Number of passages to retrieve. Defaults to 2.
+        total_output_passage_chunks_size (int, optional): Number of neighboring chunks to expand for context. Defaults to 5.
+
+    Returns:
+        tuple: (instruction_prompt_out, sources_docs_content_string, new_question_kworded)
+            instruction_prompt_out (str): The fully formatted instruction prompt for the model.
+            sources_docs_content_string (str): The formatted string of source passages and metadata for user display.
+            new_question_kworded (str): The (possibly keyword-adapted) user question.
+    """
 
     question = inputs["question"]
     chat_history = inputs["chat_history"]
 
     if relevant_flag == True:
         new_question_kworded = adapt_q_from_chat_history(question, chat_history, extracted_memory) # new_question_keywords,
-        docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, …
+        docs_keep_as_doc, doc_df, docs_keep_out = hybrid_retrieval(new_question_kworded, vectorstore, embeddings_model, k_val = 25, out_passages = out_passages, vec_score_cut_off = 1, vec_weight = 1, bm25_weight = 1, svm_weight = 1)
     else:
         new_question_kworded = question
         doc_df = pd.DataFrame()
@@ -164,7 +149,7 @@ def generate_expanded_prompt(inputs: Dict[str, str], instruction_prompt:str, content_prompt:str, …
         docs_keep_out = []
 
     if (not docs_keep_as_doc) | (doc_df.empty):
-        sorry_prompt = """…
+        sorry_prompt = """Respond 'Sorry, there is no relevant information to answer this question.'"""
         return sorry_prompt, "No relevant sources found.", new_question_kworded
 
     # Expand the found passages to the neighbouring context
@@ -198,7 +183,7 @@ def create_full_prompt(user_input:str,
                        history:list[dict],
                        extracted_memory:str,
                        vectorstore:object,
-                       …
+                       embeddings_model:object,
                        model_type:str,
                        out_passages:list[str],
                        api_key:str="",
@@ -225,7 +210,7 @@ def create_full_prompt(user_input:str,
 
     instruction_prompt_out, docs_content_string, new_question_kworded =\
         generate_expanded_prompt({"question": user_input, "chat_history": history}, #vectorstore,
-                                 instruction_prompt, content_prompt, extracted_memory, vectorstore, …
+                                 instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings_model, relevant_flag, out_passages)
 
     history.append({"metadata":None, "options":None, "role": 'user', "content": user_input})
 
@@ -259,8 +244,6 @@ def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, …
         ],
     }
 
-    print("prompt_config:", prompt_config)
-
     body = json.dumps(prompt_config)
 
     modelId = model_choice
@@ -367,8 +350,6 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, config: dict, …
     elif "claude" in model_choice:
         try:
             print("Calling AWS Claude model")
-            print("prompt:", prompt)
-            print("system_prompt:", system_prompt)
             response = call_aws_claude(prompt, system_prompt, temperature, max_tokens, model_choice)
         except Exception as e:
             # If fails, try again after x seconds in case there is a throttle limit
@@ -420,9 +401,8 @@ def process_requests(prompts: List[str], system_prompt_with_table: str, conversation_history, …
 
     response, conversation_history = send_request(prompts[0], conversation_history, model=model, config=config, model_choice=model_choice, system_prompt=system_prompt_with_table, temperature=temperature)
 
-    print(response.text)
-    print(response.usage_metadata)
+    #print(response.text)
+    #print(response.usage_metadata)
     responses.append(response)
 
     # Create conversation txt object
@@ -464,8 +444,6 @@ def produce_streaming_answer_chatbot(
 
     history = chat_history
 
-    print("history at start of streaming function:", history)
-
     if relevant_query_bool == False:
         history.append({"metadata":None, "options":None, "role": "assistant", "content": 'No relevant query found. Please retry your question'})
 
@@ -557,8 +535,6 @@ def produce_streaming_answer_chatbot(
     elif "claude" in model_type:
         system_prompt = "You are answering questions from the user based on source material. Make sure to fully answer the questions with all required detail."
 
-        print("full_prompt:", full_prompt)
-
         if isinstance(full_prompt, str):
             full_prompt = [full_prompt]
 
@@ -622,7 +598,7 @@ def produce_streaming_answer_chatbot(
             history[-1]['content'] += char
             yield history
 
-    print("history at end of function:", history)
+    #print("history at end of function:", history)
 
 # Chat helper functions
 
@@ -691,164 +667,188 @@ def create_doc_df(docs_keep_out):
 
     return doc_df
 
-def hybrid_retrieval(…
-… previous body of hybrid_retrieval, shown truncated in the diff view; rewritten below …
+def hybrid_retrieval(
+    new_question_kworded: str,
+    vectorstore:FAISS,
+    embeddings_model:HuggingFaceEmbeddings,
+    k_val: int,
+    out_passages: int,
+    vec_score_cut_off: float,
+    vec_weight: float,
+    bm25_weight: float,
+    svm_weight: float
+) -> tuple:
+    """
+    Perform hybrid retrieval of relevant documents based on a query using vector similarity, BM25, and SVM weights.
+
+    Args:
+        new_question_kworded (str): The keyword-adapted user query.
+        vectorstore: The vectorstore object for similarity search.
+        embeddings_model: The embeddings model used for vector search.
+        k_val (int): Number of top documents to retrieve.
+        out_passages (int): Number of passages to output.
+        vec_score_cut_off (float): Similarity score threshold for filtering.
+        vec_weight (float): Weight for vector similarity.
+        bm25_weight (float): Weight for BM25 retrieval.
+        svm_weight (float): Weight for SVM retrieval.
+
+    Returns:
+        tuple: (docs_keep_as_doc, doc_df, docs_keep_out)
+            docs_keep_as_doc: List of kept document objects.
+            doc_df: DataFrame of kept documents and metadata.
+            docs_keep_out: List of kept (document, score) tuples.
+    """
+    doc_df = pd.DataFrame()
+
+    docs = vectorstore.similarity_search_with_score(new_question_kworded, k=k_val)
+
+    docs_len = [len(x[0].page_content) for x in docs]
+    docs_scores = [x[1] for x in docs]
+
+    # Only keep sources that are sufficiently relevant (i.e. similarity search score above threshold below)
+    score_more_limit = pd.Series(docs_scores) > vec_score_cut_off
+    docs_keep = list(compress(docs, score_more_limit))
+
+    if not docs_keep:
+        return [], pd.DataFrame(), []
+
+    # Only keep sources that are at least 100 characters long
+    length_more_limit = pd.Series(docs_len) >= 100
+    docs_keep = list(compress(docs_keep, length_more_limit))
+
+    if not docs_keep:
+        return [], pd.DataFrame(), []
+
+    docs_keep_as_doc = [x[0] for x in docs_keep]
+    docs_keep_length = len(docs_keep_as_doc)
+
+    if docs_keep_length == 1:
+        content=[]
+        meta_url=[]
+        score=[]
+
+        for item in docs_keep:
+            content.append(item[0].page_content)
+            meta_url.append(item[0].metadata['source'])
+            score.append(item[1])
+
+        # Create df from 'winning' passages
+        doc_df = pd.DataFrame(list(zip(content, meta_url, score)),
+                              columns =['page_content', 'meta_url', 'score'])
+
+        docs_content = doc_df['page_content'].astype(str)
+        docs_url = doc_df['meta_url']
+
+        return docs_keep_as_doc, doc_df, docs_content, docs_url
+
+    # Check for if more docs are removed than the desired output
+    if out_passages > docs_keep_length:
+        out_passages = docs_keep_length
+        k_val = docs_keep_length
+
+    vec_rank = [*range(1, docs_keep_length+1)]
+    vec_score = [(docs_keep_length/x)*vec_weight for x in vec_rank]
+
+    print("Number of documents remaining: ", docs_keep_length)
+
+    # 2nd level check using BM25s package to do keyword search on retrieved passages.
+    content_keep=[]
+    for item in docs_keep:
+        content_keep.append(item[0].page_content)
+
+    # Prepare Corpus (Tokenized & Optional Stemming)
+    corpus = [doc.lower() for doc in content_keep]
+    #stemmer = SnowballStemmer("english", ignore_stopwords=True) # NLTK stemming not compatible
+    stemmer = Stemmer.Stemmer("english")
+    corpus_tokens = bm25s.tokenize(corpus, stopwords="en", stemmer=stemmer)
+
+    # Create and Index with BM25s
+    retriever = bm25s.BM25()
+    retriever.index(corpus_tokens)
+
+    # Query Processing (Stemming applied consistently if used above)
+    query_tokens = bm25s.tokenize(new_question_kworded.lower(), stemmer=stemmer)
+    results, scores = retriever.retrieve(query_tokens, corpus=corpus, k=len(corpus)) # Retrieve all docs
+
+    for i in range(results.shape[1]):
+        doc, score = results[0, i], scores[0, i]
+        print(f"Rank {i+1} (score: {score:.2f}): {doc}")
+
+    # Rank Calculation (Custom Logic for Your BM25 Score)
+    bm25_rank = list(range(1, len(results[0]) + 1))
+    #bm25_rank = results[0]#.tolist()[0] # Since you have a single query
+    bm25_score = [(docs_keep_length / (rank + 1)) * bm25_weight for rank in bm25_rank]
+    # +1 to avoid division by 0 for rank 0
+
+    # Result Ordering (Using the calculated ranks)
+    pairs = list(zip(bm25_rank, docs_keep_as_doc))
+    pairs.sort()
+    bm25_result = [value for rank, value in pairs]
+
+    # 3rd level check on retrieved docs with SVM retriever
+    # Check the type of the embeddings_model object
+    embeddings_type = type(embeddings_model)
+
+    #hf_embeddings = HuggingFaceEmbeddings(**embeddings)
+    hf_embeddings = embeddings_model
+
+    svm_retriever = SVMRetriever.from_texts(content_keep, hf_embeddings, k = k_val)
+    svm_result = svm_retriever.invoke(new_question_kworded)
+
+    svm_rank=[]
+    svm_score = []
+
+    for vec_item in docs_keep:
+        x = 0
+        for svm_item in svm_result:
+            x = x + 1
+            if svm_item.page_content == vec_item[0].page_content:
+                svm_rank.append(x)
+                svm_score.append((docs_keep_length/x)*svm_weight)
+
+    ## Calculate final score based on three ranking methods
+    final_score = [a + b + c for a, b, c in zip(vec_score, bm25_score, svm_score)]
+    final_rank = [sorted(final_score, reverse=True).index(x)+1 for x in final_score]
+    # Force final_rank to increment by 1 each time
+    final_rank = list(pd.Series(final_rank).rank(method='first'))
+
+    #print("final rank: " + str(final_rank))
+    #print("out_passages: " + str(out_passages))
+
+    best_rank_index_pos = []
+
+    for x in range(1,out_passages+1):
+        try:
+            best_rank_index_pos.append(final_rank.index(x))
+        except IndexError: # catch the error
+            pass
+
+    best_rank_pos_series = pd.Series(best_rank_index_pos)
+
+    docs_keep_out = [docs_keep[i] for i in best_rank_index_pos]
+
+    # Keep only 'best' options
+    docs_keep_as_doc = [x[0] for x in docs_keep_out]
+
+    # Make df of best options
+    doc_df = create_doc_df(docs_keep_out)
+
+    return docs_keep_as_doc, doc_df, docs_keep_out
 
 def get_expanded_passages(vectorstore, docs, width):
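The rewritten hybrid_retrieval combines three rank-derived scores, each roughly of the form (number_of_kept_docs / rank) * weight for the vector, BM25 and SVM orderings, and keeps the passages with the best combined rank. A small standalone worked example of that scoring step, using made-up ranks for four kept passages (weights of 1 match the defaults passed from generate_expanded_prompt):

# Worked sketch of the rank-combination step in hybrid_retrieval; the ranks
# below are illustrative only.
import pandas as pd

docs_keep_length = 4
vec_rank = [1, 2, 3, 4]    # order from the FAISS similarity search
bm25_rank = [2, 1, 4, 3]   # order from the BM25s keyword search
svm_rank = [1, 3, 2, 4]    # order from the SVM retriever

vec_score = [(docs_keep_length / r) * 1 for r in vec_rank]
bm25_score = [(docs_keep_length / (r + 1)) * 1 for r in bm25_rank]   # +1 as in the source
svm_score = [(docs_keep_length / r) * 1 for r in svm_rank]

final_score = [a + b + c for a, b, c in zip(vec_score, bm25_score, svm_score)]
final_rank = list(pd.Series(
    [sorted(final_score, reverse=True).index(x) + 1 for x in final_score]
).rank(method='first'))

print(final_score)  # approx [9.33, 5.33, 4.13, 3.0] -> passage 0 ranks first
print(final_rank)   # [1.0, 2.0, 3.0, 4.0]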
{chatfuncs β tools}/config.py
RENAMED
@@ -200,17 +200,18 @@ if LOAD_LARGE_MODEL == "1":
|
|
200 |
default_model_choices.append(LARGE_MODEL_NAME)
|
201 |
|
202 |
if RUN_AWS_FUNCTIONS == "1":
|
203 |
-default_model_choices.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-
|
204 |
|
205 |
if RUN_GEMINI_MODELS == "1":
|
206 |
-
|
|
|
207 |
|
208 |
|
209 |
DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", str(default_model_choices))
|
210 |
|
211 |
-EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "
|
212 |
|
213 |
-DEFAULT_EMBEDDINGS_LOCATION = get_or_create_env_var('DEFAULT_EMBEDDINGS_LOCATION', "faiss_embedding")
|
214 |
|
215 |
DEFAULT_DATA_SOURCE_NAME = get_or_create_env_var('DEFAULT_DATA_SOURCE_NAME', "Document redaction app documentation")
|
216 |
|
|
|
200 |
default_model_choices.append(LARGE_MODEL_NAME)
|
201 |
|
202 |
if RUN_AWS_FUNCTIONS == "1":
|
203 |
+default_model_choices.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-7-sonnet-20250219-v1:0"])
|
204 |
|
205 |
if RUN_GEMINI_MODELS == "1":
|
206 |
+GEMINI_MODELS = ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"]
|
207 |
+default_model_choices.extend(GEMINI_MODELS)
|
208 |
|
209 |
|
210 |
DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", str(default_model_choices))
|
211 |
|
212 |
+EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "mixedbread-ai/mxbai-embed-xsmall-v1") #"mixedbread-ai/mxbai-embed-xsmall-v1"
|
213 |
|
214 |
+DEFAULT_EMBEDDINGS_LOCATION = get_or_create_env_var('DEFAULT_EMBEDDINGS_LOCATION', "faiss_embedding/faiss_embedding.zip")
|
215 |
|
216 |
DEFAULT_DATA_SOURCE_NAME = get_or_create_env_var('DEFAULT_DATA_SOURCE_NAME', "Document redaction app documentation")
|
217 |
|
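Every value in this hunk is routed through get_or_create_env_var, so the new Gemini model list, the embeddings model name, and the zipped embeddings location are only defaults that can be overridden per deployment. A hedged example of doing so before tools.config is first imported; the override values are illustrative, and it assumes RUN_GEMINI_MODELS is itself read from the environment earlier in config.py (not shown in this hunk):

    # Illustrative only: environment overrides must be set before tools.config is imported.
    import os

    os.environ["EMBEDDINGS_MODEL_NAME"] = "mixedbread-ai/mxbai-embed-xsmall-v1"
    os.environ["DEFAULT_EMBEDDINGS_LOCATION"] = "faiss_embedding/faiss_embedding.zip"
    os.environ["RUN_GEMINI_MODELS"] = "1"  # assumption: this flag is env-driven earlier in config.py

    from tools.config import EMBEDDINGS_MODEL_NAME, DEFAULT_EMBEDDINGS_LOCATION, GEMINI_MODELS
    print(EMBEDDINGS_MODEL_NAME, DEFAULT_EMBEDDINGS_LOCATION, GEMINI_MODELS)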
{chatfuncs β tools}/helper_functions.py
RENAMED
@@ -3,7 +3,7 @@ import gradio as gr
|
|
3 |
import pandas as pd
|
4 |
import boto3
|
5 |
from botocore.exceptions import ClientError
|
6 |
-from
|
7 |
|
8 |
def get_or_create_env_var(var_name, default_value):
|
9 |
# Get the environment variable if it exists
|
|
|
3 |
import pandas as pd
|
4 |
import boto3
|
5 |
from botocore.exceptions import ClientError
|
6 |
+from tools.config import CUSTOM_HEADER_VALUE, CUSTOM_HEADER, OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, AWS_USER_POOL_ID
|
7 |
|
8 |
def get_or_create_env_var(var_name, default_value):
|
9 |
# Get the environment variable if it exists
|
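Only the import path changes in this file; get_or_create_env_var, whose first lines are visible above, keeps the same contract of reading an environment variable and otherwise falling back to a default. A minimal sketch of that pattern, under the assumption that it mirrors the repository's helper rather than reproducing it exactly:

    import os

    def get_or_create_env_var(var_name: str, default_value: str) -> str:
        # Get the environment variable if it exists; otherwise register and return the default.
        # Assumption: the real helper also writes the default back into the environment.
        value = os.environ.get(var_name)
        if value is None:
            os.environ[var_name] = default_value
            value = default_value
        return value

    print(get_or_create_env_var("DEFAULT_DATA_SOURCE_NAME", "Document redaction app documentation"))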
{chatfuncs β tools}/ingest.py
RENAMED
@@ -6,19 +6,31 @@ import re
|
|
6 |
import requests
|
7 |
import pandas as pd
|
8 |
import dateutil.parser
|
9 |
-
from typing import Type, List
|
10 |
import shutil
|
11 |
|
|
|
12 |
#from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
|
13 |
from langchain_community.vectorstores.faiss import FAISS
|
14 |
#from langchain_community.vectorstores import Chroma
|
15 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
16 |
from langchain.docstore.document import Document
|
17 |
#from chatfuncs.config import EMBEDDINGS_MODEL_NAME
|
18 |
-
|
19 |
from bs4 import BeautifulSoup
|
20 |
from docx import Document as Doc
|
21 |
from pypdf import PdfReader
|
|
22 |
|
23 |
PandasDataFrame = Type[pd.DataFrame]
|
24 |
|
@@ -558,22 +570,130 @@ def docs_elements_from_csv_save(docs_path="documents.csv"):
|
|
558 |
|
559 |
# ## Create embeddings and save faiss vector store to the path specified in `save_to`
|
560 |
|
561 |
-
|
562 |
|
563 |
-
#
|
564 |
|
565 |
-
# #
|
|
|
566 |
|
567 |
-
#
|
|
|
568 |
|
569 |
-
# return
|
570 |
|
571 |
-
|
572 |
-
|
573 |
|
574 |
print(f"> Total split documents: {len(docs_out)}")
|
575 |
|
576 |
-
|
577 |
|
578 |
save_to_path = Path(save_folder, save_to)
|
579 |
save_to_path.mkdir(parents=True, exist_ok=True)
|
@@ -603,14 +723,68 @@ def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save
|
|
603 |
index_faiss.unlink(missing_ok=True)
|
604 |
index_pkl.unlink(missing_ok=True)
|
605 |
|
606 |
-
# Move ZIP inside the folder for easier reference
|
607 |
-
#final_zip_path = save_to_path.with_suffix('.zip')
|
608 |
-
|
609 |
print("> Archive complete")
|
610 |
print(f"> Final ZIP path: {final_zip_path}")
|
611 |
|
612 |
-
return "Document processing complete", vectorstore, final_zip_path
|
613 |
|
|
|
614 |
|
615 |
|
616 |
# def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
|
|
|
6 |
import requests
|
7 |
import pandas as pd
|
8 |
import dateutil.parser
|
9 |
+
from typing import Type, List, Tuple
|
10 |
import shutil
|
11 |
+
import numpy as np
|
12 |
+
import gradio as gr
|
13 |
+
import zipfile
|
14 |
+
import tempfile
|
15 |
+
from pathlib import Path
|
16 |
|
17 |
+
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
|
18 |
#from langchain_community.embeddings import HuggingFaceEmbeddings # HuggingFaceInstructEmbeddings,
|
19 |
from langchain_community.vectorstores.faiss import FAISS
|
20 |
#from langchain_community.vectorstores import Chroma
|
21 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
22 |
from langchain.docstore.document import Document
|
23 |
#from chatfuncs.config import EMBEDDINGS_MODEL_NAME
|
24 |
+
from langchain_core.embeddings import Embeddings # Import Embeddings for type hinting
|
25 |
+
from tqdm import tqdm
|
26 |
+
from langchain_community.docstore.in_memory import InMemoryDocstore # To manually build the docstore
|
27 |
+
from uuid import uuid4 # To generate unique IDs for documents in the docstore
|
28 |
from bs4 import BeautifulSoup
|
29 |
from docx import Document as Doc
|
30 |
from pypdf import PdfReader
|
31 |
+
import faiss # For directly creating the FAISS index
|
32 |
+
|
33 |
+
from tools.config import EMBEDDINGS_MODEL_NAME
|
34 |
|
35 |
PandasDataFrame = Type[pd.DataFrame]
|
36 |
|
|
|
570 |
|
571 |
# ## Create embeddings and save faiss vector store to the path specified in `save_to`
|
572 |
|
573 |
+
def load_embeddings_model(embeddings_model = EMBEDDINGS_MODEL_NAME):
|
574 |
+
|
575 |
+
embeddings_func = HuggingFaceEmbeddings(model_name=embeddings_model)
|
576 |
+
|
577 |
+
#global embeddings
|
578 |
+
|
579 |
+
#embeddings = embeddings_func
|
580 |
+
|
581 |
+
return embeddings_func
|
582 |
+
|
583 |
+
# def embed_faiss_save_to_zip(docs_out, save_folder, embeddings_model_object, save_to="faiss_embeddings", model_name="mixedbread-ai/mxbai-embed-xsmall-v1"):
|
584 |
+
|
585 |
+
# print(f"> Total split documents: {len(docs_out)}")
|
586 |
+
|
587 |
+
# vectorstore = FAISS.from_documents(documents=docs_out, embedding=embeddings_model_object)
|
588 |
+
|
589 |
+
# save_to_path = Path(save_folder, save_to)
|
590 |
+
# save_to_path.mkdir(parents=True, exist_ok=True)
|
591 |
+
|
592 |
+
# vectorstore.save_local(folder_path=str(save_to_path))
|
593 |
+
|
594 |
+
# print("> FAISS index saved")
|
595 |
+
# print(f"> Saved to: {save_to}")
|
596 |
+
|
597 |
+
# # Ensure files are written before archiving
|
598 |
+
# index_faiss = save_to_path / "index.faiss"
|
599 |
+
# index_pkl = save_to_path / "index.pkl"
|
600 |
+
|
601 |
+
# if not index_faiss.exists() or not index_pkl.exists():
|
602 |
+
# raise FileNotFoundError("Expected FAISS index files not found before zipping.")
|
603 |
+
|
604 |
+
# # Flush file system writes by forcing a sync (works best on Unix)
|
605 |
+
# try:
|
606 |
+
# os.sync()
|
607 |
+
# except AttributeError:
|
608 |
+
# pass # os.sync() not available on Windows
|
609 |
+
|
610 |
+
# # Create ZIP archive
|
611 |
+
# final_zip_path = shutil.make_archive(str(save_to_path), 'zip', root_dir=str(save_to_path))
|
612 |
|
613 |
+
# # Remove individual index files to avoid leaking large raw files
|
614 |
+
# index_faiss.unlink(missing_ok=True)
|
615 |
+
# index_pkl.unlink(missing_ok=True)
|
616 |
|
617 |
+
# # Move ZIP inside the folder for easier reference
|
618 |
+
# #final_zip_path = save_to_path.with_suffix('.zip')
|
619 |
|
620 |
+
# print("> Archive complete")
|
621 |
+
# print(f"> Final ZIP path: {final_zip_path}")
|
622 |
|
623 |
+
# return "Document processing complete", vectorstore, final_zip_path
|
624 |
|
625 |
+
|
626 |
+
|
627 |
+
def embed_faiss_save_to_zip(
|
628 |
+
docs_out: List[Document],
|
629 |
+
save_folder: str,
|
630 |
+
embeddings_model_object: Embeddings, # Type hint for clarity
|
631 |
+
save_to: str = "faiss_embeddings",
|
632 |
+
model_name: str = "mixedbread-ai/mxbai-embed-xsmall-v1", # This is a descriptive name, not directly used in FAISS build
|
633 |
+
progress: gr.Progress = gr.Progress(track_tqdm=True)
|
634 |
+
) -> Tuple[str, FAISS, Path]:
|
635 |
|
636 |
print(f"> Total split documents: {len(docs_out)}")
|
637 |
|
638 |
+
# --- Progress Bar Integration Starts Here ---
|
639 |
+
print("Starting embedding generation and FAISS index construction...")
|
640 |
+
|
641 |
+
texts = []
|
642 |
+
metadatas = []
|
643 |
+
vectors = []
|
644 |
+
docstore = InMemoryDocstore()
|
645 |
+
index_to_docstore_id = {} # Maps FAISS index position to docstore ID
|
646 |
+
|
647 |
+
if not docs_out:
|
648 |
+
print("No documents provided. Skipping FAISS index creation.")
|
649 |
+
return "No documents to process", None, None # Or handle as an error
|
650 |
+
|
651 |
+
# 1. Generate Embeddings and Populate Data Structures with tqdm
|
652 |
+
# Wrap the iteration over docs_out with tqdm for a progress bar
|
653 |
+
for i, doc in tqdm(enumerate(docs_out), desc="Generating Embeddings", total=len(docs_out)):
|
654 |
+
# Store text and metadata
|
655 |
+
texts.append(doc.page_content)
|
656 |
+
metadatas.append(doc.metadata)
|
657 |
+
|
658 |
+
# Generate embedding for the current document
|
659 |
+
# embeddings_model_object.embed_documents expects a list of strings
|
660 |
+
# and returns a list of lists (embeddings). We take the first element.
|
661 |
+
vector = embeddings_model_object.embed_documents([doc.page_content])[0]
|
662 |
+
vectors.append(vector)
|
663 |
+
|
664 |
+
# Populate the internal docstore that FAISS uses
|
665 |
+
doc_id = str(uuid4()) # Generate a unique ID for each document
|
666 |
+
docstore.add({doc_id: doc}) # Add the full Document object to the docstore
|
667 |
+
index_to_docstore_id[i] = doc_id # Map FAISS index position (i) to its doc_id
|
668 |
+
|
669 |
+
print("\nEmbedding generation complete. Building FAISS index...")
|
670 |
+
|
671 |
+
# 2. Build the Raw FAISS Index
|
672 |
+
# Ensure all embeddings are numpy float32, which FAISS expects.
|
673 |
+
# BGE models (like bge-base-en-v1.5) typically produce L2-normalized embeddings,
|
674 |
+
# which are ideal for Inner Product (IP) similarity, equivalent to cosine similarity.
|
675 |
+
# If your model *does not* output normalized vectors and you want cosine similarity,
|
676 |
+
# you must normalize them here: `np.array([v / np.linalg.norm(v) for v in vectors]).astype("float32")`
|
677 |
+
# Otherwise, you might use IndexFlatL2 for Euclidean distance.
|
678 |
+
# For common embedding models and cosine similarity, `IndexFlatIP` with pre-normalized vectors is standard.
|
679 |
+
embeddings_np = np.array(vectors).astype("float32")
|
680 |
+
embedding_dimension = embeddings_np.shape[1]
|
681 |
+
|
682 |
+
# Create a raw FAISS index (e.g., IndexFlatIP for cosine similarity)
|
683 |
+
raw_faiss_index = faiss.IndexFlatIP(embedding_dimension)
|
684 |
+
raw_faiss_index.add(embeddings_np) # Add all vectors to the raw FAISS index
|
685 |
+
|
686 |
+
# 3. Create the LangChain FAISS Vectorstore from the components
|
687 |
+
# The `embedding_function` is used for subsequent queries to the vectorstore,
|
688 |
+
# not for building the initial index here (as we've already done that).
|
689 |
+
vectorstore = FAISS(
|
690 |
+
embedding_function=embeddings_model_object.embed_query,
|
691 |
+
index=raw_faiss_index,
|
692 |
+
docstore=docstore,
|
693 |
+
index_to_docstore_id=index_to_docstore_id
|
694 |
+
# distance_strategy defaults to COSINE, which is appropriate for IndexFlatIP
|
695 |
+
)
|
696 |
+
# --- Progress Bar Integration Ends Here ---
|
697 |
|
698 |
save_to_path = Path(save_folder, save_to)
|
699 |
save_to_path.mkdir(parents=True, exist_ok=True)
|
|
|
723 |
index_faiss.unlink(missing_ok=True)
|
724 |
index_pkl.unlink(missing_ok=True)
|
725 |
|
|
726 |
print("> Archive complete")
|
727 |
print(f"> Final ZIP path: {final_zip_path}")
|
728 |
|
729 |
+
return "Document processing complete", vectorstore, final_zip_path # Return Path object for consistency
|
730 |
+
|
731 |
+
def get_faiss_store(zip_file_path: str, embeddings_model: Embeddings) -> FAISS:
|
732 |
+
"""
|
733 |
+
Loads a FAISS vector store from a ZIP archive.
|
734 |
+
|
735 |
+
Args:
|
736 |
+
zip_file_path: The string path pointing to the .zip archive containing
|
737 |
+
index.faiss and index.pkl. This should be the
|
738 |
+
final_zip_path returned by embed_faiss_save_to_zip.
|
739 |
+
embeddings_model: The embeddings model object (e.g., OpenAIEmbeddings, HuggingFaceEmbeddings)
|
740 |
+
used to create the index. This is crucial for proper deserialization.
|
741 |
+
|
742 |
+
Returns:
|
743 |
+
A FAISS vector store object.
|
744 |
+
"""
|
745 |
+
|
746 |
+
zip_file_path = Path(zip_file_path)
|
747 |
+
|
748 |
+
if not zip_file_path.exists():
|
749 |
+
raise FileNotFoundError(f"ZIP archive not found at: {zip_file_path}")
|
750 |
+
if not zip_file_path.suffix == '.zip':
|
751 |
+
raise ValueError(f"Expected a .zip file, but got: {zip_file_path}")
|
752 |
+
|
753 |
+
# Create a temporary directory to extract the FAISS index files
|
754 |
+
# tempfile.TemporaryDirectory() handles cleanup automatically when the 'with' block exits.
|
755 |
+
with tempfile.TemporaryDirectory() as temp_dir_str:
|
756 |
+
temp_extract_path = Path(temp_dir_str)
|
757 |
+
|
758 |
+
print(f"> Extracting {zip_file_path} to temporary directory: {temp_extract_path}")
|
759 |
+
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
|
760 |
+
# The zip file contains 'index.faiss' and 'index.pkl' directly at its root.
|
761 |
+
# So, extracting to temp_extract_path will place them as temp_extract_path/index.faiss
|
762 |
+
zip_ref.extractall(temp_extract_path)
|
763 |
+
|
764 |
+
# Verify that the files were extracted successfully
|
765 |
+
extracted_faiss_file = temp_extract_path / "index.faiss"
|
766 |
+
extracted_pkl_file = temp_extract_path / "index.pkl"
|
767 |
+
|
768 |
+
if not extracted_faiss_file.exists() or not extracted_pkl_file.exists():
|
769 |
+
raise FileNotFoundError(
|
770 |
+
f"Required FAISS index files (index.faiss, index.pkl) not found "
|
771 |
+
f"in extracted location: {temp_extract_path}. "
|
772 |
+
f"ZIP content might be structured unexpectedly."
|
773 |
+
)
|
774 |
+
|
775 |
+
print("> Loading FAISS index from extracted files...")
|
776 |
+
faiss_vstore = FAISS.load_local(
|
777 |
+
folder_path=str(temp_extract_path), # FAISS.load_local expects a string path
|
778 |
+
embeddings=embeddings_model,
|
779 |
+
allow_dangerous_deserialization=True
|
780 |
+
)
|
781 |
+
print("> FAISS index loaded successfully.")
|
782 |
+
|
783 |
+
# The temporary directory and its contents are automatically removed here
|
784 |
+
# when the `with tempfile.TemporaryDirectory()` block exits.
|
785 |
+
# No need for manual os.remove() calls for index.faiss and index.pkl.
|
786 |
|
787 |
+
return faiss_vstore
|
788 |
|
789 |
|
790 |
# def sim_search_local_saved_vec(query, k_val, save_to="faiss_lambeth_census_embedding"):
|
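The commented-out embed_faiss_save_to_zip above built the index with FAISS.from_documents in one call; the replacement embeds documents one at a time under tqdm, assembles an IndexFlatIP plus InMemoryDocstore by hand, and still zips the result, while the new get_faiss_store unpacks that archive into a temporary directory instead of deleting index files in place. A hedged round-trip sketch using these functions; the sample documents and paths are invented, and argument names follow the signatures in the hunk:

    # Illustrative round trip for the new ingest helpers; documents and paths are made up.
    from langchain.docstore.document import Document
    from tools.ingest import load_embeddings_model, embed_faiss_save_to_zip, get_faiss_store

    docs_out = [
        Document(page_content="The borough plan sets out housing targets.", metadata={"page": 1}),
        Document(page_content="Transport policy prioritises cycling routes.", metadata={"page": 2}),
    ]

    embeddings_model = load_embeddings_model()  # defaults to EMBEDDINGS_MODEL_NAME
    status, vectorstore, zip_path = embed_faiss_save_to_zip(
        docs_out,
        save_folder="faiss_embedding",
        embeddings_model_object=embeddings_model,
    )

    # Later, or in a fresh process: reload the store straight from the ZIP archive.
    reloaded_store = get_faiss_store(str(zip_path), embeddings_model)
    print(reloaded_store.similarity_search("housing targets", k=1))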
{chatfuncs β tools}/llm_api_call.py
RENAMED
@@ -12,8 +12,9 @@ from gradio import Progress
|
|
12 |
from typing import List, Tuple
|
13 |
from io import StringIO
|
14 |
|
15 |
-
from
|
16 |
-
from
|
|
|
17 |
|
18 |
# ResponseObject class for AWS Bedrock calls
|
19 |
class ResponseObject:
|
@@ -171,33 +172,6 @@ def construct_gemini_generative_model(in_api_key: str, temperature: float, model
|
|
171 |
#model = ai.GenerativeModel.from_cached_content(cached_content=cache, generation_config=config)
|
172 |
model = ai.GenerativeModel(model_name='models/' + model_choice, system_instruction=system_prompt, generation_config=config)
|
173 |
|
174 |
-
# Upload CSV file (replace with your actual file path)
|
175 |
-
#file_id = ai.upload_file(upload_file_path)
|
176 |
-
|
177 |
-
|
178 |
-
# if file_type == 'xlsx':
|
179 |
-
# print("Running through all xlsx sheets")
|
180 |
-
# #anon_xlsx = pd.ExcelFile(upload_file_path)
|
181 |
-
# if not in_excel_sheets:
|
182 |
-
# out_message.append("No Excel sheets selected. Please select at least one to anonymise.")
|
183 |
-
# continue
|
184 |
-
|
185 |
-
# anon_xlsx = pd.ExcelFile(upload_file_path)
|
186 |
-
|
187 |
-
# # Create xlsx file:
|
188 |
-
# anon_xlsx_export_file_name = output_folder + out_file_part + "_redacted.xlsx"
|
189 |
-
|
190 |
-
|
191 |
-
### QUERYING LARGE LANGUAGE MODEL ###
|
192 |
-
# Prompt caching the table and system prompt. See here: https://ai.google.dev/gemini-api/docs/caching?lang=python
|
193 |
-
# Create a cache with a 5 minute TTL. ONLY FOR CACHES OF AT LEAST 32k TOKENS!
|
194 |
-
# cache = ai.caching.CachedContent.create(
|
195 |
-
# model='models/' + model_choice,
|
196 |
-
# display_name=out_file_part, # used to identify the cache
|
197 |
-
# system_instruction=system_prompt_with_table,
|
198 |
-
# ttl=datetime.timedelta(minutes=5),
|
199 |
-
# )
|
200 |
-
|
201 |
return model, config
|
202 |
|
203 |
def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, model_choice: str) -> ResponseObject:
|
@@ -276,7 +250,7 @@ def send_request(prompt: str, conversation_history: List[dict], model: object, c
|
|
276 |
#print("full_prompt:", full_prompt)
|
277 |
|
278 |
# Generate the model's response
|
279 |
-
if model_choice in
|
280 |
try:
|
281 |
response = model.generate_content(contents=full_prompt, generation_config=config)
|
282 |
except Exception as e:
|
@@ -701,7 +675,7 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
|
|
701 |
#print("normalised_simple_markdown_table:", normalised_simple_markdown_table)
|
702 |
|
703 |
# Prepare Gemini models before query
|
704 |
-
if model_choice in
|
705 |
print("Using Gemini model:", model_choice)
|
706 |
model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=summarise_system_prompt, max_tokens=max_tokens)
|
707 |
else:
|
@@ -772,17 +746,12 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
|
|
772 |
|
773 |
summary_prompt_list = [formatted_summary_prompt]
|
774 |
|
775 |
-
print("master_summary_prompt_list:", summary_prompt_list[0])
|
776 |
-
|
777 |
summary_conversation_history = []
|
778 |
summary_whole_conversation = []
|
779 |
|
780 |
# Process requests to large language model
|
781 |
master_summary_response, summary_conversation_history, whole_summary_conversation, whole_conversation_metadata = process_requests(summary_prompt_list, summarise_system_prompt, summary_conversation_history, summary_whole_conversation, whole_conversation_metadata, model, config, model_choice, temperature, reported_batch_no, master = True)
|
782 |
|
783 |
-
print("master_summary_response:", master_summary_response[-1].text)
|
784 |
-
print("Whole conversation metadata:", whole_conversation_metadata)
|
785 |
-
|
786 |
new_topic_table_out_path, new_reference_table_out_path, new_unique_topics_df_out_path, new_topic_df, new_markdown_table, new_reference_df, new_unique_topics_df, master_batch_out_file_part, is_error = write_llm_output_and_logs(master_summary_response, whole_summary_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=False)
|
787 |
|
788 |
# If error in table parsing, leave function
|
@@ -832,7 +801,7 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
|
|
832 |
#system_prompt_with_table = system_prompt + normalised_simple_markdown_table
|
833 |
|
834 |
# Prepare Gemini models before query
|
835 |
-
if model_choice in
|
836 |
print("Using Gemini model:", model_choice)
|
837 |
model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=system_prompt, max_tokens=max_tokens)
|
838 |
else:
|
@@ -857,9 +826,6 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
|
|
857 |
|
858 |
#print("Whole conversation metadata before:", whole_conversation_metadata)
|
859 |
|
860 |
-
print("responses:", responses[-1].text)
|
861 |
-
print("Whole conversation metadata:", whole_conversation_metadata)
|
862 |
-
|
863 |
topic_table_out_path, reference_table_out_path, unique_topics_df_out_path, topic_table_df, markdown_table, reference_df, new_unique_topics_df, batch_out_file_part, is_error = write_llm_output_and_logs(responses, whole_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=True)
|
864 |
|
865 |
# If error in table parsing, leave function
|
@@ -879,8 +845,6 @@ def llm_query(file_data:pd.DataFrame, existing_topics_w_references_table:pd.Data
|
|
879 |
|
880 |
new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]).drop_duplicates('Subtopic')
|
881 |
|
882 |
-
print("new_unique_topics_df:", new_unique_topics_df)
|
883 |
-
|
884 |
new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
|
885 |
out_file_paths.append(unique_topics_df_out_path)
|
886 |
|
|
|
12 |
from typing import List, Tuple
|
13 |
from io import StringIO
|
14 |
|
15 |
+
from tools.prompts import prompt1, prompt2, prompt3, system_prompt, summarise_system_prompt, summarise_prompt
|
16 |
+
from tools.helper_functions import output_folder, detect_file_type, get_file_path_end, read_file, get_or_create_env_var
|
17 |
+
from tools.config import GEMINI_MODELS
|
18 |
|
19 |
# ResponseObject class for AWS Bedrock calls
|
20 |
class ResponseObject:
|
|
|
172 |
#model = ai.GenerativeModel.from_cached_content(cached_content=cache, generation_config=config)
|
173 |
model = ai.GenerativeModel(model_name='models/' + model_choice, system_instruction=system_prompt, generation_config=config)
|
174 |
|
175 |
return model, config
|
176 |
|
177 |
def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, model_choice: str) -> ResponseObject:
|
|
|
250 |
#print("full_prompt:", full_prompt)
|
251 |
|
252 |
# Generate the model's response
|
253 |
+
if model_choice in GEMINI_MODELS:
|
254 |
try:
|
255 |
response = model.generate_content(contents=full_prompt, generation_config=config)
|
256 |
except Exception as e:
|
|
|
675 |
#print("normalised_simple_markdown_table:", normalised_simple_markdown_table)
|
676 |
|
677 |
# Prepare Gemini models before query
|
678 |
+
if model_choice in GEMINI_MODELS:
|
679 |
print("Using Gemini model:", model_choice)
|
680 |
model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=summarise_system_prompt, max_tokens=max_tokens)
|
681 |
else:
|
|
|
746 |
|
747 |
summary_prompt_list = [formatted_summary_prompt]
|
748 |
|
749 |
summary_conversation_history = []
|
750 |
summary_whole_conversation = []
|
751 |
|
752 |
# Process requests to large language model
|
753 |
master_summary_response, summary_conversation_history, whole_summary_conversation, whole_conversation_metadata = process_requests(summary_prompt_list, summarise_system_prompt, summary_conversation_history, summary_whole_conversation, whole_conversation_metadata, model, config, model_choice, temperature, reported_batch_no, master = True)
|
754 |
|
755 |
new_topic_table_out_path, new_reference_table_out_path, new_unique_topics_df_out_path, new_topic_df, new_markdown_table, new_reference_df, new_unique_topics_df, master_batch_out_file_part, is_error = write_llm_output_and_logs(master_summary_response, whole_summary_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=False)
|
756 |
|
757 |
# If error in table parsing, leave function
|
|
|
801 |
#system_prompt_with_table = system_prompt + normalised_simple_markdown_table
|
802 |
|
803 |
# Prepare Gemini models before query
|
804 |
+
if model_choice in GEMINI_MODELS:
|
805 |
print("Using Gemini model:", model_choice)
|
806 |
model, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=system_prompt, max_tokens=max_tokens)
|
807 |
else:
|
|
|
826 |
|
827 |
#print("Whole conversation metadata before:", whole_conversation_metadata)
|
828 |
|
|
829 |
topic_table_out_path, reference_table_out_path, unique_topics_df_out_path, topic_table_df, markdown_table, reference_df, new_unique_topics_df, batch_out_file_part, is_error = write_llm_output_and_logs(responses, whole_conversation, whole_conversation_metadata, out_file_part, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_unique_topics_df, first_run=True)
|
830 |
|
831 |
# If error in table parsing, leave function
|
|
|
845 |
|
846 |
new_unique_topics_df = pd.concat([new_unique_topics_df, existing_unique_topics_df]).drop_duplicates('Subtopic')
|
847 |
|
|
|
|
|
848 |
new_unique_topics_df.to_csv(unique_topics_df_out_path, index=None)
|
849 |
out_file_paths.append(unique_topics_df_out_path)
|
850 |
|
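With GEMINI_MODELS now imported from tools.config, the repeated `if model_choice in GEMINI_MODELS:` checks in this file stay in sync with whatever models the config advertises instead of repeating hard-coded names. A small sketch of that dispatch, with the non-Gemini branch reduced to a placeholder label:

    # Illustrative dispatch on the shared model list; branch labels are placeholders.
    from tools.config import GEMINI_MODELS

    def pick_backend(model_choice: str) -> str:
        if model_choice in GEMINI_MODELS:
            return "gemini"        # construct_gemini_generative_model(...) route
        return "bedrock_or_local"  # call_aws_claude(...) or local model route

    print(pick_backend("gemini-2.5-flash"))
    print(pick_backend("anthropic.claude-3-haiku-20240307-v1:0"))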
{chatfuncs β tools}/model_load.py
RENAMED
File without changes
|
{chatfuncs β tools}/prompts.py
RENAMED
File without changes
|