Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,8 +19,8 @@ DEFAULT_PDF_URLS = [
|
|
19 |
]
|
20 |
|
21 |
def preload_data(pdf_urls):
|
22 |
-
embedding_model = SentenceTransformer("
|
23 |
-
|
24 |
def download_pdf(url):
|
25 |
response = requests.get(url, stream=True)
|
26 |
response.raise_for_status()
|
@@ -40,7 +40,7 @@ def preload_data(pdf_urls):
|
|
40 |
text = re.sub(r'\s+', ' ', text).strip()
|
41 |
return text
|
42 |
|
43 |
-
def chunk_text(text, chunk_size=
|
44 |
chunks = []
|
45 |
start = 0
|
46 |
text_length = len(text)
|
@@ -84,7 +84,6 @@ def preload_data(pdf_urls):
|
|
84 |
return index, chunks
|
85 |
|
86 |
index, chunks = preload_data(DEFAULT_PDF_URLS)
|
87 |
-
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
88 |
accelerator = Accelerator()
|
89 |
MODEL_NAME = "google/flan-t5-small"
|
90 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
@@ -146,7 +145,7 @@ def generate_response(query, context):
|
|
146 |
|
147 |
def process_query(query):
|
148 |
retrieved_chunks = adaptive_retrieval(query, index, chunks)
|
149 |
-
merged_chunks = merge_chunks(retrieved_chunks,
|
150 |
reranked_chunks, similarities = rerank(query, merged_chunks)
|
151 |
context = " ".join(reranked_chunks[:3])
|
152 |
answer = generate_response(query, context)
|
|
|
19 |
]
|
20 |
|
21 |
def preload_data(pdf_urls):
|
22 |
+
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
23 |
+
|
24 |
def download_pdf(url):
|
25 |
response = requests.get(url, stream=True)
|
26 |
response.raise_for_status()
|
|
|
40 |
text = re.sub(r'\s+', ' ', text).strip()
|
41 |
return text
|
42 |
|
43 |
+
def chunk_text(text, chunk_size=512, overlap_size=50):
|
44 |
chunks = []
|
45 |
start = 0
|
46 |
text_length = len(text)
|
|
|
84 |
return index, chunks
|
85 |
|
86 |
index, chunks = preload_data(DEFAULT_PDF_URLS)
|
|
|
87 |
accelerator = Accelerator()
|
88 |
MODEL_NAME = "google/flan-t5-small"
|
89 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
145 |
|
146 |
def process_query(query):
|
147 |
retrieved_chunks = adaptive_retrieval(query, index, chunks)
|
148 |
+
merged_chunks = merge_chunks(retrieved_chunks, 50)
|
149 |
reranked_chunks, similarities = rerank(query, merged_chunks)
|
150 |
context = " ".join(reranked_chunks[:3])
|
151 |
answer = generate_response(query, context)
|