Niveytha27 committed on
Commit
6fc2acb
·
verified ·
1 Parent(s): b35da19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -19,8 +19,8 @@ DEFAULT_PDF_URLS = [
19
  ]
20
 
21
  def preload_data(pdf_urls):
22
- embedding_model = SentenceTransformer("BAAI/bge-large-en")
23
-
24
  def download_pdf(url):
25
  response = requests.get(url, stream=True)
26
  response.raise_for_status()
@@ -40,7 +40,7 @@ def preload_data(pdf_urls):
40
  text = re.sub(r'\s+', ' ', text).strip()
41
  return text
42
 
43
- def chunk_text(text, chunk_size=700, overlap_size=150):
44
  chunks = []
45
  start = 0
46
  text_length = len(text)
@@ -84,7 +84,6 @@ def preload_data(pdf_urls):
84
  return index, chunks
85
 
86
  index, chunks = preload_data(DEFAULT_PDF_URLS)
87
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
88
  accelerator = Accelerator()
89
  MODEL_NAME = "google/flan-t5-small"
90
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -146,7 +145,7 @@ def generate_response(query, context):
146
 
147
  def process_query(query):
148
  retrieved_chunks = adaptive_retrieval(query, index, chunks)
149
- merged_chunks = merge_chunks(retrieved_chunks, 150)
150
  reranked_chunks, similarities = rerank(query, merged_chunks)
151
  context = " ".join(reranked_chunks[:3])
152
  answer = generate_response(query, context)
 
19
  ]
20
 
21
  def preload_data(pdf_urls):
22
+ embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
23
+
24
  def download_pdf(url):
25
  response = requests.get(url, stream=True)
26
  response.raise_for_status()
 
40
  text = re.sub(r'\s+', ' ', text).strip()
41
  return text
42
 
43
+ def chunk_text(text, chunk_size=512, overlap_size=50):
44
  chunks = []
45
  start = 0
46
  text_length = len(text)
 
84
  return index, chunks
85
 
86
  index, chunks = preload_data(DEFAULT_PDF_URLS)
 
87
  accelerator = Accelerator()
88
  MODEL_NAME = "google/flan-t5-small"
89
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
145
 
146
  def process_query(query):
147
  retrieved_chunks = adaptive_retrieval(query, index, chunks)
148
+ merged_chunks = merge_chunks(retrieved_chunks, 50)
149
  reranked_chunks, similarities = rerank(query, merged_chunks)
150
  context = " ".join(reranked_chunks[:3])
151
  answer = generate_response(query, context)