ramysaidagieb committed on
Commit
6ec351d
·
verified ·
1 Parent(s): d3d92c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -27,9 +27,12 @@ def process_pdf(pdf_bytes):
27
  texts.append(chunk.strip())
28
  return texts
29
 
30
- # Ingest data into Chroma
31
  def ingest(pdf_file):
32
- pdf_bytes = pdf_file.read()
 
 
 
33
  texts = process_pdf(pdf_bytes)
34
  embeddings = embedder.encode(texts, show_progress_bar=True)
35
  for i, (chunk, emb) in enumerate(zip(texts, embeddings)):
@@ -40,7 +43,6 @@ def ingest(pdf_file):
40
  def retrieve_context(query):
41
  query_emb = embedder.encode([query])[0]
42
  results = col.query(query_embeddings=[query_emb.tolist()], n_results=1)
43
- # التعامل مع نتائج داخل قائمة من القوائم
44
  context_list = [m["text"] for group in results["metadatas"] for m in group]
45
  return context_list[0] if context_list else ""
46
 
 
27
  texts.append(chunk.strip())
28
  return texts
29
 
30
+ # Ingest data into Chroma, with NamedString support
31
  def ingest(pdf_file):
32
+ if hasattr(pdf_file, "read"):
33
+ pdf_bytes = pdf_file.read()
34
+ else:
35
+ pdf_bytes = pdf_file # قد يكون bytes أو NamedString
36
  texts = process_pdf(pdf_bytes)
37
  embeddings = embedder.encode(texts, show_progress_bar=True)
38
  for i, (chunk, emb) in enumerate(zip(texts, embeddings)):
 
43
  def retrieve_context(query):
44
  query_emb = embedder.encode([query])[0]
45
  results = col.query(query_embeddings=[query_emb.tolist()], n_results=1)
 
46
  context_list = [m["text"] for group in results["metadatas"] for m in group]
47
  return context_list[0] if context_list else ""
48