Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -27,9 +27,12 @@ def process_pdf(pdf_bytes):
|
|
27 |
texts.append(chunk.strip())
|
28 |
return texts
|
29 |
|
30 |
-
# Ingest the data into Chroma
|
31 |
def ingest(pdf_file):
|
32 |
-
|
|
|
|
|
|
|
33 |
texts = process_pdf(pdf_bytes)
|
34 |
embeddings = embedder.encode(texts, show_progress_bar=True)
|
35 |
for i, (chunk, emb) in enumerate(zip(texts, embeddings)):
|
@@ -40,7 +43,6 @@ def ingest(pdf_file):
|
|
40 |
def retrieve_context(query):
    """Return the first stored "text" value found for *query*, or "" if none.

    The query is encoded with the module-level ``embedder`` and the resulting
    vector is passed to ``col.query`` with ``n_results=1``.
    """
    vector = embedder.encode([query])[0].tolist()
    hits = col.query(query_embeddings=[vector], n_results=1)

    # Handle results that arrive as a list of lists: flatten every group
    # before picking the first text.
    found = []
    for batch in hits["metadatas"]:
        for meta in batch:
            found.append(meta["text"])

    if not found:
        return ""
    return found[0]
|
46 |
|
|
|
27 |
texts.append(chunk.strip())
|
28 |
return texts
|
29 |
|
30 |
+
# Ingest the data into Chroma, with NamedString support
|
31 |
def ingest(pdf_file):
|
32 |
+
if hasattr(pdf_file, "read"):
|
33 |
+
pdf_bytes = pdf_file.read()
|
34 |
+
else:
|
35 |
+
pdf_bytes = pdf_file # قد يكون bytes أو NamedString
|
36 |
texts = process_pdf(pdf_bytes)
|
37 |
embeddings = embedder.encode(texts, show_progress_bar=True)
|
38 |
for i, (chunk, emb) in enumerate(zip(texts, embeddings)):
|
|
|
43 |
def retrieve_context(query):
    """Return the first stored "text" value found for *query*, or "" if none.

    The query is encoded with the module-level ``embedder`` and the resulting
    vector is passed to ``col.query`` with ``n_results=1``.
    """
    vector = embedder.encode([query])[0].tolist()
    hits = col.query(query_embeddings=[vector], n_results=1)

    # "metadatas" is iterated as a list of groups, each holding metadata
    # mappings; flatten every group before picking the first text.
    found = []
    for batch in hits["metadatas"]:
        for meta in batch:
            found.append(meta["text"])

    if not found:
        return ""
    return found[0]
|
48 |
|