Update Old_Document.py
Browse files- Old_Document.py +140 -134
Old_Document.py
CHANGED
@@ -1,135 +1,141 @@
|
|
1 |
-
import os
|
2 |
-
import asyncio
|
3 |
-
from dotenv import load_dotenv
|
4 |
-
import gradio as gr
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
return
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
if
|
68 |
-
return
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
demo.launch(debug=True)
|
|
|
import os
import asyncio
from dotenv import load_dotenv
import gradio as gr

# Load credentials from the environment / .env file instead of hard-coding them.
# SECURITY: an earlier revision committed literal API keys in this file; any
# key that ever appeared in version control must be rotated/revoked.
load_dotenv()

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")
# Default matches the base URL previously hard-coded for the DeepInfra client.
DEEPINFRA_BASE_URL = os.getenv("DEEPINFRA_BASE_URL", "https://api.deepinfra.com/v1/openai")

# Fail fast when the keys this app actually uses are missing (this check was
# previously commented out and referenced the undefined name DEEPINFRA_TOKEN).
if not (DEEPINFRA_API_KEY and WEAVIATE_URL and WEAVIATE_API_KEY):
    raise ValueError("Please set all required keys in .env")
# DeepInfra client — DeepInfra exposes an OpenAI-compatible endpoint, so the
# standard OpenAI SDK client is reused with a custom base_url.
from openai import OpenAI

# BUG FIX: the previous revision passed the undefined name DEEPINFRA_TOKEN
# (NameError at import time); the module defines DEEPINFRA_API_KEY.
# DEEPINFRA_BASE_URL holds the same URL that was hard-coded here before.
openai = OpenAI(
    api_key=DEEPINFRA_API_KEY,
    base_url=DEEPINFRA_BASE_URL,
)
# Weaviate client
import weaviate
from weaviate.classes.init import Auth
from contextlib import contextmanager

@contextmanager
def weaviate_client():
    """Yield a connection to the Weaviate Cloud cluster, always closing it.

    Connects using the module-level WEAVIATE_URL / WEAVIATE_API_KEY settings;
    the connection is closed even if the body raises.
    """
    conn = weaviate.connect_to_weaviate_cloud(
        cluster_url=WEAVIATE_URL,
        auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
    )
    try:
        yield conn
    finally:
        conn.close()
# Global path tracker
# Remembers the most recently uploaded document path across Gradio callbacks.
# NOTE(review): nothing in this module re-reads the file at this path — it is
# only echoed back in status messages; confirm whether real (re-)ingestion of
# the saved file is still TODO.
last_uploaded_path = None
# Embed function
def embed_texts(texts: list[str], batch_size: int = 50) -> list[list[float]]:
    """Embed *texts* in batches via the DeepInfra embeddings endpoint.

    A batch that fails contributes one empty list per input, so the result
    always lines up 1:1 with *texts*.
    """
    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        try:
            response = openai.embeddings.create(
                model="Qwen/Qwen3-Embedding-8B",
                input=chunk,
                encoding_format="float",
            )
            vectors.extend(item.embedding for item in response.data)
        except Exception as err:
            # Best-effort: log and pad with placeholders so indices stay aligned.
            print(f"Embedding error: {err}")
            vectors.extend([] for _ in chunk)
    return vectors
def encode_query(query: str) -> list[float] | None:
    """Embed a single query string; return None when embedding failed."""
    vectors = embed_texts([query], batch_size=1)
    if not vectors:
        return None
    # embed_texts pads failures with an empty list — treat that as None too.
    return vectors[0] or None
async def old_Document(query: str, top_k: int = 1) -> dict:
    """Retrieve up to *top_k* matching text chunks from 'Old_Documents'.

    Returns {"answer": [chunk, ...]}; the list is empty when the query cannot
    be embedded, nothing matches, or the vector search fails.
    """
    empty = {"answer": []}

    vector = encode_query(query)
    if not vector:
        return empty

    try:
        with weaviate_client() as client:
            collection = client.collections.get("Old_Documents")
            result = collection.query.near_vector(
                near_vector=vector,
                limit=top_k,
                return_properties=["text"],
            )
            hits = getattr(result, "objects", None)
            if not hits:
                return empty
            texts = [hit.properties.get("text", "[No Text]") for hit in hits]
            return {"answer": texts}
    except Exception as err:
        # Best-effort: any failure in search or result handling yields no answer.
        print("RAG Error:", err)
        return empty
# New functions to support Gradio app
def ingest_file(path: str) -> str:
    """Record *path* as the active document and return a status message.

    Only updates the module-level ``last_uploaded_path`` tracker; the file
    itself is not parsed here.
    """
    global last_uploaded_path
    last_uploaded_path = path
    filename = os.path.basename(path)
    return f"Old document ingested: {filename}"
def answer_question(query: str) -> str:
    """Run the RAG lookup synchronously and format the hits as a bullet list.

    Any failure (embedding, search, event loop) is reported as an error
    string rather than raised, so the Gradio callback never crashes.
    """
    try:
        response = asyncio.run(old_Document(query))
        passages = response.get("answer", [])
        if not passages:
            return "Sorry, I couldn't find relevant content in the old document."
        return "\n".join(f"- {passage}" for passage in passages)
    except Exception as err:
        return f"Error processing your request: {err}"
# Gradio interface for Old Documents
# Single-page UI: a question box, an optional file upload, and an answer box.
with gr.Blocks(title="Old Documents RAG") as demo:
    gr.Markdown("## Old Documents RAG")
    query = gr.Textbox(placeholder="Your question...", lines=2, label="Ask about Old Documents")
    doc_file = gr.File(label="Upload Old Document (PDF, DOCX, TXT)")
    btn = gr.Button("Submit")
    out = gr.Textbox(label="Answer from Old Documents", lines=8, interactive=False)

    def process_old_doc(query, doc_file):
        # Submit handler: optionally saves a newly uploaded file, then answers
        # the question. NOTE(review): the uploaded file's content is never
        # parsed or embedded anywhere in this module — answers come solely
        # from the pre-existing 'Old_Documents' Weaviate collection.
        if doc_file:
            # Save and ingest the uploaded file
            upload_dir = os.path.join(os.path.dirname(__file__), "uploaded_docs")
            os.makedirs(upload_dir, exist_ok=True)
            # basename() strips any directory components from the client name.
            safe_filename = os.path.basename(doc_file.name)
            save_path = os.path.join(upload_dir, safe_filename)
            # NOTE(review): assumes doc_file is a file-like object exposing
            # .name and .read(); recent Gradio versions pass gr.File values as
            # a filepath string, which has neither — confirm against the
            # pinned gradio version.
            with open(save_path, "wb") as f:
                f.write(doc_file.read())
            status = ingest_file(save_path)
            answer = answer_question(query)
            return f"{status}\n\n{answer}"
        else:
            # Use last uploaded file or return error if none exists
            if last_uploaded_path:
                answer = answer_question(query)
                return f"[Using previously uploaded document: {os.path.basename(last_uploaded_path)}]\n\n{answer}"
            else:
                return "No document uploaded. Please upload an old document to proceed."

    btn.click(fn=process_old_doc, inputs=[query, doc_file], outputs=out)

if __name__ == "__main__":
    # debug=True enables Gradio's error traces in the browser.
    demo.launch(debug=True)