Commit · 0068013
Parent(s): a7005fc
HF Spaces

Files changed:
- README.md +23 -4
- api.py +81 -6
- app.py +17 -8
- requirements.txt +2 -1
- utils.py +58 -29
README.md
CHANGED
@@ -9,26 +9,45 @@ app_file: app.py
 pinned: false
 ---
 
-# Backend (FastAPI)
+# Backend (FastAPI with Gradio UI)
 
 ## Structure
 - `api.py` - Main FastAPI app
+- `app.py` - Gradio wrapper for FastAPI
 - `utils.py` - Helper functions
 - `requirements.txt` - Python dependencies
 - `.env.example` - Example environment variables
 
+## Port Configuration
+- **FastAPI**: Port 8000 (internal)
+- **Gradio**: Port 7860 (default for Hugging Face Spaces)
+
+## API Endpoints
+- **Upload Documents**: `/proxy/8000/upload`
+- **Ask Questions**: `/proxy/8000/ask`
+- **Check Status**: `/proxy/8000/status`
+- **API Documentation**: `/proxy/8000/docs`
+
 ## Running Locally
 ```sh
 pip install -r requirements.txt
-
+python app.py
 ```
 
-## Deploying to Render
-
+## Deploying to Hugging Face Spaces
+1. Create a new Space with the Gradio SDK
+2. Upload this repository to the Space
+3. Set the following environment variables in the Space settings:
+   - `GOOGLE_API_KEY` - Your Google Gemini API key
+   - `SUPABASE_URL` - Your Supabase URL
+   - `SUPABASE_KEY` - Your Supabase API key
+
+## For Render (Legacy)
 - Use the following start command on Render:
 ```
 uvicorn api:app --host 0.0.0.0 --port 10000
 ```
 - Add your environment variables in the Render dashboard
 
 ---
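For reference, here is a minimal client sketch against the proxied endpoints (the hostname is taken from the CORS list in `api.py`; the file name and the `/ask` payload shape are assumptions, not part of this commit):

```python
# Hypothetical client for the proxied endpoints; adjust BASE to your Space.
import requests

BASE = "https://codegeass321-backendserver.hf.space/proxy/8000"

# Upload one or more documents; the endpoint expects a multipart "files" field.
with open("sample.pdf", "rb") as fh:
    resp = requests.post(f"{BASE}/upload",
                         files=[("files", ("sample.pdf", fh, "application/pdf"))])
print(resp.json())  # {"status": "success", ...} once processing finishes

# Ask a question about the uploaded documents (payload shape assumed).
resp = requests.post(f"{BASE}/ask", data={"query": "What is this document about?"})
print(resp.json())

# Quick health check.
print(requests.get(f"{BASE}/status").json()["status"])  # "ok"
```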
api.py
CHANGED
@@ -31,6 +31,7 @@ origins = [
     "https://huggingface.co",  # Hugging Face Spaces domain
     "https://codegeass321-chatdocxai.hf.space",  # Old HF space
     "https://codegeass321-backendserver.hf.space",  # New HF space
+    "*",  # Allow requests from the proxy (same origin)
 ]
 
 app.add_middleware(
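One note on the added wildcard: in Starlette's `CORSMiddleware`, a literal `"*"` in `allow_origins` switches on allow-all matching, which makes the three named origins redundant, and browsers refuse `Access-Control-Allow-Origin: *` on credentialed requests. The `app.add_middleware(...)` call is cut off in this hunk, so the following is only a sketch of a configuration consistent with that behavior, not the file's verbatim contents:

```python
# Sketch only: the actual arguments are truncated in the commit view.
from fastapi.middleware.cors import CORSMiddleware

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,    # contains "*", which subsumes the named origins
    allow_credentials=False,  # browsers reject "*" together with credentials
    allow_methods=["*"],
    allow_headers=["*"],
)
```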
@@ -60,9 +61,15 @@ async def options_upload():
 @app.post("/upload")
 async def upload(files: List[UploadFile] = File(...)):
     headers = {
-        "Access-Control-Allow-Origin": "*"
+        "Access-Control-Allow-Origin": "*",
+        "Access-Control-Allow-Methods": "POST, OPTIONS",
+        "Access-Control-Allow-Headers": "Content-Type, Authorization",
     }
     try:
+        print(f"Upload request received with {len(files)} files")
+        for i, file in enumerate(files):
+            print(f"File {i+1}: {file.filename}, content_type: {file.content_type}")
+
         if not files:
             return JSONResponse(
                 content={"status": "error", "message": "No files uploaded."},
@@ -77,21 +84,59 @@ async def upload(files: List[UploadFile] = File(...)):
         print("Memory cleared.")
 
         print("Starting document processing...")
-        raw_docs = load_documents_gradio(files)
+        try:
+            raw_docs = load_documents_gradio(files)
+            print(f"Documents loaded: {len(raw_docs)} documents")
+        except Exception as doc_error:
+            print(f"Error loading documents: {doc_error}")
+            return JSONResponse(
+                content={"status": "error", "message": f"Error loading documents: {str(doc_error)}"},
+                status_code=500,
+                headers=headers
+            )
+
+        if not raw_docs:
+            return JSONResponse(
+                content={"status": "error", "message": "No content could be extracted from the uploaded files."},
+                status_code=400,
+                headers=headers
+            )
+
         print("Documents loaded. Splitting documents...")
-        chunks = split_documents(raw_docs)
+        try:
+            chunks = split_documents(raw_docs)
+            print(f"Documents split into {len(chunks)} chunks")
+        except Exception as split_error:
+            print(f"Error splitting documents: {split_error}")
+            return JSONResponse(
+                content={"status": "error", "message": f"Error splitting documents: {str(split_error)}"},
+                status_code=500,
+                headers=headers
+            )
+
         print("Documents split. Building vector store...")
-        store["value"] = build_vectorstore(chunks)
-
+        try:
+            store["value"] = build_vectorstore(chunks)
+            print("Vector store built successfully.")
+        except Exception as vector_error:
+            print(f"Error building vector store: {vector_error}")
+            return JSONResponse(
+                content={"status": "error", "message": f"Error building vector store: {str(vector_error)}"},
+                status_code=500,
+                headers=headers
+            )
 
         return JSONResponse(
             content={"status": "success", "message": "Document processed successfully! You can now ask questions."},
             headers=headers
         )
     except Exception as e:
+        import traceback
+        error_trace = traceback.format_exc()
         print(f"An error occurred during upload: {e}")
+        print(f"Traceback: {error_trace}")
         return JSONResponse(
-            content={"status": "error", "message": f"An internal server error occurred: {e}"},
+            content={"status": "error", "message": f"An internal server error occurred: {str(e)}"},
             status_code=500,
             headers=headers
         )
@@ -131,3 +176,33 @@ async def ask(
     prompt = build_prompt(top_chunks, query)
     answer = ask_gemini(prompt, client)
     return {"status": "success", "answer": answer.strip(), "transcribed": transcribed}
+
+@app.get("/status")
+async def status():
+    """Simple endpoint to check if the server is running."""
+    import platform
+    import sys
+    import psutil
+
+    # Get memory info
+    process = psutil.Process(os.getpid())
+    memory_info = process.memory_info()
+
+    return {
+        "status": "ok",
+        "message": "Server is running",
+        "google_api_key_set": bool(os.environ.get("GOOGLE_API_KEY")),
+        "vectorstore_loaded": store.get("value") is not None,
+        "system_info": {
+            "platform": platform.platform(),
+            "python_version": sys.version,
+            "memory_usage_mb": memory_info.rss / (1024 * 1024),
+            "cpu_percent": process.cpu_percent(),
+            "available_memory_mb": psutil.virtual_memory().available / (1024 * 1024)
+        },
+        "env_vars": {
+            "PORT": os.environ.get("PORT"),
+            "SPACE_ID": os.environ.get("SPACE_ID"),
+            "SYSTEM": os.environ.get("SYSTEM")
+        }
+    }
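The new `/status` route doubles as a smoke-test target. A minimal check with FastAPI's `TestClient` (a sketch; assumes `psutil` is installed, which `requirements.txt` now provides):

```python
# Minimal smoke test for the new /status endpoint (sketch).
from fastapi.testclient import TestClient

from api import app

client = TestClient(app)

def test_status():
    resp = client.get("/status")
    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "ok"
    # rss-based figure reported by psutil; should be a positive number of MB
    assert body["system_info"]["memory_usage_mb"] > 0
```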
app.py
CHANGED
@@ -3,16 +3,24 @@ from api import app
 import uvicorn
 import threading
 import time
+import os
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse
+
+# FastAPI port - used internally
+FASTAPI_PORT = 8000
 
 # Create a simple Gradio interface
 def create_interface():
     with gr.Blocks(title="ChatDocxAI Backend") as interface:
         gr.Markdown("# ChatDocxAI Backend")
-        gr.Markdown("""
+        gr.Markdown(f"""
         This is the backend server for ChatDocxAI. It provides the following endpoints:
 
-        - `/upload` - Upload documents
-        - `/ask` - Ask questions about uploaded documents
+        - `/proxy/8000/upload` - Upload documents
+        - `/proxy/8000/ask` - Ask questions about uploaded documents
+        - `/proxy/8000/status` - Check API status
 
         The frontend should be configured to communicate with this backend.
         """)
@@ -25,13 +33,13 @@ def create_interface():
     with gr.Row():
         with gr.Column():
             gr.Markdown("## API Documentation")
-            doc_link = gr.HTML(f"<a href='/docs' target='_blank'>View FastAPI Docs</a>")
+            doc_link = gr.HTML(f"<a href='/proxy/8000/docs' target='_blank'>View FastAPI Docs</a>")
 
     return interface
 
-#
+# Set up FastAPI to run on port 8000
 def start_fastapi():
-    uvicorn.run(app, host="0.0.0.0", port=
+    uvicorn.run(app, host="0.0.0.0", port=FASTAPI_PORT)
 
 # Start FastAPI in a separate thread
 fastapi_thread = threading.Thread(target=start_fastapi)
@@ -44,6 +52,7 @@ time.sleep(2)
 # Create and launch the Gradio interface
 interface = create_interface()
 
-# Launch the interface
+# Launch the Gradio interface on the default Hugging Face Spaces port (7860)
 if __name__ == "__main__":
-
+    # Use the port specified by Hugging Face Spaces
+    interface.launch(server_name="0.0.0.0")
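The fixed `time.sleep(2)` in the startup path (visible in the hunk context) is a race: on a cold Space, the FastAPI thread may not be listening yet when Gradio launches. A possible refinement, not part of this commit, polls the new `/status` endpoint instead and marks the thread as a daemon so it exits with the main process; `wait_for_fastapi` is a hypothetical helper:

```python
# Sketch: replace the fixed sleep with a readiness poll against /status.
import threading
import time
import urllib.request

def wait_for_fastapi(port: int = 8000, timeout: float = 30.0) -> bool:
    """Poll the internal FastAPI server until /status answers or we time out."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(f"http://127.0.0.1:{port}/status", timeout=2):
                return True  # server is accepting requests
        except OSError:
            time.sleep(0.5)  # not up yet; retry shortly
    return False

fastapi_thread = threading.Thread(target=start_fastapi, daemon=True)
fastapi_thread.start()
wait_for_fastapi(FASTAPI_PORT)  # instead of time.sleep(2)
```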
requirements.txt
CHANGED
@@ -18,4 +18,5 @@ unstructured[excel]
 unstructured[xml]
 torch
 torchaudio
-transformers
+transformers
+psutil
utils.py
CHANGED
@@ -38,36 +38,65 @@ def authenticate():
 
 def load_documents_gradio(uploaded_files):
     docs = []
+    print(f"Processing {len(uploaded_files)} files")
     for file in uploaded_files:
+        try:
+            # For FastAPI UploadFile, save to a temp file
+            if hasattr(file, "filename") and hasattr(file, "file"):
+                import tempfile
+                suffix = os.path.splitext(file.filename)[1].lower()
+                print(f"Processing file: {file.filename} with suffix {suffix}")
+
+                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                    content = file.file.read()
+                    print(f"Read {len(content)} bytes from {file.filename}")
+                    tmp.write(content)
+                    tmp_path = tmp.name
+
+                # Rewind the file cursor for potential further reads
+                file.file.seek(0)
+                file_path = tmp_path
+            else:
+                file_path = file.name  # For Gradio or other file types
+                print(f"Non-FastAPI file: {file_path}")
+
+            # Detect type and load accordingly
+            if file_path.lower().endswith('.pdf'):
+                print(f"Loading PDF: {file_path}")
+                docs.extend(UnstructuredPDFLoader(file_path).load())
+            elif file_path.lower().endswith('.txt'):
+                print(f"Loading TXT: {file_path}")
+                docs.extend(TextLoader(file_path).load())
+            elif file_path.lower().endswith('.csv'):
+                print(f"Loading CSV: {file_path}")
+                docs.extend(CSVLoader(file_path).load())
+            elif file_path.lower().endswith('.json'):
+                print(f"Loading JSON: {file_path}")
+                docs.extend(JSONLoader(file_path, jq_schema='.', text_content=False).load())
+            elif file_path.lower().endswith('.pptx'):
+                print(f"Loading PPTX: {file_path}")
+                docs.extend(UnstructuredPowerPointLoader(file_path).load())
+            elif file_path.lower().endswith(('.xlsx', '.xls')):
+                print(f"Loading Excel: {file_path}")
+                docs.extend(UnstructuredExcelLoader(file_path).load())
+            elif file_path.lower().endswith('.xml'):
+                print(f"Loading XML: {file_path}")
+                docs.extend(UnstructuredXMLLoader(file_path).load())
+            elif file_path.lower().endswith(('.docx', '.doc')):
+                print(f"Loading Word: {file_path}")
+                docs.extend(UnstructuredWordDocumentLoader(file_path).load())
+            else:
+                print(f'Unsupported File Type: {file_path}')
+
+            print(f"Successfully processed {file_path}")
+        except Exception as e:
+            import traceback
+            print(f"Error processing file {getattr(file, 'filename', file)}: {e}")
+            print(traceback.format_exc())
+            # Continue with next file instead of failing completely
+            continue
+
+    print(f"Total documents loaded: {len(docs)}")
     return docs
 
 
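A side effect of `NamedTemporaryFile(delete=False)` above is that every uploaded file leaves a temporary copy behind for the life of the container. A possible refinement, not part of this commit, removes the copy once the loader has consumed it; `load_one` stands in for the suffix-based dispatch above and is hypothetical:

```python
# Sketch: delete the temp copy of an UploadFile once it has been loaded.
import os
import tempfile

def load_via_tempfile(upload, load_one):
    suffix = os.path.splitext(upload.filename)[1].lower()
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(upload.file.read())
        tmp_path = tmp.name
    try:
        return load_one(tmp_path)  # e.g. UnstructuredPDFLoader(tmp_path).load()
    finally:
        os.unlink(tmp_path)  # reclaim disk space even if loading fails
```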