Codegeass321 committed on
Commit
2c4ccb6
·
0 Parent(s):

Initial commit

.env.example ADDED
@@ -0,0 +1,4 @@
+ # Example environment variables for backend
+ GOOGLE_API_KEY=your_google_api_key_here
+ NEXT_PUBLIC_SUPABASE_URL=your_supabase_url_here
+ NEXT_PUBLIC_SUPABASE_ANON_KEY=your_supabase_anon_key_here
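These variables are read by the backend at startup via python-dotenv (api.py calls `load_dotenv()`). A minimal sketch of that pattern, assuming you have copied `.env.example` to `.env` and filled it in:

```python
# Minimal sketch: load .env and fail fast if the Gemini key is missing.
# Assumes python-dotenv is installed; mirrors the load_dotenv() call in api.py.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError("GOOGLE_API_KEY is not set; copy .env.example to .env and fill it in.")
```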
README.md ADDED
@@ -0,0 +1,25 @@
+ # Backend (FastAPI)
+
+ ## Structure
+ - `api.py` – Main FastAPI app
+ - `utils.py` – Helper functions
+ - `requirements.txt` – Python dependencies
+ - `.env.example` – Example environment variables
+
+ ## Running Locally
+ ```sh
+ pip install -r requirements.txt
+ uvicorn api:app --reload --host 0.0.0.0 --port 8000
+ ```
+
+ ## Deploying to Render
+ - Push this folder to a GitHub repo
+ - Use the following start command on Render:
+ ```
+ uvicorn api:app --host 0.0.0.0 --port 10000
+ ```
+ - Add your environment variables in the Render dashboard
+
+ ---
+
+ **Do not commit your real `.env` file! Use `.env.example` for reference.**
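Once the server is running locally, the two endpoints defined in api.py can be exercised from a short client script. The sketch below uses the `requests` library and a placeholder `example.pdf`; both are assumptions, not part of this commit.

```python
# Quick smoke test against a locally running backend (assumes `requests` is
# installed and the server from api.py is listening on port 8000).
import requests

BASE_URL = "http://localhost:8000"  # adjust if you started uvicorn on another port

# 1) Upload a document so the /upload handler builds the in-memory vector store.
with open("example.pdf", "rb") as f:  # example.pdf is a placeholder file
    r = requests.post(
        f"{BASE_URL}/upload",
        files=[("files", ("example.pdf", f, "application/pdf"))],
    )
print(r.json())

# 2) Ask a typed question; /ask also accepts an "audio" upload instead of text.
r = requests.post(f"{BASE_URL}/ask", data={"text": "Summarize the document."})
print(r.json())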
__pycache__/api.cpython-312.pyc ADDED
Binary file (4.23 kB).
 
__pycache__/utils.cpython-312.pyc ADDED
Binary file (9.45 kB).
 
api.py ADDED
@@ -0,0 +1,80 @@
+ from fastapi import FastAPI, File, UploadFile, Form
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import JSONResponse
+ from typing import List, Optional
+ import numpy as np
+ import io
+ import os
+ from dotenv import load_dotenv
+ from pydub import AudioSegment
+ from utils import (
+     authenticate,
+     split_documents,
+     build_vectorstore,
+     retrieve_context,
+     retrieve_context_approx,
+     build_prompt,
+     ask_gemini,
+     load_documents_gradio,
+     transcribe
+ )
+
+ load_dotenv()
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ client = authenticate()
+ store = {"value": None}
+
+ @app.post("/upload")
+ async def upload(files: List[UploadFile] = File(...)):
+     if not files:
+         return JSONResponse({"status": "error", "message": "No files uploaded."}, status_code=400)
+     raw_docs = load_documents_gradio(files)
+     chunks = split_documents(raw_docs)
+     store["value"] = build_vectorstore(chunks)
+     return {"status": "success", "message": "Document processed successfully! You can now ask questions."}
+
+ @app.post("/ask")
+ async def ask(
+     text: Optional[str] = Form(None),
+     audio: Optional[UploadFile] = File(None)
+ ):
+     transcribed = None
+     if store["value"] is None:
+         return JSONResponse({"status": "error", "message": "Please upload and process a document first."}, status_code=400)
+     if text and text.strip():
+         query = text.strip()
+     elif audio is not None:
+         audio_bytes = await audio.read()
+         try:
+             audio_io = io.BytesIO(audio_bytes)
+             audio_seg = AudioSegment.from_file(audio_io)
+             y = np.array(audio_seg.get_array_of_samples()).astype(np.float32)
+             if audio_seg.channels == 2:
+                 y = y.reshape((-1, 2)).mean(axis=1)  # Convert to mono
+             y /= np.max(np.abs(y))  # Normalize to [-1, 1]
+             sr = audio_seg.frame_rate
+             transcribed = transcribe((sr, y))
+             query = transcribed
+         except FileNotFoundError as e:
+             return JSONResponse({"status": "error", "message": "Audio decode failed: ffmpeg is not installed or not in PATH. Please install ffmpeg."}, status_code=400)
+         except Exception as e:
+             return JSONResponse({"status": "error", "message": f"Audio decode failed: {str(e)}"}, status_code=400)
+     else:
+         return JSONResponse({"status": "error", "message": "Please provide a question by typing or speaking."}, status_code=400)
+     if store["value"]["chunks"] <= 50:
+         top_chunks = retrieve_context(query, store["value"])
+     else:
+         top_chunks = retrieve_context_approx(query, store["value"])
+     prompt = build_prompt(top_chunks, query)
+     answer = ask_gemini(prompt, client)
+     return {"status": "success", "answer": answer.strip(), "transcribed": transcribed}
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ fastapi
+ uvicorn
+ python-dotenv
+ pydub
+ numpy
+ # Add any other dependencies your utils.py or backend needs
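As the trailing comment notes, this list is incomplete: api.py and utils.py also import FAISS, Sentence-Transformers, the Transformers speech pipeline, the Google GenAI client, and several LangChain document loaders, and FastAPI needs `python-multipart` for `Form`/`File` fields. A plausible fuller list inferred from those imports (exact package names, the `faiss-cpu` choice, and the unpinned versions are assumptions); ffmpeg is a separate system-level dependency required by pydub:

```
fastapi
uvicorn
python-multipart
python-dotenv
pydub
numpy
google-genai
sentence-transformers
transformers
torch
faiss-cpu
langchain
langchain-community
unstructured
```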
utils.py ADDED
@@ -0,0 +1,168 @@
+ import os
+ import getpass
+ import faiss
+ import numpy as np
+ import warnings
+ import logging
+
+ # Suppress warnings
+ logging.getLogger("pdfminer").setLevel(logging.ERROR)
+ warnings.filterwarnings("ignore")
+
+ from google import genai
+ from google.genai import types
+ from sentence_transformers import SentenceTransformer
+ from transformers import pipeline
+ from langchain_community.document_loaders import (
+     UnstructuredPDFLoader,
+     TextLoader,
+     CSVLoader,
+     JSONLoader,
+     UnstructuredPowerPointLoader,
+     UnstructuredExcelLoader,
+     UnstructuredXMLLoader,
+     UnstructuredWordDocumentLoader,
+ )
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+
+ def authenticate():
+     """Authenticates with the Google Generative AI API using an API key."""
+     api_key = os.environ.get("GOOGLE_API_KEY")
+     if not api_key:
+         api_key = getpass.getpass("Enter your API Key: ")
+
+     client = genai.Client(api_key=api_key)
+     return client
+
+
+ def load_documents_gradio(uploaded_files):
+     docs = []
+     for file in uploaded_files:
+         # For FastAPI UploadFile, save to a temp file
+         if hasattr(file, "filename") and hasattr(file, "file"):
+             import tempfile
+             suffix = os.path.splitext(file.filename)[1]
+             with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                 tmp.write(file.file.read())
+                 tmp_path = tmp.name
+             file_path = tmp_path
+         else:
+             file_path = file.name  # For Gradio or other file types
+         # Detect type and load accordingly
+         if file_path.lower().endswith('.pdf'):
+             docs.extend(UnstructuredPDFLoader(file_path).load())
+         elif file_path.lower().endswith('.txt'):
+             docs.extend(TextLoader(file_path).load())
+         elif file_path.lower().endswith('.csv'):
+             docs.extend(CSVLoader(file_path).load())
+         elif file_path.lower().endswith('.json'):
+             docs.extend(JSONLoader(file_path).load())
+         elif file_path.lower().endswith('.pptx'):
+             docs.extend(UnstructuredPowerPointLoader(file_path).load())
+         elif file_path.lower().endswith('.xlsx'):
+             docs.extend(UnstructuredExcelLoader(file_path).load())
+         elif file_path.lower().endswith('.xml'):
+             docs.extend(UnstructuredXMLLoader(file_path).load())
+         elif file_path.lower().endswith('.docx'):
+             docs.extend(UnstructuredWordDocumentLoader(file_path).load())
+         else:
+             print(f'Unsupported File Type: {file_path}')
+     return docs
+
+
+ def split_documents(docs, chunk_size=500, chunk_overlap=100):
+     """Splits documents into smaller chunks using RecursiveCharacterTextSplitter."""
+     splitter = RecursiveCharacterTextSplitter(
+         chunk_size=chunk_size, chunk_overlap=chunk_overlap
+     )
+     return splitter.split_documents(docs)
+
+
+ def build_vectorstore(docs, embedding_model_name="all-MiniLM-L6-v2"):
+     """Builds a FAISS vector store from the document chunks."""
+     texts = [doc.page_content.strip() for doc in docs if doc.page_content.strip()]
+     if not texts:
+         raise ValueError("No valid text found in the documents.")
+
+     print(f"No. of Chunks: {len(texts)}")
+
+     model = SentenceTransformer(embedding_model_name)
+     embeddings = model.encode(texts)
+     print(embeddings.shape)
+
+     index = faiss.IndexFlatL2(embeddings.shape[1])
+     index.add(np.array(embeddings).astype("float32"))
+
+     return {
+         "index": index,
+         "texts": texts,
+         "embedding_model": model,
+         "embeddings": embeddings,
+         "chunks": len(texts)
+     }
+
+
+ def retrieve_context(query, store, k=6):
+     """Retrieves the top-k context chunks most similar to the query."""
+     query_vec = store["embedding_model"].encode([query])
+     k = min(k, len(store["texts"]))
+     distances, indices = store["index"].search(query_vec, k)
+     return [store["texts"][i] for i in indices[0]]
+
+
+ def retrieve_context_approx(query, store, k=6):
+     """Retrieves context chunks using approximate nearest neighbor search."""
+     ncells = 50
+     D = store["index"].d
+     index = faiss.IndexFlatL2(D)
+     nindex = faiss.IndexIVFFlat(index, D, ncells)
+     nindex.nprobe = 10
+
+     if not nindex.is_trained:
+         nindex.train(np.array(store["embeddings"]).astype("float32"))
+
+     nindex.add(np.array(store["embeddings"]).astype("float32"))
+     query_vec = store["embedding_model"].encode([query])
+     k = min(k, len(store["texts"]))
+     _, indices = nindex.search(np.array(query_vec).astype("float32"), k)
+     return [store["texts"][i] for i in indices[0]]
+
+
+ def build_prompt(context_chunks, query):
+     """Builds the prompt for the Gemini API using context and query."""
+     context = "\n".join(context_chunks)
+     return f"""You are a highly knowledgeable and helpful assistant. Use the following context to generate a **detailed and step-by-step** answer to the user's question. Include explanations, examples, and reasoning wherever helpful.
+
+ Context:
+ {context}
+
+ Question: {query}
+ Answer:"""
+
+
+ def ask_gemini(prompt, client):
+     """Calls the Gemini API with the given prompt and returns the response."""
+     response = client.models.generate_content(
+         model="gemini-2.0-flash",  # Or your preferred model
+         contents=[prompt],
+         config=types.GenerateContentConfig(max_output_tokens=2048, temperature=0.5, seed=42),
+     )
+     return response.text
+
+ # Speech2Text:
+ def transcribe(audio, model="openai/whisper-base.en"):
+     if audio is None:
+         raise ValueError("No audio detected!")
+
+     transcriber = pipeline("automatic-speech-recognition", model=model)
+     sr, y = audio  # Sampling rate (Hz) and y = amplitude array
+
+     if y.ndim > 1:  # Convert to mono (CH=1) if stereo (CH=2; L & R)
+         y = y.mean(1)
+
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))  # Normalizing the amplitude values in range [-1, 1]
+
+     result = transcriber({"sampling_rate": sr, "raw": y})
+     return result["text"]