Spaces:
Sleeping
Sleeping
File size: 2,554 Bytes
6a8f952 da4f565 3cfed0b 6a8f952 da4f565 4c284da 6a8f952 da4f565 6a8f952 da4f565 3cfed0b da4f565 3cfed0b da4f565 3cfed0b da4f565 3cfed0b da4f565 3cfed0b da4f565 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import os
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from docx import Document
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
# Initialize the Groq API client.
# NOTE(review): the key is read from the "Groq_Api" env var — unconventional
# name, but changing it would break existing deployments; leave as-is.
client = Groq(api_key=os.environ.get("Groq_Api"))

# Page title. The leading "π" looks like a mis-encoded emoji from the
# original source — kept byte-identical since it is a runtime string.
st.title("π A&Q From a File")

# Let the user upload a PDF or DOCX document.
uploaded_file = st.file_uploader("Upload a PDF or DOCX file", type=["pdf", "docx"])
if uploaded_file:
    st.write(f"**File Name:** {uploaded_file.name}")  # Echo the uploaded file's name
# Extract Text
def extract_text(file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Args:
        file: A file-like object exposing a ``name`` attribute
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        The extracted text joined with newlines, or ``""`` when the
        extension is unsupported.
    """
    # Lowercase once so ".PDF" / ".Docx" uploads are not silently ignored.
    name = file.name.lower()
    if name.endswith(".pdf"):
        reader = PdfReader(file)
        # Bind each page's text once (walrus) and skip pages that yield
        # None or an empty string.
        return "\n".join(
            text for page in reader.pages if (text := page.extract_text())
        )
    if name.endswith(".docx"):
        doc = Document(file)
        return "\n".join(para.text for para in doc.paragraphs)
    return ""
if uploaded_file:
    file_text = extract_text(uploaded_file)
    if file_text:
        st.success("File uploaded and text extracted successfully!")
        st.write("Ask a question about the file:")
        query = st.text_input("Enter your question")
        if query:
            # Cache the embedding model so Streamlit reruns (every widget
            # interaction re-executes the script) don't reload the weights.
            @st.cache_resource
            def _load_model():
                """Load the sentence-embedding model once per process."""
                return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

            model = _load_model()

            # Split the document into fixed-size character chunks and embed them.
            chunk_size = 512
            chunks = [file_text[i:i + chunk_size] for i in range(0, len(file_text), chunk_size)]
            embeddings = model.encode(chunks, convert_to_numpy=True)

            # Exact L2 nearest-neighbour index over the chunk embeddings.
            index = faiss.IndexFlatL2(embeddings.shape[1])
            index.add(embeddings)

            # Embed the query and retrieve the most similar chunks.
            # Clamp k: asking FAISS for more neighbours than stored vectors
            # returns -1 indices, which would silently alias chunks[-1].
            query_embedding = model.encode([query], convert_to_numpy=True)
            k = min(3, len(chunks))
            _, retrieved_idx = index.search(query_embedding, k=k)
            relevant_text = " ".join(chunks[i] for i in retrieved_idx[0])

            # Ask the LLM, supplying only the retrieved context chunks.
            chat_completion = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": f"Answer based on this document: {query}\n\n{relevant_text}"},
                ],
                model="llama-3.3-70b-versatile",
            )

            # Display the model's answer.
            answer = chat_completion.choices[0].message.content
            st.subheader("Answer:")
            st.write(answer)
    else:
        st.error("Failed to extract text from the file. Please check the format.")
|