mfraz committed on
Commit
3cfed0b
·
verified ·
1 Parent(s): 4c284da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -8
app.py CHANGED
@@ -4,6 +4,8 @@ from groq import Groq
4
  from PyPDF2 import PdfReader
5
  from docx import Document
6
  from sentence_transformers import SentenceTransformer
 
 
7
 
8
  # Initialize Groq API Client
9
  client = Groq(api_key=os.environ.get("Groq_Api"))
@@ -16,8 +18,8 @@ uploaded_file = st.file_uploader("Upload a PDF or DOCX file", type=["pdf", "docx
16
 
17
  if uploaded_file:
18
  st.write(f"**File Name:** {uploaded_file.name}") # Display file name
19
-
20
- # Read PDF or DOCX content
21
  def extract_text(file):
22
  if file.name.endswith(".pdf"):
23
  reader = PdfReader(file)
@@ -35,15 +37,29 @@ if uploaded_file:
35
  query = st.text_input("Enter your question")
36
 
37
  if query:
38
- # Chunk & Tokenize
39
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
40
- chunks = [file_text[i:i + 512] for i in range(0, len(file_text), 512)]
41
- embeddings = model.encode(chunks)
42
 
43
- # Query with Groq API
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  chat_completion = client.chat.completions.create(
45
  messages=[
46
- {"role": "user", "content": f"Answer based on this document: {query}\n\n{file_text}"},
47
  ],
48
  model="llama-3.3-70b-versatile",
49
  )
@@ -52,6 +68,6 @@ if uploaded_file:
52
  answer = chat_completion.choices[0].message.content
53
  st.subheader("Answer:")
54
  st.write(answer)
 
55
  else:
56
  st.error("Failed to extract text from the file. Please check the format.")
57
-
 
4
  from PyPDF2 import PdfReader
5
  from docx import Document
6
  from sentence_transformers import SentenceTransformer
7
+ import faiss
8
+ import numpy as np
9
 
10
  # Initialize Groq API Client
11
  client = Groq(api_key=os.environ.get("Groq_Api"))
 
18
 
19
  if uploaded_file:
20
  st.write(f"**File Name:** {uploaded_file.name}") # Display file name
21
+
22
+ # Extract Text
23
  def extract_text(file):
24
  if file.name.endswith(".pdf"):
25
  reader = PdfReader(file)
 
37
  query = st.text_input("Enter your question")
38
 
39
  if query:
40
+ # Load Sentence Transformer Model
41
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
 
42
 
43
+ # Chunk & Embed Text
44
+ chunk_size = 512
45
+ chunks = [file_text[i:i + chunk_size] for i in range(0, len(file_text), chunk_size)]
46
+ embeddings = model.encode(chunks, convert_to_numpy=True)
47
+
48
+ # Build FAISS Index for Fast Retrieval
49
+ index = faiss.IndexFlatL2(embeddings.shape[1])
50
+ index.add(embeddings)
51
+
52
+ # Query Embedding
53
+ query_embedding = model.encode([query], convert_to_numpy=True)
54
+ _, retrieved_idx = index.search(query_embedding, k=3)
55
+
56
+ # Retrieve Top 3 Relevant Chunks
57
+ relevant_text = " ".join([chunks[i] for i in retrieved_idx[0]])
58
+
59
+ # Query Groq API with relevant chunks only
60
  chat_completion = client.chat.completions.create(
61
  messages=[
62
+ {"role": "user", "content": f"Answer based on this document: {query}\n\n{relevant_text}"},
63
  ],
64
  model="llama-3.3-70b-versatile",
65
  )
 
68
  answer = chat_completion.choices[0].message.content
69
  st.subheader("Answer:")
70
  st.write(answer)
71
+
72
  else:
73
  st.error("Failed to extract text from the file. Please check the format.")