mfraz committed on
Commit da4f565 · verified · 1 Parent(s): fde9d41

Update app.py

Files changed (1)
  1. app.py +45 -49
app.py CHANGED
@@ -1,61 +1,57 @@
 import os
 import streamlit as st
-import PyPDF2
-import docx
-from sentence_transformers import SentenceTransformer
 from groq import Groq
-from transformers import pipeline
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-
-# Set up Groq API
-client = Groq(api_key=os.environ.get("Groq_Api"))
+from PyPDF2 import PdfReader
+from docx import Document
+from sentence_transformers import SentenceTransformer
 
-# Load embedding model
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# Initialize Groq API Client
+client = Groq(api_key=os.environ.get("Groq-Api"))
 
-# Title and UI
-st.set_page_config(page_title="A&Q From a File", page_icon="📖")
+# Title with Book Icon
 st.title("📖 A&Q From a File")
 
 # File Upload
 uploaded_file = st.file_uploader("Upload a PDF or DOCX file", type=["pdf", "docx"])
 
 if uploaded_file:
-    text = ""
-
-    # Extract text from PDF
-    if uploaded_file.type == "application/pdf":
-        pdf_reader = PyPDF2.PdfReader(uploaded_file)
-        for page in pdf_reader.pages:
-            text += page.extract_text() + "\n"
-
-    # Extract text from DOCX
-    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        doc = docx.Document(uploaded_file)
-        for para in doc.paragraphs:
-            text += para.text + "\n"
-
-    # Chunking the text
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500, chunk_overlap=50
-    )
-    chunks = text_splitter.split_text(text)
-
-    # Embed chunks
-    embeddings = embedder.encode(chunks, convert_to_tensor=True)
+    st.write(f"**File Name:** {uploaded_file.name}")  # Display file name
 
-    # Query Input
-    user_query = st.text_input("Ask a question about the file:")
-    if user_query:
-
-        # Query Groq API
-        chat_completion = client.chat.completions.create(
-            messages=[
-                {"role": "user", "content": f"Answer this question based on the uploaded document: {user_query}"}
-            ],
-            model="llama-3.3-70b-versatile",
-        )
-
-        # Display answer
-        st.subheader("Answer:")
-        st.write(chat_completion.choices[0].message.content)
+    # Read PDF or DOCX content
+    def extract_text(file):
+        if file.name.endswith(".pdf"):
+            reader = PdfReader(file)
+            return "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
+        elif file.name.endswith(".docx"):
+            doc = Document(file)
+            return "\n".join([para.text for para in doc.paragraphs])
+        return ""
+
+    file_text = extract_text(uploaded_file)
+
+    if file_text:
+        st.success("File uploaded and text extracted successfully!")
+        st.write("Ask a question about the file:")
+        query = st.text_input("Enter your question")
+
+        if query:
+            # Chunk & Tokenize
+            model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+            chunks = [file_text[i:i + 512] for i in range(0, len(file_text), 512)]
+            embeddings = model.encode(chunks)
+
+            # Query with Groq API
+            chat_completion = client.chat.completions.create(
+                messages=[
+                    {"role": "user", "content": f"Answer based on this document: {query}\n\n{file_text}"},
+                ],
+                model="llama-3.3-70b-versatile",
+            )
+
+            # Display Answer
+            answer = chat_completion.choices[0].message.content
+            st.subheader("Answer:")
+            st.write(answer)
+    else:
+        st.error("Failed to extract text from the file. Please check the format.")
+