mfraz committed on
Commit
6a8f952
·
verified ·
1 Parent(s): b0e1dbf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: upload a PDF or DOCX file and ask questions about it.

The uploaded document is split into overlapping chunks, each chunk is
embedded with a sentence-transformers model, and the chunks most similar to
the user's question are sent to the Groq chat API as context for the answer.
"""
import io
import os

import docx
import PyPDF2
import streamlit as st
from groq import Groq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline  # noqa: F401  (kept: imported by the original file)

PDF_MIME = "application/pdf"
DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
CHAT_MODEL = "llama-3.3-70b-versatile"
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
TOP_K = 4  # number of document chunks passed to the model as context

# Title and UI. Page config goes first so that the error/warning widgets
# used below are never emitted before it.
st.set_page_config(page_title="A&Q From a File", page_icon="📖")
st.title("📖 A&Q From a File")


@st.cache_resource(show_spinner=False)
def load_embedder() -> SentenceTransformer:
    """Load the embedding model once per process instead of on every rerun."""
    return SentenceTransformer(EMBEDDING_MODEL)


def extract_text(data: bytes, mime_type: str) -> str:
    """Return the plain text of a PDF or DOCX file given as raw bytes.

    Unknown MIME types yield an empty string.
    """
    stream = io.BytesIO(data)
    if mime_type == PDF_MIME:
        reader = PyPDF2.PdfReader(stream)
        # extract_text() can yield nothing for image-only pages; treat a
        # missing result as empty text rather than failing on None + str.
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    if mime_type == DOCX_MIME:
        document = docx.Document(stream)
        return "\n".join(para.text for para in document.paragraphs)
    return ""


@st.cache_data(show_spinner="Indexing document...", max_entries=8)
def index_document(data: bytes, mime_type: str):
    """Split the document into chunks and embed them.

    Cached on the file content so that asking another question about the
    same file does not re-extract and re-embed it.

    Returns:
        A ``(chunks, embeddings)`` pair; ``([], None)`` when the file has no
        extractable text.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    chunks = splitter.split_text(extract_text(data, mime_type))
    if not chunks:
        return [], None
    return chunks, embedder.encode(chunks, convert_to_tensor=True)


def retrieve_context(question: str, chunks, embeddings, top_k: int = TOP_K):
    """Return the ``top_k`` chunks most similar to ``question``, best first."""
    query_embedding = embedder.encode(question, convert_to_tensor=True)
    hits = util.semantic_search(
        query_embedding, embeddings, top_k=min(top_k, len(chunks))
    )[0]
    return [chunks[hit["corpus_id"]] for hit in hits]


# Set up Groq API
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("The GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=api_key)

# Load embedding model
embedder = load_embedder()

# File Upload
uploaded_file = st.file_uploader(
    "Upload a PDF or DOCX file", type=["pdf", "docx"]
)

if uploaded_file:
    chunks, embeddings = index_document(
        uploaded_file.getvalue(), uploaded_file.type
    )
    if not chunks:
        st.warning("No extractable text was found in this file.")
        st.stop()

    # Query Input
    user_query = st.text_input("Ask a question about the file:")
    if user_query and user_query.strip():
        # Ground the answer in the document: without these excerpts the
        # model would only ever see the question itself.
        context = "\n\n".join(retrieve_context(user_query, chunks, embeddings))

        # Query Groq API
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "Answer using only the document excerpts "
                            "provided by the user. If the excerpts do not "
                            "contain the answer, say so."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            f"Document excerpts:\n{context}\n\n"
                            "Answer this question based on the uploaded "
                            f"document: {user_query}"
                        ),
                    },
                ],
                model=CHAT_MODEL,
            )
        except Exception as exc:  # UI boundary: report instead of a traceback
            st.error(f"The Groq API request failed: {exc}")
            st.stop()

        # Display answer
        st.subheader("Answer:")
        st.write(chat_completion.choices[0].message.content)