tahirsher committed on
Commit
94e8525
·
verified ·
1 Parent(s): de6fb9b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import faiss
import fitz  # PyMuPDF for PDF text extraction
import numpy as np
import pandas as pd
import requests
import streamlit as st
from sentence_transformers import SentenceTransformer
8
+
9
+ # Load your data from PDF
10
@st.cache_resource
def load_data(file):
    """Extract the text of an uploaded PDF into a one-row DataFrame.

    Args:
        file: A binary file-like object whose ``read()`` returns the raw
            bytes of a PDF (for example the object returned by
            ``st.file_uploader``).

    Returns:
        A ``pandas.DataFrame`` with a single column ``combined_text`` and a
        single row holding the text of every page, concatenated in page order.
    """
    with fitz.open(stream=file.read(), filetype="pdf") as pdf_file:
        # Join once instead of growing a string with ``+=`` per page.
        text = "".join(page.get_text() for page in pdf_file)
    return pd.DataFrame({'combined_text': [text]})
17
+
18
+ # Initialize the embedding model and FAISS index
19
@st.cache_resource
def initialize_embeddings(data):
    """Build the sentence embedder and a FAISS L2 index over ``data``.

    Args:
        data: DataFrame with a ``combined_text`` column; each row is embedded
            and added to the index in row order, so FAISS ids line up with
            positional (``iloc``) row numbers.

    Returns:
        A ``(embedder, index)`` tuple: the ``SentenceTransformer`` model and a
        ``faiss.IndexFlatL2`` containing one vector per row of ``data``.
    """
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    texts = data['combined_text'].tolist()

    # Ask the model for its dimension rather than peeking at the first
    # embedding, which raises IndexError when ``data`` has no rows.
    embedding_dim = embedder.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(embedding_dim)

    if texts:
        embeddings = embedder.encode(texts, convert_to_tensor=False)
        # FAISS expects a contiguous float32 matrix.
        index.add(np.ascontiguousarray(embeddings, dtype="float32"))
    return embedder, index
27
+
28
# OpenAI API setup
# The key is read from the OPENAI_API_KEY environment variable so that no
# credential lives in the source tree.
# NOTE(review): a real-looking key was previously hard-coded on this line and
# is now part of the public history -- treat it as compromised and revoke it.
openai_api_key = os.environ.get("OPENAI_API_KEY", "")
30
+
31
+ # Function to retrieve top-k similar documents from FAISS index
32
def retrieve(query, embedder, index, data, top_k=5):
    """Return the rows of ``data`` most similar to ``query``.

    Args:
        query: The question text to embed and search for.
        embedder: Object with an ``encode(list_of_str, convert_to_tensor=...)``
            method returning one vector per input string.
        index: FAISS index whose ids correspond to positional rows of ``data``.
        data: DataFrame the index was built from.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame containing at most ``top_k`` rows of ``data``, ordered as
        returned by the index search. Empty when the index holds no vectors
        or ``top_k`` is not positive.
    """
    # Never request more neighbours than the index holds: FAISS pads missing
    # results with -1, and ``iloc[-1]`` would silently return the last row
    # (duplicating it in the context).
    k = min(top_k, index.ntotal)
    if k <= 0:
        return data.iloc[[]]

    query_embedding = np.asarray(
        embedder.encode([query], convert_to_tensor=False), dtype="float32"
    )
    _, indices = index.search(query_embedding, k)

    # Defensive: drop any remaining -1 padding before positional lookup.
    hits = [int(i) for i in indices[0] if i >= 0]
    return data.iloc[hits]
36
+
37
+ # Function for RAG using OpenAI API
38
def rag_query(query, embedder, index, data, top_k=5):
    """Answer ``query`` using retrieved context and the OpenAI chat API.

    The top ``top_k`` rows retrieved from ``data`` are joined into a context
    string, combined with the question into a prompt, and sent to the chat
    completions endpoint.

    Args:
        query: The user's question.
        embedder: Embedding model passed through to ``retrieve``.
        index: FAISS index passed through to ``retrieve``.
        data: DataFrame with a ``combined_text`` column.
        top_k: Number of rows to retrieve for the context.

    Returns:
        The model's answer text, ``"No answer found."`` when the response has
        no message content, or a string starting with ``"Error: "`` when the
        request fails or the API returns a non-200 status.
    """
    retrieved_docs = retrieve(query, embedder, index, data, top_k)
    context = "\n".join(retrieved_docs['combined_text'].tolist())
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"

    headers = {
        "Authorization": f"Bearer {openai_api_key}",
        "Content-Type": "application/json",
    }

    # Named ``payload`` so the ``data`` parameter is not shadowed.
    payload = {
        "model": "gpt-3.5-turbo",  # Change to your preferred model
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 100,
        "temperature": 0.7,
    }

    try:
        # Without a timeout a stalled connection would hang the app forever.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
    except requests.RequestException as exc:
        return f"Error: {exc}"

    # Error responses from proxies/gateways are not always JSON.
    try:
        body = response.json()
    except ValueError:
        body = {}
    if not isinstance(body, dict):
        body = {}

    if response.status_code != 200:
        return f"Error: {body.get('error', 'Unknown error')}"

    # ``choices`` may be missing or an empty list; fall back to an empty entry.
    choices = body.get("choices") or [{}]
    return choices[0].get("message", {}).get("content", "No answer found.")
64
+
65
+ # Streamlit UI
66
st.title("RAG Application with OpenAI API")
st.write("Ask a question, and I'll find the answer for you!")

# File uploader for PDF data
uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_file is None:
    st.write("Please upload a PDF file to start.")
else:
    # Extract the PDF text, then build the embedder and FAISS index over it.
    data = load_data(uploaded_file)
    embedder, index = initialize_embeddings(data)

    # User input for query
    query = st.text_input("Your question:")

    if st.button("Get Answer"):
        # Strip first so a whitespace-only question is treated as empty
        # instead of being sent to the API.
        question = query.strip()
        if question:
            answer = rag_query(question, embedder, index, data)
            st.write("Answer:", answer)
        else:
            st.write("Please enter a question.")