Update app.py
app.py CHANGED
@@ -4,7 +4,7 @@ st.set_page_config(page_title="RAG Book Analyzer", layout="wide") # Must be the
 import torch
 import numpy as np
 import faiss
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import SentenceTransformer
 import fitz  # PyMuPDF for PDF extraction
 import docx2txt  # For DOCX extraction
@@ -13,8 +13,8 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 # ------------------------
 # Configuration
 # ------------------------
-MODEL_NAME = "
-EMBED_MODEL = "sentence-transformers/all-
+MODEL_NAME = "microsoft/phi-2"  # Open-source model with good performance
+EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Smaller embedding model
 CHUNK_SIZE = 512
 CHUNK_OVERLAP = 64
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
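For orientation, here is a minimal sketch (not part of this diff) of how CHUNK_SIZE, CHUNK_OVERLAP, and the all-MiniLM-L6-v2 embedder typically feed a FAISS index. The app's real build_index lives further down in app.py and is not shown here, so the helper name and details below are assumptions.

# Hypothetical illustration only; the app's actual build_index may differ.
from langchain_text_splitters import RecursiveCharacterTextSplitter
import faiss
import numpy as np

def build_index_sketch(text, embedder):
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_text(text)
    # all-MiniLM-L6-v2 produces 384-dimensional vectors
    embeddings = embedder.encode(chunks, convert_to_numpy=True).astype(np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks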
@@ -25,12 +25,12 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 @st.cache_resource
 def load_models():
     try:
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             device_map="auto" if DEVICE == "cuda" else None,
             torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
-
+            trust_remote_code=True
         )
         embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
         return tokenizer, model, embedder
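This hunk only shows the try-block of load_models; the except branch and the call site are outside the diff. The trust_remote_code=True flag lets older microsoft/phi-2 revisions execute the modeling code shipped in the model repository. As a hedged usage sketch, the cached loader is typically consumed once near the top of the script, with a Streamlit error surface if loading fails; the None-check below assumes the unseen except branch returns (None, None, None).

# Hypothetical call site (not shown in the diff): load once, fail loudly in the UI.
tokenizer, model, embedder = load_models()
if tokenizer is None or model is None:
    st.error("Model loading failed - check the Space logs for details.")
    st.stop()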
@@ -83,8 +83,8 @@ def build_index(chunks):
 # Summarization and Q&A Functions
 # ------------------------
 def generate_summary(text):
-    # Create prompt
-    prompt = f"
+    # Create prompt for Phi-2 model
+    prompt = f"Instruct: Summarize this book in a concise paragraph\nInput: {text[:3000]}\nOutput:"
     inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
     outputs = model.generate(
         **inputs,
@@ -94,11 +94,11 @@ def generate_summary(text):
         do_sample=True
     )
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return summary.split("
+    return summary.split("Output:")[-1].strip()
 
 def generate_answer(query, context):
-    # Create prompt
-    prompt = f"
+    # Create prompt for Phi-2 model
+    prompt = f"Instruct: Answer this question based on the context. If unsure, say 'I don't know'.\nQuestion: {query}\nContext: {context}\nOutput:"
     inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
     outputs = model.generate(
         **inputs,
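Both generate_summary and generate_answer follow phi-2's "Instruct: ... Output:" convention: the model echoes the prompt, so splitting the decoded text on "Output:" and keeping the last segment isolates the completion. The generation arguments elided between these hunks (roughly lines 90-94 and 105-108) are not visible; the round-trip sketch below uses assumed values for them.

# Hypothetical round-trip sketch; max_new_tokens, temperature and pad_token_id are assumptions,
# not the values actually used in app.py.
def complete_sketch(prompt, max_new_tokens=256):
    inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # phi-2 has no dedicated pad token
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text.split("Output:")[-1].strip()  # drop the echoed prompt, keep only the completion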
@@ -109,7 +109,7 @@ def generate_answer(query, context):
         do_sample=True
     )
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return answer.split("
+    return answer.split("Output:")[-1].strip()
 
 # ------------------------
 # Streamlit UI
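The Streamlit UI section (line 114 onward) is outside this diff, so how generate_answer receives its context is not visible. A speculative wiring sketch, assuming build_index returns a FAISS index plus the chunk list and that embedder and np are the module-level objects defined above:

# Hypothetical retrieval + answer wiring; the real UI code may differ.
def answer_question_sketch(query, index, chunks, top_k=3):
    query_vec = embedder.encode([query], convert_to_numpy=True).astype(np.float32)
    _, idx = index.search(query_vec, top_k)           # nearest chunks by L2 distance
    context = "\n\n".join(chunks[i] for i in idx[0])  # concatenate retrieved chunks
    return generate_answer(query, context)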