sunbal7 committed
Commit 66e139c · verified · 1 Parent(s): 4a73aaa

Update app.py

Files changed (1): app.py (+11 -11)
app.py CHANGED
@@ -4,7 +4,7 @@ st.set_page_config(page_title="RAG Book Analyzer", layout="wide")  # Must be the
 import torch
 import numpy as np
 import faiss
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import SentenceTransformer
 import fitz  # PyMuPDF for PDF extraction
 import docx2txt  # For DOCX extraction
@@ -13,8 +13,8 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 # ------------------------
 # Configuration
 # ------------------------
-MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
-EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
+MODEL_NAME = "microsoft/phi-2"  # Open-source model with good performance
+EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Smaller embedding model
 CHUNK_SIZE = 512
 CHUNK_OVERLAP = 64
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -25,12 +25,12 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 @st.cache_resource
 def load_models():
     try:
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
             device_map="auto" if DEVICE == "cuda" else None,
             torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
-            low_cpu_mem_usage=True
+            trust_remote_code=True
         )
         embedder = SentenceTransformer(EMBED_MODEL, device=DEVICE)
         return tokenizer, model, embedder
@@ -83,8 +83,8 @@ def build_index(chunks):
 # Summarization and Q&A Functions
 # ------------------------
 def generate_summary(text):
-    # Create prompt with Mistral format
-    prompt = f"<s>[INST] Summarize this book in a concise paragraph: {text[:3000]} [/INST]"
+    # Create prompt for Phi-2 model
+    prompt = f"Instruct: Summarize this book in a concise paragraph\nInput: {text[:3000]}\nOutput:"
     inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
     outputs = model.generate(
         **inputs,
@@ -94,11 +94,11 @@ def generate_summary(text):
         do_sample=True
     )
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return summary.split("[/INST]")[-1].strip()
+    return summary.split("Output:")[-1].strip()
 
 def generate_answer(query, context):
-    # Create prompt with Mistral format
-    prompt = f"<s>[INST] Answer this question based on the context. If unsure, say 'I don't know'.\n\nQuestion: {query}\nContext: {context} [/INST]"
+    # Create prompt for Phi-2 model
+    prompt = f"Instruct: Answer this question based on the context. If unsure, say 'I don't know'.\nQuestion: {query}\nContext: {context}\nOutput:"
     inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
     outputs = model.generate(
         **inputs,
@@ -109,7 +109,7 @@ def generate_answer(query, context):
         do_sample=True
     )
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return answer.split("[/INST]")[-1].strip()
+    return answer.split("Output:")[-1].strip()
 
 # ------------------------
 # Streamlit UI
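
Note on the new prompt path: both helpers now build a plain "Instruct:/Output:" prompt for Phi-2 and keep only the text after the last "Output:" marker, since generate() echoes the prompt tokens before the completion. The sketch below shows that flow in isolation, assuming the same model and loading options as the diff; the helper names (build_phi2_prompt, extract_completion) and the generation parameters (max_new_tokens, temperature) are illustrative placeholders, not values taken from app.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/phi-2"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model the same way the commit does.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto" if DEVICE == "cuda" else None,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    trust_remote_code=True,
)

def build_phi2_prompt(instruction: str, body: str) -> str:
    # Phi-2 style "Instruct / Input / Output" prompt, as in the diff above.
    return f"Instruct: {instruction}\nInput: {body}\nOutput:"

def extract_completion(decoded: str) -> str:
    # generate() returns prompt + completion, so split on the final
    # "Output:" marker -- the post-processing this commit switches to.
    return decoded.split("Output:")[-1].strip()

prompt = build_phi2_prompt("Summarize this book in a concise paragraph", "...book text...")
inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, do_sample=True)
print(extract_completion(tokenizer.decode(outputs[0], skip_special_tokens=True)))

Because the split keeps everything after the last "Output:", the extraction stays correct even if the model repeats the marker in its own text; if the model emits no marker at all, the full decoded string is returned unchanged.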