random2222 committed
Commit 42d3ee2 · verified · 1 Parent(s): 0c8abd3

Update app.py

Files changed (1): app.py (+78 -90)
app.py CHANGED
@@ -1,118 +1,106 @@
- import gradio as gr
  import os
  import torch
- from langchain_community.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_community.embeddings import HuggingFaceEmbeddings
  from langchain_community.vectorstores import FAISS
- from transformers import AutoModelForCausalLM, AutoTokenizer

  # Configuration
- DOCS_DIR = "business_docs"
- EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
  MODEL_NAME = "microsoft/phi-2"

- def initialize_system():
-     # Document processing
-     if not os.path.exists(DOCS_DIR):
-         raise FileNotFoundError(f"Missing {DOCS_DIR} folder")
-
-     pdf_files = [os.path.join(DOCS_DIR, f)
-                  for f in os.listdir(DOCS_DIR)
-                  if f.endswith(".pdf")]

      text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=1000,
-         chunk_overlap=200
      )
-
-     texts = []
-     for pdf in pdf_files:
-         loader = PyPDFLoader(pdf)
-         pages = loader.load_and_split(text_splitter)
-         texts.extend(pages)

-     # Create embeddings
-     embeddings = HuggingFaceEmbeddings(
-         model_name=EMBEDDING_MODEL,
-         model_kwargs={'device': 'cpu'}
-     )
-
-     # Vector store
-     vector_store = FAISS.from_documents(texts, embeddings)

-     # Load model and tokenizer
      tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-     tokenizer.pad_token = tokenizer.eos_token # Fix padding issue
-
      model = AutoModelForCausalLM.from_pretrained(
          MODEL_NAME,
          trust_remote_code=True,
-         torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
          device_map="auto",
          low_cpu_mem_usage=True
      )

-     return vector_store, model, tokenizer

- try:
-     vector_store, model, tokenizer = initialize_system()
-     print(" System initialized successfully")
-     if torch.cuda.is_available():
-         print("🚀 Using CUDA")
-         print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
-     else:
-         print("🧠 Using CPU")
- except Exception as e:
-     print(f"❌ Initialization failed: {str(e)}")
-     raise

- def generate_response(query):
      try:
-         # Context retrieval
-         docs = vector_store.similarity_search(query, k=3)
-         context = "\n".join([d.page_content for d in docs])
-
-         # Prompt template optimized for Phi-2
-         prompt = f"""Context:
-         {context}
-
-         Question: {query}
-         Answer:"""
-
-         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-         outputs = model.generate(
-             inputs.input_ids,
-             max_new_tokens=300,
-             temperature=0.3,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id
-         )
-
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return response.split("Answer:")[-1].strip()
-
      except Exception as e:
-         return "Sorry, an error occurred while generating a response."
-
- # Gradio UI
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# 🧠 Enterprise Customer Support Chatbot")
-
-     chatbot = gr.Chatbot(height=500, label="Conversation")
-
-     with gr.Row():
-         msg = gr.Textbox(placeholder="Ask about our services...", scale=7)
-         submit_btn = gr.Button("Send", variant="primary", scale=1)

      clear = gr.ClearButton([msg, chatbot])

-     def respond(message, history):
-         response = generate_response(message)
-         history.append((message, response))
-         return "", history
-
-     submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
-     msg.submit(respond, [msg, chatbot], [msg, chatbot])

- demo.launch(server_port=7860)

  import os
+ import gradio as gr
  import torch
+ from huggingface_hub import login
+ from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
  from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.chains import RetrievalQA
+ from langchain_community.llms import HuggingFacePipeline
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+ # HF Authentication
+ login(token=os.environ.get('HF_TOKEN'))

  # Configuration
+ DOCS_DIR = "study_materials"
  MODEL_NAME = "microsoft/phi-2"
+ EMBEDDINGS_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+ MAX_TOKENS = 300
+ CHUNK_SIZE = 1000

+ def load_documents():
+     documents = []
+     for filename in os.listdir(DOCS_DIR):
+         path = os.path.join(DOCS_DIR, filename)
+         try:
+             if filename.endswith(".pdf"):
+                 documents.extend(PyMuPDFLoader(path).load())
+             elif filename.endswith(".txt"):
+                 documents.extend(TextLoader(path).load())
+         except Exception as e:
+             print(f"Error loading {filename}: {str(e)}")
+     return documents
+
+ def create_qa_system():
+     # Load and split documents
+     documents = load_documents()
+     if not documents:
+         raise gr.Error("No documents found in 'study_materials' folder")

      text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=CHUNK_SIZE,
+         chunk_overlap=200,
+         separators=["\n\n", "\n", " "]
      )
+     texts = text_splitter.split_documents(documents)

+     # Create vector store
+     embeddings = HuggingFaceEmbeddings(model_name=EMBEDDINGS_MODEL)
+     db = FAISS.from_documents(texts, embeddings)

+     # Load Phi-2 with authentication
      tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
      model = AutoModelForCausalLM.from_pretrained(
          MODEL_NAME,
+         use_auth_token=True, # Critical change for gated models
+         torch_dtype=torch.float32,
          trust_remote_code=True,
          device_map="auto",
          low_cpu_mem_usage=True
      )
+
+     pipe = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=MAX_TOKENS,
+         temperature=0.7,
+         do_sample=True,
+         top_k=40,
+         device_map="auto"
+     )

+     return RetrievalQA.from_chain_type(
+         llm=HuggingFacePipeline(pipeline=pipe),
+         chain_type="stuff",
+         retriever=db.as_retriever(search_kwargs={"k": 2}),
+         return_source_documents=True
+     )

+ def format_response(response):
+     answer = response["result"].split("</s>")[0].split("\nOutput:")[-1].strip()
+     sources = list({os.path.basename(doc.metadata["source"]) for doc in response["source_documents"]})
+     return f"{answer}\n\n📚 Sources: {', '.join(sources)}"

+ def process_query(question, history):
      try:
+         qa = create_qa_system()
+         formatted_q = f"Instruct: {question}\nOutput:"
+         response = qa.invoke({"query": formatted_q})
+         return format_response(response)
      except Exception as e:
+         print(f"Error: {str(e)}")
+         return f"⚠️ Error: {str(e)[:100]}"

+ with gr.Blocks(title="Phi-2 Study Assistant", theme=gr.themes.Soft()) as app:
+     gr.Markdown("## 📚 Phi-2 Study Assistant\nUpload study materials to 'study_materials' and ask questions!")
+     chatbot = gr.Chatbot(height=400)
+     msg = gr.Textbox(label="Your Question")
      clear = gr.ClearButton([msg, chatbot])

+     msg.submit(process_query, [msg, chatbot], [msg, chatbot])

+ if __name__ == "__main__":
+     app.launch(server_name="0.0.0.0", server_port=7860)
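
For reference, a minimal usage sketch (not part of the commit) showing how the RetrievalQA chain built by the new create_qa_system can be driven outside the Gradio UI. The import path and sample question are illustrative; it assumes study_materials/ contains at least one PDF or TXT file and that HF_TOKEN is set so the module-level login() succeeds. Because the chain is created with return_source_documents=True, invoke() returns a dict carrying both the generated answer and the retrieved chunks.

# Illustrative sketch: exercise the QA chain from app.py directly.
from app import create_qa_system, format_response  # app.py as updated above

qa = create_qa_system()  # loads documents, builds the FAISS index, and wraps Phi-2 in a pipeline
response = qa.invoke({"query": "Instruct: Summarize the key topics in the uploaded notes\nOutput:"})

print(response["result"])            # raw generation from the chain
print(format_response(response))     # cleaned answer plus the "📚 Sources: ..." footer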