krishnadhulipalla committed on
Commit
2fa0d66
·
1 Parent(s): 81f334a

changed llm2 task

Browse files
Files changed (1) hide show
  1. app.py +31 -34
app.py CHANGED
@@ -79,6 +79,7 @@ vectorstore, all_chunks, all_texts, metadatas = initialize_resources()
79
 
80
  # LLMs
81
  repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
 
82
  relevance_llm = ChatNVIDIA(model="meta/llama3-70b-instruct") | StrOutputParser()
83
  if not os.environ.get("OPENAI_API_KEY"):
84
  raise RuntimeError("OPENAI_API_KEY not found in environment!")
@@ -97,46 +98,39 @@ repharser_prompt = ChatPromptTemplate.from_template(
97
  )
98
 
99
  relevance_prompt = ChatPromptTemplate.from_template("""
100
- You are Krishna's personal AI assistant validator.
101
- Your job is to review a user's question and a list of retrieved document chunks.
102
- Identify which chunks (if any) directly help answer the question. Return **all relevant chunks**.
103
 
104
- ---
105
- ⚠️ Do NOT select chunks just because they include keywords or technical terms.
106
-
107
- Exclude chunks that:
108
- - Mention universities, CGPA, or education history (they show qualifications, not skills)
109
- - List certifications or course names (they show credentials, not skills used)
110
- - Describe goals, future plans, or job aspirations
111
- - Contain tools mentioned in passing without describing actual usage
112
 
113
- Only include chunks if they contain **evidence of specific knowledge, tools used, skills applied, or experience demonstrated.**
 
 
114
 
115
  ---
116
 
117
- 🔎 Examples:
 
 
 
118
 
119
- Q1: "What are Krishna's skills?"
120
- - Chunk A: Lists programming languages, ML tools, and projects → ✅
121
- - Chunk B: Talks about a Coursera certificate in ML → ❌
122
- - Chunk C: States a CGPA and master’s degree → ❌
123
- - Chunk D: Describes tools Krishna used in his work → ✅
124
 
125
  Output:
126
  {{
127
- "valid_chunks": [A, D],
128
- "is_out_of_scope": false,
129
- "justification": "Chunks A and D describe tools and skills Krishna has actually used."
130
  }}
131
 
132
- Q2: "What is Krishna's favorite color?"
133
- - All chunks are about technical work or academic history → ❌
134
 
135
  Output:
136
  {{
137
- "valid_chunks": [],
138
- "is_out_of_scope": true,
139
- "justification": "None of the chunks are related to the user's question about preferences or colors."
140
  }}
141
 
142
  ---
@@ -149,13 +143,15 @@ User Question:
149
  Chunks:
150
  {contents}
151
 
152
- Return only the JSON object. Think carefully before selecting any chunk.
153
  """)
154
 
 
155
  answer_prompt_relevant = ChatPromptTemplate.from_template(
156
  "You are Krishna's personal AI assistant. Your job is to answer the user’s question clearly and professionally using the provided context.\n"
157
  "Rather than copying sentences, synthesize relevant insights and explain them like a knowledgeable peer.\n\n"
158
  "Krishna's Background:\n{profile}\n\n"
 
159
  "Make your response rich and informative by:\n"
160
  "- Combining relevant facts from multiple parts of the context\n"
161
  "- Using natural, human-style language (not just bullet points)\n"
@@ -166,9 +162,10 @@ answer_prompt_relevant = ChatPromptTemplate.from_template(
166
  "Answer:"
167
  )
168
 
 
169
  answer_prompt_fallback = ChatPromptTemplate.from_template(
170
  "You are Krishna’s personal AI assistant. The user asked a question unrelated to Krishna’s background.\n"
171
- "Gently let the user know, and then pivot to something Krishna is actually involved in to keep the conversation helpful.\n\n"
172
  "Krishna's Background:\n{profile}\n\n"
173
  "User Question:\n{query}\n\n"
174
  "Your Answer:"
@@ -260,16 +257,15 @@ def hybrid_retrieve(inputs, exclude_terms=None):
260
 
261
  def safe_json_parse(s: str) -> Dict:
262
  try:
263
- if isinstance(s, str) and "valid_chunks" in s:
264
  return json.loads(s)
265
  except json.JSONDecodeError:
266
  pass
267
  return {
268
- "valid_chunks": [],
269
  "is_out_of_scope": True,
270
  "justification": "Fallback due to invalid or missing LLM output"
271
  }
272
-
273
  # Rewrite generation
274
  rephraser_chain = (
275
  repharser_prompt
@@ -299,15 +295,16 @@ extract_validation_inputs = RunnableLambda(lambda x: {
299
  # Relevance validation pipeline: extract the validation inputs, fill the
  # relevance prompt, send it to relevance_llm, then parse the text reply into
  # a dict with safe_json_parse (which falls back to "out of scope" on bad output).
  validation_chain = (
300
  extract_validation_inputs
301
  | relevance_prompt
302
- | relevance_llm
303
  | RunnableLambda(safe_json_parse)
304
  )
305
 
306
  # Answer Generation
307
  def prepare_answer_inputs(x: Dict) -> Dict:
308
  context = KRISHNA_BIO if x["validation"]["is_out_of_scope"] else "\n\n".join(
309
- [x["chunks"][i-1]["content"] for i in x["validation"]["valid_chunks"]])
310
-
 
311
  return {
312
  "query": x["query"],
313
  "profile": KRISHNA_BIO,
 
79
 
80
  # LLMs
81
  repharser_llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.3") | StrOutputParser()
82
+ instruct_llm = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1") | StrOutputParser()
83
  relevance_llm = ChatNVIDIA(model="meta/llama3-70b-instruct") | StrOutputParser()
84
  if not os.environ.get("OPENAI_API_KEY"):
85
  raise RuntimeError("OPENAI_API_KEY not found in environment!")
 
98
  )
99
 
100
  relevance_prompt = ChatPromptTemplate.from_template("""
101
+ You are Krishna's personal AI assistant classifier.
 
 
102
 
103
+ Your job is to decide whether a user's question can be meaningfully answered using the provided document chunks.
 
 
 
 
 
 
 
104
 
105
+ Think carefully and return a JSON object with:
106
+ - "is_out_of_scope": true if none of the chunks contain information relevant to the question.
107
+ - "justification": a short sentence explaining your decision.
108
 
109
  ---
110
 
111
+ Rules:
112
+ - Chunks are snippets from Krishna’s resume, project history, and personal background.
113
+ - If none of the chunks contain evidence, examples, or details that directly help answer the question, mark it as out of scope.
114
+ - Do NOT rely on keyword matches. Use reasoning to decide whether the content actually addresses the question.
115
 
116
+ Examples:
117
+
118
+ Q: "What are Krishna's favorite movies?"
119
+ Chunks: Mostly about research, skills, and work experience.
 
120
 
121
  Output:
122
  {{
123
+ "is_out_of_scope": true,
124
+ "justification": "No chunk discusses Krishna's personal preferences like movies."
 
125
  }}
126
 
127
+ Q: "What ML tools has Krishna used in projects?"
128
+ Chunks: Mentions PyTorch, Kafka, Hugging Face, Spark.
129
 
130
  Output:
131
  {{
132
+ "is_out_of_scope": false,
133
+ "justification": "Chunks mention tools Krishna used directly in his work."
 
134
  }}
135
 
136
  ---
 
143
  Chunks:
144
  {contents}
145
 
146
+ Return only the JSON object.
147
  """)
148
 
149
+
150
  answer_prompt_relevant = ChatPromptTemplate.from_template(
151
  "You are Krishna's personal AI assistant. Your job is to answer the user’s question clearly and professionally using the provided context.\n"
152
  "Rather than copying sentences, synthesize relevant insights and explain them like a knowledgeable peer.\n\n"
153
  "Krishna's Background:\n{profile}\n\n"
154
+ "Note: The context might include some unrelated or noisy information. Focus only on content that directly supports your answer.\n\n"
155
  "Make your response rich and informative by:\n"
156
  "- Combining relevant facts from multiple parts of the context\n"
157
  "- Using natural, human-style language (not just bullet points)\n"
 
162
  "Answer:"
163
  )
164
 
165
+
166
  answer_prompt_fallback = ChatPromptTemplate.from_template(
167
  "You are Krishna’s personal AI assistant. The user asked a question unrelated to Krishna’s background.\n"
168
+ "Respond with a touch of humor, then guide the conversation back to Krishna’s actual skills, experiences, or projects.\n\n"
169
  "Krishna's Background:\n{profile}\n\n"
170
  "User Question:\n{query}\n\n"
171
  "Your Answer:"
 
257
 
258
  def safe_json_parse(s: str) -> Dict:
259
  try:
260
+ if isinstance(s, str) and "is_out_of_scope" in s:
261
  return json.loads(s)
262
  except json.JSONDecodeError:
263
  pass
264
  return {
 
265
  "is_out_of_scope": True,
266
  "justification": "Fallback due to invalid or missing LLM output"
267
  }
268
+
269
  # Rewrite generation
270
  rephraser_chain = (
271
  repharser_prompt
 
295
  # Scope classification pipeline: extract the validation inputs, fill the
  # relevance prompt, send it to instruct_llm, then parse the text reply into
  # a dict with safe_json_parse (which falls back to "out of scope" on bad output).
  validation_chain = (
296
  extract_validation_inputs
297
  | relevance_prompt
298
+ | instruct_llm
299
  | RunnableLambda(safe_json_parse)
300
  )
301
 
302
  # Answer Generation
303
  def prepare_answer_inputs(x: Dict) -> Dict:
304
  context = KRISHNA_BIO if x["validation"]["is_out_of_scope"] else "\n\n".join(
305
+ [chunk["content"] for chunk in x["chunks"]]
306
+ )
307
+
308
  return {
309
  "query": x["query"],
310
  "profile": KRISHNA_BIO,