Spaces:
Running
Running
File size: 3,471 Bytes
ed83e84 fced55b ed83e84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import os
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from typing import Literal
# Load environment variables at import time, before initialize_llms() below
# reads GROQ_API_KEY via os.getenv. (load_dotenv presumably pulls values from
# a local .env file -- confirm against the python-dotenv documentation.)
load_dotenv()
# Initialize LLMs
def initialize_llms():
    """Build the two Groq chat models used by this module.

    The API key is read from the ``GROQ_API_KEY`` environment variable and
    handed to both models.

    Returns:
        A dict with two entries:
        ``"llm"`` -- ``llama-3.3-70b-versatile`` at temperature 0.1;
        ``"step_back_llm"`` -- ``Gemma2-9B-IT`` at temperature 0.
    """
    api_key = os.getenv("GROQ_API_KEY")

    primary_llm = ChatGroq(
        temperature=0.1,
        model="llama-3.3-70b-versatile",
        api_key=api_key,
    )
    step_back_llm = ChatGroq(
        temperature=0,
        model="Gemma2-9B-IT",
        api_key=api_key,
    )
    return {"llm": primary_llm, "step_back_llm": step_back_llm}
# Query refinement
def refine_query(query: str, llm: ChatGroq) -> str:
    """Ask the LLM to clean up the wording of a user's medical question.

    The prompt instructs the model to fix grammar and spelling, clarify vague
    wording, and improve sentence structure while keeping the original meaning
    and adding no new information.

    Args:
        query: The user's question as originally written.
        llm: Chat model the prompt is piped into.

    Returns:
        The ``content`` of the model's response.
    """
    # The template text is kept flush-left so that no source indentation
    # leaks into the prompt sent to the model.
    rewrite_instructions = """
You are a medical language expert. Your task is to improve the following user question by:
- Correcting any grammatical or spelling errors
- Clarifying vague or ambiguous wording
- Improving sentence structure for readability and precision
- Maintaining the original meaning and clinical focus
Do not add new information. Do not expand abbreviations unless they are unclear. Do not include any commentary or explanation.
Original query: {original_query}
Improved medical question:
"""
    rewrite_prompt = PromptTemplate(
        input_variables=["original_query"],
        template=rewrite_instructions,
    )
    pipeline = rewrite_prompt | llm
    response = pipeline.invoke({"original_query": query})
    return response.content
def query_to_retrieve(query: str, llm: ChatGroq) -> str:
    """Rewrite a question as one concise sentence for semantic search.

    The prompt asks the model to keep only the core clinical concepts
    (conditions, symptoms, treatments, procedures, and pediatric-specific
    entities where relevant), drop conversational filler, and preserve the
    original intent.

    Args:
        query: The question to convert.
        llm: Chat model the prompt is piped into.

    Returns:
        The ``content`` of the model's response, intended to be used as the
        query text for a vector-database search.
    """
    # The template text is kept flush-left so that no source indentation
    # leaks into the prompt sent to the model.
    template = """
You are an expert in pediatric medical information retrieval.
Your task is to rewrite the following question into a single, concise sentence containing only the most relevant medical and pediatric concepts. This sentence will be used for semantic search in a vector database.
Instructions:
- Include only the core clinical focus (conditions, symptoms, treatments, procedures).
- Mention pediatric-specific entities if relevant (e.g., age group, child-specific medication).
- Remove all conversational language and filler.
- Preserve the original intent.
- Output only one clean, search-optimized sentence.
Original query: {original_query}
Search-ready query:
"""
    prompt = PromptTemplate(input_variables=["original_query"], template=template)
    chain = prompt | llm
    return chain.invoke({"original_query": query}).content
def answer_query_with_chunks(
    query: str,
    retrieved_docs,
    llm: ChatGroq,
) -> str:
    """Answer a question using only the retrieved document chunks as context.

    Args:
        query: The user's original question.
        retrieved_docs: Text chunks to use as context. They are joined with
            blank lines via ``str.join``, so each item must be a string.
        llm: Chat model used both to refine the question and to answer it.

    Returns:
        The ``content`` of the model's answer; a fixed "no relevant
        information" message when ``retrieved_docs`` is empty; or an
        "An error occurred ..." message if any ``Exception`` is raised along
        the way (such exceptions are caught here, not propagated).
    """
    try:
        # Check for context first: with nothing to ground an answer on,
        # refining the question would only spend an LLM request (and could
        # fail and hide the "no information" message behind an error).
        if not retrieved_docs:
            return "Sorry, no relevant medical information was found."

        # Clean up the wording of the question before it goes into the prompt.
        query_improved = refine_query(query, llm)

        # Construct context for the LLM
        context = "\n\n".join(retrieved_docs)

        # The template text is kept flush-left so that no source indentation
        # leaks into the prompt sent to the model.
        system_prompt = """
You are a pediatric medical assistant.
Based only on the provided context, answer the user's question concisely and accurately with explanation.
If the answer is not present in the context, say: "The answer is not available in the current documents."
Context:
{context}
User question:
{query}
Answer:
"""
        prompt = PromptTemplate(
            input_variables=["context", "query"],
            template=system_prompt,
        )
        chain = prompt | llm
        return chain.invoke({"context": context, "query": query_improved}).content
    except Exception as e:
        # Boundary handler: callers always get a string back, never a raise.
        return f"An error occurred while answering the query: {str(e)}"
|