|
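"""RAG answer generation back ends: Google Gemini and local vision models served via Ollama."""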
import requests
import os
import re

from typing import List
from utils import encode_image
from PIL import Image
from ollama import chat
import torch
import subprocess
import psutil
from transformers import AutoModel, AutoTokenizer
import google.generativeai as genai
|
|
class Rag: |
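    """
    Answer queries over document page images.

    Provides two back ends: `get_answer_from_gemini`, which calls the Gemini API,
    and `get_answer_from_openai`, which sends the query and page images to a local
    vision model served by Ollama, cleaning raw special-token output and falling
    back to alternative models when needed.
    """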
|
|
|
    def _clean_raw_token_response(self, response_text):
        """
        Clean raw responses that contain undecoded token IDs.

        Handles cases where a model returns raw special tokens instead of decoded text.
        """
        if not response_text:
            return response_text

        # Special-token markers that indicate the model emitted raw token IDs.
        token_patterns = [
            r'<unused\d+>',
            r'<bos>',
            r'<eos>',
            r'<unk>',
            r'<mask>',
            r'<pad>',
            r'\[multimodal\]',
        ]

        has_raw_tokens = any(re.search(pattern, response_text) for pattern in token_patterns)

        if has_raw_tokens:
            print("⚠️ Detected raw token response, attempting to clean...")

            cleaned_text = response_text

            # Remove unused-token placeholders, control tokens, and multimodal markers.
            cleaned_text = re.sub(r'<unused\d+>', '', cleaned_text)
            cleaned_text = re.sub(r'<(bos|eos|unk|mask|pad)>', '', cleaned_text)
            cleaned_text = re.sub(r'\[multimodal\]', '', cleaned_text)

            # Collapse leftover whitespace.
            cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()

            # If almost nothing survives the cleanup, return a readable error instead.
            if len(cleaned_text.strip()) < 10:
                return "❌ **Model Response Error**: The model returned raw token IDs instead of decoded text. This may be due to model configuration issues. Please try:\n\n1. Restarting the Ollama server\n2. Using a different model\n3. Checking model compatibility with multimodal inputs"

            return cleaned_text

        return response_text
|
    def get_answer_from_gemini(self, query, imagePaths):
        print(f"Querying Gemini for query={query}, imagePaths={imagePaths}")

        try:
            # NOTE: consider loading the API key from an environment variable instead of hardcoding it.
            genai.configure(api_key='AIzaSyCwRr9054tCuh2S8yGpwKFvOAxYMT4WNIs')
            model = genai.GenerativeModel('gemini-2.0-flash')

            # Send every page image together with the query in a single chat turn.
            images = [Image.open(path) for path in imagePaths]

            chat_session = model.start_chat()
            response = chat_session.send_message([*images, query])

            answer = response.text
            print(answer)
            return answer

        except Exception as e:
            print(f"An error occurred while querying Gemini: {e}")
            return f"Error: {str(e)}"
|
    def get_answer_from_openai(self, query, imagesPaths):
        # Despite the name, this method queries a local vision model served by Ollama.
        import dotenv

        # Reload runtime settings (model choice, temperature, flash attention) from the .env file.
        dotenv_file = dotenv.find_dotenv()
        dotenv.load_dotenv(dotenv_file)

        # Release any cached GPU memory before the vision model is loaded.
        torch.cuda.empty_cache()

        # Map the user-facing model names onto specific Ollama model tags.
        os.environ['OLLAMA_FLASH_ATTENTION'] = os.environ['flashattn']
        if os.environ['ollama'] == "minicpm-v":
            os.environ['ollama'] = "minicpm-v:8b-2.6-q8_0"
        elif os.environ['ollama'] == "gemma3":
            os.environ['ollama'] = "gemma3:12b"

        os.environ['OLLAMA_KEEP_ALIVE'] = "5m"
        os.environ['OLLAMA_ORIGINS'] = "*"

        print(f"Querying local Ollama model for query={query}, imagesPaths={imagesPaths}")
|
        try:
            # Build an augmented prompt that pushes the model to use every provided page.
            enhanced_query = f"""
            Please provide a comprehensive and detailed answer to the following query.
            Use ALL available information from the provided document images to give a thorough response.

            Query: {query}

            CRITICAL INSTRUCTIONS:
            - You have been provided with {len(imagesPaths)} document page(s)
            - You MUST reference information from ALL {len(imagesPaths)} page(s) in your response
            - Do not skip any pages - each page contains relevant information
            - If you mention one page, you must also mention the others
            - Ensure your response reflects the complete information from all pages

            Instructions for detailed response:
            1. Provide extensive background information and context
            2. Include specific details, examples, and data points from ALL documents
            3. Explain concepts thoroughly with step-by-step breakdowns
            4. Provide comprehensive analysis rather than simple answers when requested
            5. Explicitly reference each page and what information it contributes
            6. Cross-reference information between pages when relevant
            7. Ensure no page is left unmentioned in your analysis

            SPECIAL INSTRUCTIONS FOR TABULAR DATA:
            - If the query requests a table, list, or structured data, organize your response in a clear, structured format
            - Use numbered lists, bullet points, or clear categories when appropriate
            - Include specific data points or comparisons when available
            - Structure information in a way that can be easily converted to a table format

            IMPORTANT: Respond with natural, human-readable text only. Do not include any special tokens, codes, or technical identifiers in your response.

            Make sure to acknowledge and use information from all {len(imagesPaths)} provided pages.
            """
|
            current_model = os.environ['ollama']

            # Gemma 3 needs tighter sampling and extra stop tokens to avoid raw-token output.
            if "gemma3" in current_model.lower():
                model_options = {
                    "num_predict": 1024,
                    "stop": ["<eos>", "<|endoftext|>", "</s>", "<|im_end|>"],
                    "top_k": 20,
                    "top_p": 0.8,
                    "repeat_penalty": 1.2,
                    "seed": 42,
                    "temperature": 0.7,
                }
            else:
                model_options = {
                    "num_predict": 2048,
                    "stop": ["<eos>", "<|endoftext|>", "</s>"],
                    "top_k": 40,
                    "top_p": 0.9,
                    "repeat_penalty": 1.1,
                    "seed": 42,
                }

            # The sampling temperature comes from the environment; it belongs in
            # `options`, not in the message payload.
            response = chat(
                model=current_model,
                messages=[
                    {
                        'role': 'user',
                        'content': enhanced_query,
                        'images': imagesPaths,
                    }
                ],
                options={**model_options, "temperature": float(os.environ['temperature'])},
            )

            answer = response.message.content

            # Strip any raw special tokens the model may have leaked into the output.
            cleaned_answer = self._clean_raw_token_response(answer)
|
            # If the primary model still produced raw tokens, fall back to other vision models.
            if cleaned_answer and "❌ **Model Response Error**" in cleaned_answer:
                print(f"⚠️ Primary model {current_model} failed, trying fallback models...")

                fallback_models = [
                    "llama3.2-vision:latest",
                    "llava:latest",
                    "bakllava:latest",
                    "llama3.2:latest"
                ]

                for fallback_model in fallback_models:
                    try:
                        print(f"🔄 Trying fallback model: {fallback_model}")
                        response = chat(
                            model=fallback_model,
                            messages=[
                                {
                                    'role': 'user',
                                    'content': enhanced_query,
                                    'images': imagesPaths,
                                }
                            ],
                            options={
                                "num_predict": 2048,
                                "stop": ["<eos>", "<|endoftext|>", "</s>"],
                                "top_k": 40,
                                "top_p": 0.9,
                                "repeat_penalty": 1.1,
                                "seed": 42,
                                "temperature": float(os.environ['temperature']),
                            }
                        )

                        fallback_answer = response.message.content
                        cleaned_fallback = self._clean_raw_token_response(fallback_answer)

                        if cleaned_fallback and "❌ **Model Response Error**" not in cleaned_fallback:
                            print(f"✅ Fallback model {fallback_model} succeeded")
                            return cleaned_fallback

                    except Exception as fallback_error:
                        print(f"❌ Fallback model {fallback_model} failed: {fallback_error}")
                        continue

                # Every fallback failed; return the cleaned error message from the primary model.
                return cleaned_answer

            print(f"Original response: {answer}")
            print(f"Cleaned response: {cleaned_answer}")

            return cleaned_answer

        except Exception as e:
            print(f"An error occurred while querying the local Ollama model: {e}")
            return None
|
    def __get_openai_api_payload(self, query: str, imagesPaths: List[str]):
        # Build an OpenAI-style chat payload containing the query text and
        # base64-encoded page images.
        image_payload = []

        for imagePath in imagesPaths:
            base64_image = encode_image(imagePath)
            image_payload.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }
            })

        payload = {
            "model": "Llama3.2-vision",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": query
                        },
                        *image_payload
                    ]
                }
            ],
            "max_tokens": 1024
        }

        return payload
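

# A minimal usage sketch (illustrative only). It assumes a .env file defining the
# 'ollama', 'temperature', and 'flashattn' variables read above, a running Ollama
# server with the selected model pulled, and that the example image paths below
# point to real page images; adjust them for your setup.
if __name__ == "__main__":
    rag = Rag()

    # Hypothetical page images produced by an earlier retrieval/indexing step.
    pages = ["pages/page_1.png", "pages/page_2.png"]

    answer = rag.get_answer_from_openai(
        "Summarize the key findings across all pages.",
        pages,
    )
    print(answer)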