# utils/helpers.py
"""
Helper utility functions
"""
import json
import os
import random
from datetime import datetime
from zoneinfo import ZoneInfo

from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

load_dotenv()
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
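# The key is read from a local .env file (loaded above). An illustrative,
# assumed layout for that file:
#   GEMINI_API_KEY=<your Google Generative AI key>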


def create_vector_store():
    """
    Checks if a vector store index exists. If not, it creates one from
    the PDFs in the knowledge_base folder.
    """
    persist_directory = '/tmp/faiss_index'
    if os.path.exists(persist_directory):
        print("--- Knowledge base (FAISS index) already exists. Skipping creation. ---")
        return

    # Check if there are files to process
    if not os.path.exists("./knowledge_base") or not os.listdir("./knowledge_base"):
        print("--- 'knowledge_base' folder is empty or does not exist. Skipping index creation. ---")
        return

    print("--- Creating new knowledge base... ---")
    loader = PyPDFDirectoryLoader("./knowledge_base/")
    documents = loader.load()
    if not documents:
        print("--- No documents could be loaded. Skipping index creation. ---")
        return

    print(f"--- Loaded {len(documents)} document(s). Splitting text... ---")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(documents)

    print("--- Creating embeddings and vector store. This may take a moment... ---")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GEMINI_API_KEY)
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(persist_directory)
    print("--- Knowledge base created successfully. ---")


def load_quotes():
    """Load inspirational quotes from the Gita/Vedas."""
    quotes_file = 'data/quotes.json'
    default_quotes = [
        "विद्या ददाति विनयं - Knowledge gives humility",
        "योग: कर्मसु कौशलम् - Yoga is skill in action",
        "श्रेयान्स्वधर्मो विगुण: - Better is one's own dharma though imperfectly performed",
        "कर्मण्येवाधिकारस्ते - You have the right to perform action",
        "विद्या धनं सर्व धन प्रधानम् - Knowledge is the supreme wealth",
        "सत्यमेव जयते - Truth alone triumphs",
        "तमसो मा ज्योतिर्गमय - Lead me from darkness to light",
        "अहिंसा परमो धर्म: - Non-violence is the supreme virtue"
    ]

    # Seed the quotes file with the defaults on first run
    if not os.path.exists(quotes_file):
        os.makedirs('data', exist_ok=True)
        with open(quotes_file, 'w', encoding='utf-8') as f:
            json.dump(default_quotes, f, indent=2, ensure_ascii=False)
        return default_quotes

    try:
        with open(quotes_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        # Fall back to the built-in defaults if the file is unreadable or corrupt
        return default_quotes
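

# Illustrative helper (an assumption, not in the original file): the otherwise
# unused `random` import above suggests quotes are surfaced one at a time,
# roughly like this. The name `get_random_quote` is hypothetical.
def get_random_quote():
    """Return one quote chosen uniformly at random."""
    return random.choice(load_quotes())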


def get_greeting():
    """
    Returns a time-of-day appropriate greeting in English and Hindi,
    specifically for the Indian Standard Time (IST) timezone.
    """
    # Define the Indian Standard Time timezone
    ist_timezone = ZoneInfo("Asia/Kolkata")
    # Get the current time in the IST timezone
    current_time_ist = datetime.now(ist_timezone)
    current_hour = current_time_ist.hour

    if 5 <= current_hour < 12:
        return "☀️ सुप्रभात (Good Morning)! Ready to start the day?"
    elif 12 <= current_hour < 17:
        return "☀️ नमस्कार (Good Afternoon)! Time for a study session?"
    elif 17 <= current_hour < 21:
        return "🌇 शुभ संध्या (Good Evening)! Wrapping up your studies?"
    else:
        return "🌙 शुभ रात्रि (Good Night)! Late night study session?"


def format_indian_text(text, add_emojis=True):
    """Format text with Indian cultural elements"""
    if add_emojis:
        # Add a relevant emoji based on content
        if any(word in text.lower() for word in ['drug', 'medicine', 'pharmaceutical']):
            text = f"💊 {text}"
        elif any(word in text.lower() for word in ['study', 'learn', 'education']):
            text = f"📚 {text}"
        elif any(word in text.lower() for word in ['quiz', 'test', 'exam']):
            text = f"❓ {text}"
        elif any(word in text.lower() for word in ['memory', 'remember', 'mnemonic']):
            text = f"🧠 {text}"
    return text
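

# Minimal manual smoke test (an illustrative addition, not part of the
# original module): exercises the helpers when this file is run directly.
if __name__ == "__main__":
    print(get_greeting())
    print(format_indian_text("Time to study pharmacology!"))
    print(random.choice(load_quotes()))
    create_vector_store()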