"""Gradio chat bot that answers business questions via Groq's chat API.

Business knowledge is read from files in the local ``data`` directory and
injected into the system prompt of every request.
"""

import hashlib
import os
from functools import lru_cache

import gradio as gr
import PyPDF2
import requests
import textract
from docx import Document

# Directory scanned for business documents.
DATA_DIR = "data"

# File extensions that read_business_info knows how to extract text from.
SUPPORTED_EXTENSIONS = ('.txt', '.pdf', '.docx', '.doc')

GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

# The model id can be overridden without a code change; the default is the
# id this script has always used.
# NOTE(review): confirm the default id is still served by Groq.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama3-70b-8192")

# Size of the chunks used when hashing files, so large files are not loaded
# into memory in one piece.
_HASH_CHUNK_SIZE = 65536

# Check if the API key exists in environment variables
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY not found in environment variables. Please add it to Hugging Face Secrets.")


def get_data_hash():
    """Return a SHA-256 hex digest describing the contents of the data directory.

    The digest covers every regular file found by walking ``DATA_DIR``
    (relative path plus bytes), visited in sorted order so the result is
    deterministic. It is used as the cache key for ``read_business_info``.

    Returns:
        The hex digest, or ``""`` when the directory does not exist or a file
        could not be read.
    """
    if not os.path.exists(DATA_DIR):
        return ""

    hasher = hashlib.sha256()
    try:
        for root, dirs, files in os.walk(DATA_DIR):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            for name in sorted(files):
                filepath = os.path.join(root, name)
                if not os.path.isfile(filepath):
                    continue
                # Include the relative path so renaming a file (or swapping
                # the contents of two files) also invalidates the cache.
                rel_path = os.path.relpath(filepath, DATA_DIR)
                hasher.update(rel_path.encode('utf-8', errors='surrogateescape'))
                hasher.update(b'\0')
                with open(filepath, 'rb') as f:
                    for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b''):
                        hasher.update(chunk)
        return hasher.hexdigest()
    except OSError as e:
        print(f"Error hashing files: {e}")
        return ""


def _extract_text(filepath, ext):
    """Return the text of one supported file; ``ext`` is its lower-cased extension."""
    if ext == '.txt':
        with open(filepath, 'r', encoding='utf-8') as f:
            return f.read()
    if ext == '.pdf':
        with open(filepath, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            # Guard against pages for which no text is returned, so one such
            # page cannot make the join fail for the whole document.
            return '\n'.join((page.extract_text() or "") for page in reader.pages)
    if ext == '.docx':
        doc = Document(filepath)
        return '\n'.join(para.text for para in doc.paragraphs)
    if ext == '.doc':
        return textract.process(filepath).decode('utf-8')
    raise ValueError(f"Unsupported file type: {ext}")


# Cache business info processing with hash-based invalidation
@lru_cache(maxsize=1)
def read_business_info(data_hash):
    """Read and concatenate the text of all supported files in ``DATA_DIR``.

    Only the top level of the directory is read. Files are processed in
    sorted name order so the resulting text is deterministic.

    Args:
        data_hash: Digest from ``get_data_hash``. It is not used in the body;
            it only serves as the ``lru_cache`` key, so a changed digest
            forces the files to be re-read.

    Returns:
        The extracted texts joined by blank lines. A file that fails to read
        contributes an ``"Error reading ..."`` entry instead of raising. A
        fixed explanatory message is returned when the directory is missing
        or contains no supported files.
    """
    if not os.path.exists(DATA_DIR):
        return "Data directory not found. Please upload a 'data' folder with relevant files."

    business_info = []
    for filename in sorted(os.listdir(DATA_DIR)):
        ext = os.path.splitext(filename)[1].lower()
        if ext not in SUPPORTED_EXTENSIONS:
            continue  # Skip unsupported files
        filepath = os.path.join(DATA_DIR, filename)
        try:
            business_info.append(_extract_text(filepath, ext))
        except Exception as e:
            # Best effort: a single unreadable file must not take down the
            # bot, so the failure is reported inline instead.
            business_info.append(f"Error reading {filename}: {str(e)}")

    if not business_info:
        return "No valid files found in the data directory."
    return '\n\n'.join(business_info)


def _history_to_messages(chat_history):
    """Convert Gradio chat history into a list of chat-completions messages.

    Two shapes are accepted: ``(user, assistant)`` pairs, and dicts carrying
    ``"role"`` and ``"content"`` keys. Entries without usable text are skipped.
    """
    messages = []
    for turn in chat_history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            # Only plain-text user/assistant turns can be forwarded as-is.
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        user_msg, assistant_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # Only add if not None
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


# Function to generate response using Groq's chat completions API
def generate_response(message, chat_history):
    """Answer ``message`` using the business documents and the chat history.

    Args:
        message: The user's current message.
        chat_history: Previous turns, either as ``(user, assistant)`` pairs or
            as dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        The assistant's reply text. On a non-200 response or any exception an
        error description string is returned instead; this function does not
        raise for request failures.
    """
    current_hash = get_data_hash()
    business_info = read_business_info(current_hash)

    # Create system prompt including business information
    system_prompt = (
        "You are a helpful business assistant that answers questions about "
        "a specific business. "
        f"Business Information: {business_info} "
        "Answer ONLY using information from the business information above. "
        "If the question cannot be answered using the provided business "
        'information, respond with "Yeh information abhi available nahi hai." '
        "You can respond in Hindi-English mix (Hinglish) if the user asks in "
        "that format. \n"
        "Be concise and helpful."
    )

    # Prepare conversation: system prompt, prior turns, then the new message
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(chat_history))
    messages.append({"role": "user", "content": message})

    # Make API call to Groq
    try:
        response = requests.post(
            GROQ_API_URL,
            headers={
                "Authorization": f"Bearer {GROQ_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": GROQ_MODEL,
                "messages": messages,
                "temperature": 0.7,
                "max_tokens": 500,
            },
            timeout=60,
        )
        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]
        return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        # UI boundary: show the failure in the chat rather than crash the app.
        return f"An error occurred: {str(e)}"


# Create a simple Gradio chat interface
def respond(message, history):
    """Gradio callback: return the bot's reply for ``message`` given ``history``."""
    return generate_response(message, history)


demo = gr.ChatInterface(
    fn=respond,
    title="🌿 My Business Bot",
    description="Ask anything about your business in Hindi-English",
    theme=gr.themes.Soft(),
    examples=["What are your business hours?", "कीमत क्या है?", "Tell me about your products", "Return policy kya hai?"],
    cache_examples=False  # Disable Gradio's example caching
)

# Launch the app
if __name__ == "__main__":
    demo.launch()