File size: 5,166 Bytes
722e6c7
d73f321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f001211
d73f321
 
 
 
 
 
 
 
f001211
d73f321
f001211
d73f321
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import gradio as gr
import requests
import hashlib
from functools import lru_cache
from docx import Document
import PyPDF2
import textract

# Fail fast at import time: the Groq credential must be present and non-empty
# in the process environment before any request can be made.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY not found in environment variables. Please add it to Hugging Face Secrets.")

# Function to generate hash of data directory contents
def get_data_hash(data_dir="data"):
    """Return a SHA-256 hex digest fingerprinting every file under *data_dir*.

    The digest covers each file's path (relative to *data_dir*) and its
    contents, visited in sorted order, so it changes when a file is added,
    removed, renamed or edited and is stable across runs otherwise.

    Args:
        data_dir: Directory to fingerprint. Defaults to ``"data"``.

    Returns:
        A 64-character hex string, or ``""`` when *data_dir* is not a
        directory or a file could not be read (the error is printed).
    """
    if not os.path.isdir(data_dir):
        return ""
    hasher = hashlib.sha256()
    try:
        for root, dirs, files in os.walk(data_dir):
            # Sorting in place fixes the order os.walk descends into
            # sub-directories; the raw listing order is not guaranteed.
            dirs.sort()
            for name in sorted(files):
                filepath = os.path.join(root, name)
                if not os.path.isfile(filepath):
                    continue
                file_hasher = hashlib.sha256()
                with open(filepath, 'rb') as f:
                    # Read in chunks so large documents are not loaded whole.
                    for chunk in iter(lambda: f.read(65536), b""):
                        file_hasher.update(chunk)
                # Mix in the relative path so a rename (e.g. a changed
                # extension) invalidates the digest even if bytes are equal.
                hasher.update(os.fsencode(os.path.relpath(filepath, data_dir)))
                hasher.update(b"\0")
                # A fixed-length per-file digest keeps file boundaries
                # unambiguous in the combined hash.
                hasher.update(file_hasher.digest())
        return hasher.hexdigest()
    except OSError as e:
        print(f"Error hashing files: {e}")
        return ""

# Cache business info processing with hash-based invalidation
@lru_cache(maxsize=1)
def read_business_info(data_hash, data_dir="data"):
    """Return the concatenated text of every supported document in *data_dir*.

    Only the top level of *data_dir* is scanned. Files are read in sorted
    name order so the resulting text is deterministic. Supported suffixes
    are ``.txt``, ``.pdf``, ``.docx`` and ``.doc`` (case-insensitive).

    Args:
        data_hash: Not read by the body; it only forms part of the
            ``lru_cache`` key, so passing a new value forces a re-read.
        data_dir: Directory holding the documents. Defaults to ``"data"``.

    Returns:
        The document texts joined by blank lines. A file that fails to parse
        contributes an ``"Error reading <name>: ..."`` entry instead of its
        text. If the directory is missing, or contains no supported files, a
        fixed explanatory message is returned instead.
    """
    # isdir (not exists) so that a stray regular file named like the data
    # directory is reported as missing rather than crashing os.listdir.
    if not os.path.isdir(data_dir):
        return "Data directory not found. Please upload a 'data' folder with relevant files."

    supported_extensions = {'.txt', '.pdf', '.docx', '.doc'}
    business_info = []

    # os.listdir order is arbitrary; sort for a reproducible result.
    for filename in sorted(os.listdir(data_dir)):
        filepath = os.path.join(data_dir, filename)
        ext = os.path.splitext(filename)[1].lower()

        # Skip unsupported files and sub-directories that merely carry a
        # supported suffix.
        if ext not in supported_extensions or not os.path.isfile(filepath):
            continue

        try:
            if ext == '.txt':
                with open(filepath, 'r', encoding='utf-8') as f:
                    business_info.append(f.read())
            elif ext == '.pdf':
                with open(filepath, 'rb') as f:
                    reader = PyPDF2.PdfReader(f)
                    # Defensive: treat a page that yields no text as empty so
                    # one such page cannot fail the join for the whole file.
                    text = '\n'.join(
                        page.extract_text() or '' for page in reader.pages
                    )
                    business_info.append(text)
            elif ext == '.docx':
                doc = Document(filepath)
                business_info.append('\n'.join(para.text for para in doc.paragraphs))
            elif ext == '.doc':
                business_info.append(textract.process(filepath).decode('utf-8'))
        except Exception as e:
            # Deliberately broad: one unreadable document must not prevent
            # the remaining files from being loaded.
            business_info.append(f"Error reading {filename}: {e}")

    if not business_info:
        return "No valid files found in the data directory."

    return '\n\n'.join(business_info)

# Convert Gradio chat history (either supported shape) to API messages
def _history_to_messages(chat_history):
    """Translate Gradio chat history into OpenAI-style message dicts.

    Accepts both history shapes Gradio can pass to a chat callback:
    ``(user, assistant)`` pairs, or dicts carrying ``"role"`` and
    ``"content"`` keys. Empty or ``None`` turns are dropped; dict entries
    are kept only for the ``user``/``assistant`` roles with string content.
    """
    messages = []
    for turn in chat_history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue
        user_msg, assistant_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # None while a reply is still pending
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages


# Function to generate response using Groq's LLaMA 3 70B API
def generate_response(message, chat_history):
    """Answer *message* via Groq's chat-completions API, grounded in the data files.

    The system prompt embeds the text returned by ``read_business_info``,
    keyed by the current ``get_data_hash`` value so edited documents are
    re-read. Earlier turns from *chat_history* are replayed before the new
    user message.

    Args:
        message: The user's latest message.
        chat_history: Prior turns, as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts; may be empty or ``None``.

    Returns:
        The assistant's reply text on HTTP 200. Otherwise a string starting
        with ``"Error: "`` (non-200 status) or ``"An error occurred: "``
        (network failure or malformed response). Those failures are reported
        in the return value rather than raised.
    """
    current_hash = get_data_hash()
    business_info = read_business_info(current_hash)

    # Create system prompt including business information.
    # The second piece is a whitespace-only line that was present in the
    # original prompt; it is kept byte-for-byte so the prompt is unchanged.
    system_prompt = (
        "You are a helpful business assistant that answers questions about a specific business.\n"
        "    \n"
        "Business Information:\n"
        f"{business_info}\n"
        'Answer ONLY using information from the business information above. If the question cannot be answered using the provided business information, respond with "Yeh information abhi available nahi hai."\n'
        "You can respond in Hindi-English mix (Hinglish) if the user asks in that format. Be concise and helpful."
    )

    # System prompt first, then prior turns, then the current message
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(chat_history))
    messages.append({"role": "user", "content": message})

    # Make API call to Groq
    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {GROQ_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                # NOTE(review): confirm this model id is still served by Groq.
                "model": "llama3-70b-8192",
                "messages": messages,
                "temperature": 0.7,
                "max_tokens": 500
            },
            timeout=60
        )

        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}"
        return response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Network/timeout failures, or a 200 body that is not the expected
        # JSON shape, are reported to the chat instead of raising.
        return f"An error occurred: {str(e)}"

# Create a simple Gradio chat interface
def respond(message, history):
    """Chat callback: delegate to ``generate_response`` and return its reply."""
    return generate_response(message, history)

# Sample questions offered as one-click starters in the chat UI
_EXAMPLE_QUESTIONS = [
    "What are your business hours?",
    "कीमत क्या है?",
    "Tell me about your products",
    "Return policy kya hai?",
]

# Chat UI wired to the `respond` callback
demo = gr.ChatInterface(
    fn=respond,
    examples=_EXAMPLE_QUESTIONS,
    cache_examples=False,  # Disable Gradio's example caching
    title="🌿 My Business Bot",
    description="Ask anything about your business in Hindi-English",
    theme=gr.themes.Soft(),
)

# Start the server only when this file is run as a script
if __name__ == "__main__":
    demo.launch()