ceymox committed on
Commit
96e7af4
·
verified ·
1 Parent(s): 34917ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -75
app.py CHANGED
@@ -7,29 +7,26 @@ from googleapiclient.discovery import build
7
  import gradio as gr
8
  import torch
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
 
 
 
10
 
11
  # Google Calendar API setup with Service Account
12
  SCOPES = ['https://www.googleapis.com/auth/calendar']
13
  # Calendar ID - use your calendar ID here
14
  CALENDAR_ID = os.getenv('CALENDAR_ID', '26f5856049fab3d6648a2f1dea57c70370de6bc1629a5182be1511b0e75d11d3@group.calendar.google.com')
15
- # Path to your service account key file
16
- SERVICE_ACCOUNT_FILE = os.getenv('SERVICE_ACCOUNT_FILE', 'service-account-key.json')
17
 
18
  # Load Llama 3.1 model
19
  MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
20
 
21
  def get_calendar_service():
22
  """Set up Google Calendar service using service account"""
23
- # Load service account info from environment or file
24
- if os.getenv('SERVICE_ACCOUNT_INFO'):
25
- # For Spaces deployment, load from environment variable
26
- service_account_info = json.loads(os.getenv('SERVICE_ACCOUNT_INFO'))
27
- credentials = service_account.Credentials.from_service_account_info(
28
- service_account_info, scopes=SCOPES)
29
- else:
30
- # For local development, load from file
31
- credentials = service_account.Credentials.from_service_account_file(
32
- SERVICE_ACCOUNT_FILE, scopes=SCOPES)
33
 
34
  service = build('calendar', 'v3', credentials=credentials)
35
  return service
@@ -106,26 +103,6 @@ def add_event_to_calendar(name, date, time_str, duration_minutes=60):
106
  print(f"Event details: {json.dumps(event, indent=2)}")
107
  raise
108
 
109
- # Load model on startup to avoid loading it for each request
110
- @gr.utils.memoize(utils=["torch"])
111
- def load_llama_model():
112
- """Load the Llama 3.1 model"""
113
- print("Loading Llama 3.1 model...")
114
-
115
- # Spaces will handle the quantization, so we use default loading
116
- # or you can adjust quantization based on available resources
117
- model = AutoModelForCausalLM.from_pretrained(
118
- MODEL_ID,
119
- torch_dtype=torch.bfloat16,
120
- device_map="auto",
121
- low_cpu_mem_usage=True,
122
- use_cache=True
123
- )
124
-
125
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
126
-
127
- return model, tokenizer
128
-
129
  def extract_function_call(text):
130
  """Extract function call parameters from Llama's response text"""
131
  # Look for JSON-like structure in the response
@@ -257,56 +234,64 @@ any links to view their appointment.
257
  IMPORTANT: Make sure to interpret times correctly. If a user says '2 PM' or just '2',
258
  this likely means 2:00 PM (14:00) in 24-hour format."""
259
 
260
- # Initialize model and tokenizer once at startup
261
- model, tokenizer = load_llama_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- # Create text generation pipeline
264
- llm_pipeline = pipeline(
265
- "text-generation",
266
- model=model,
267
- tokenizer=tokenizer,
268
- return_full_text=True
269
- )
270
 
271
  # Create Gradio interface
272
- def create_interface():
273
- # Initialize conversation history
274
- conversation_history = [{"role": "system", "content": system_prompt}]
275
 
276
- with gr.Blocks() as app:
277
- gr.Markdown("# Indian Time Zone Appointment Booking with Llama 3.1")
278
- gr.Markdown("Say something like 'Book an appointment for John on May 10th at 2pm'")
 
 
 
 
 
 
 
 
 
279
 
280
- # Chat interface
281
- chatbot = gr.Chatbot()
282
- msg = gr.Textbox(placeholder="Type your message here...", label="Message")
283
- clear = gr.Button("Clear Chat")
284
 
285
- # State for conversation history
286
- state = gr.State(conversation_history)
287
 
288
- # Handle user input
289
- def user_input(message, history, conv_history):
290
- if message.strip() == "":
291
- return "", history, conv_history
292
-
293
- # Get response from Llama
294
- response, updated_conv_history = process_with_llama(message, conv_history, llm_pipeline)
295
-
296
- # Update chat display
297
- history.append((message, response))
298
-
299
- return "", history, updated_conv_history
300
-
301
- # Connect components
302
- msg.submit(user_input, [msg, chatbot, state], [msg, chatbot, state])
303
- clear.click(lambda: ([], [{"role": "system", "content": system_prompt}]), None, [chatbot, state])
304
 
305
- return app
306
-
307
- # Create and launch the app
308
- app = create_interface()
309
 
310
- # Launch for Spaces
311
  if __name__ == "__main__":
312
- app.launch()
 
7
  import gradio as gr
8
  import torch
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
10
+ from huggingface_hub import login
11
+
12
+ # Login to Hugging Face if token is provided (for accessing gated models)
13
+ if os.getenv("HF_TOKEN"):
14
+ login(os.getenv("HF_TOKEN"))
15
 
16
  # Google Calendar API setup with Service Account
17
  SCOPES = ['https://www.googleapis.com/auth/calendar']
18
  # Calendar ID - use your calendar ID here
19
  CALENDAR_ID = os.getenv('CALENDAR_ID', '26f5856049fab3d6648a2f1dea57c70370de6bc1629a5182be1511b0e75d11d3@group.calendar.google.com')
 
 
20
 
21
  # Load Llama 3.1 model
22
  MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
23
 
24
  def get_calendar_service():
25
  """Set up Google Calendar service using service account"""
26
+ # Load service account info from environment
27
+ service_account_info = json.loads(os.getenv('SERVICE_ACCOUNT_INFO', '{}'))
28
+ credentials = service_account.Credentials.from_service_account_info(
29
+ service_account_info, scopes=SCOPES)
 
 
 
 
 
 
30
 
31
  service = build('calendar', 'v3', credentials=credentials)
32
  return service
 
103
  print(f"Event details: {json.dumps(event, indent=2)}")
104
  raise
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  def extract_function_call(text):
107
  """Extract function call parameters from Llama's response text"""
108
  # Look for JSON-like structure in the response
 
234
  IMPORTANT: Make sure to interpret times correctly. If a user says '2 PM' or just '2',
235
  this likely means 2:00 PM (14:00) in 24-hour format."""
236
 
237
+ # Initialize model and pipeline
238
+ def load_model_and_pipeline():
239
+ model = AutoModelForCausalLM.from_pretrained(
240
+ MODEL_ID,
241
+ torch_dtype=torch.bfloat16,
242
+ device_map="auto",
243
+ low_cpu_mem_usage=True
244
+ )
245
+
246
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
247
+
248
+ # Create text generation pipeline
249
+ llm_pipeline = pipeline(
250
+ "text-generation",
251
+ model=model,
252
+ tokenizer=tokenizer,
253
+ return_full_text=True,
254
+ max_new_tokens=1024
255
+ )
256
+
257
+ return llm_pipeline
258
+
259
+ # Initialize conversation history with system prompt
260
+ conversation_history = [{"role": "system", "content": system_prompt}]
261
 
262
+ # Load model and pipeline at startup
263
+ llm_pipe = load_model_and_pipeline()
 
 
 
 
 
264
 
265
  # Create Gradio interface
266
+ with gr.Blocks(title="Calendar Booking Assistant") as demo:
267
+ gr.Markdown("# Indian Time Zone Appointment Booking with Llama 3.1")
268
+ gr.Markdown("Say something like 'Book an appointment for John on May 10th at 2pm'")
269
 
270
+ # Chat interface
271
+ chatbot = gr.Chatbot()
272
+ msg = gr.Textbox(placeholder="Type your message here...", label="Message")
273
+ clear = gr.Button("Clear Chat")
274
+
275
+ # State for conversation history
276
+ state = gr.State(conversation_history)
277
+
278
+ # Handle user input
279
+ def user_input(message, history, conv_history):
280
+ if message.strip() == "":
281
+ return "", history, conv_history
282
 
283
+ # Get response from Llama
284
+ response, updated_conv_history = process_with_llama(message, conv_history, llm_pipe)
 
 
285
 
286
+ # Update chat display
287
+ history.append((message, response))
288
 
289
+ return "", history, updated_conv_history
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
+ # Connect components
292
+ msg.submit(user_input, [msg, chatbot, state], [msg, chatbot, state])
293
+ clear.click(lambda: ([], [{"role": "system", "content": system_prompt}]), None, [chatbot, state])
 
294
 
295
+ # Launch the app
296
  if __name__ == "__main__":
297
+ demo.launch()