QWEN-2.5-Coder-7B

Sleeping

App Files Files Community

Leri777 committed on Oct 8, 2024

Commit

a800c44

verified ·

1 Parent(s): 15fd008

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -17

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import os
 import json
 import subprocess
 from threading import Thread
 import torch
 import spaces
@@ -10,21 +12,29 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"
 CHAT_TEMPLATE = "ChatML"
 MODEL_NAME = MODEL_ID.split("/")[-1]
 CONTEXT_LENGTH = 16000
-# Estableciendo valores directamente para las variables
-COLOR = "blue"  # Color predeterminado de la interfaz
-EMOJI = "🤖"  # Emoji predeterminado para el modelo
-DESCRIPTION = f"This is the {MODEL_NAME} model designed for coding assistance and general AI tasks."  # Descripción predeterminada
 @spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    # Format history with a given chat template
     if CHAT_TEMPLATE == "Auto":
         stop_tokens = [tokenizer.eos_token_id]
         instruction = system_prompt + "\n\n"
@@ -69,14 +79,17 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
-    for new_token in streamer:
-        outputs.append(new_token)
-        if new_token in stop_tokens:
-            break
-        yield "".join(outputs)
-# Load model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
@@ -90,12 +103,13 @@ model = AutoModelForCausalLM.from_pretrained(
     attn_implementation="flash_attention_2",
 )
-# Create Gradio interface
 gr.ChatInterface(
     predict,
     title=EMOJI + " " + MODEL_NAME,
     description=DESCRIPTION,
-examples=[
        ["Can you solve the equation 2x + 3 = 11 for x in Python?"],
        ["Write a Java program that checks if a number is even or odd."],
        ["How can I reverse a string in JavaScript?"],
@@ -117,4 +131,6 @@ examples=[
         gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
     theme=gr.themes.Soft(primary_hue=COLOR),
-).queue().launch()

 import json
 import subprocess
 from threading import Thread
+import logging
+from logging.handlers import RotatingFileHandler
 import torch
 import spaces
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+log_file = '/tmp/app_debug.log'
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5)
+file_handler.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+logger.debug("Application started")
 MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"
 CHAT_TEMPLATE = "ChatML"
 MODEL_NAME = MODEL_ID.split("/")[-1]
 CONTEXT_LENGTH = 16000
+COLOR = "blue"
+EMOJI = "🤖"
+DESCRIPTION = f"This is the {MODEL_NAME} model designed for coding assistance and general AI tasks."
 @spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+    logger.debug(f"Received prediction request: message='{message}', system_prompt='{system_prompt}'")
     if CHAT_TEMPLATE == "Auto":
         stop_tokens = [tokenizer.eos_token_id]
         instruction = system_prompt + "\n\n"
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
+    try:
+        for new_token in streamer:
+            outputs.append(new_token)
+            if new_token in stop_tokens:
+                break
+            yield "".join(outputs)
+        logger.debug(f"Prediction completed successfully for message: '{message}'")
+    except Exception as e:
+        logger.exception(f"Error during prediction for message '{message}': {str(e)}")
+        yield "An error occurred during processing."
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
     attn_implementation="flash_attention_2",
 )
+logger.debug("Model and tokenizer loaded successfully")
 gr.ChatInterface(
     predict,
     title=EMOJI + " " + MODEL_NAME,
     description=DESCRIPTION,
+    examples=[
        ["Can you solve the equation 2x + 3 = 11 for x in Python?"],
        ["Write a Java program that checks if a number is even or odd."],
        ["How can I reverse a string in JavaScript?"],
         gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
     theme=gr.themes.Soft(primary_hue=COLOR),
+).queue().launch()
+logger.debug("Chat interface initialized and launched")