CamiloVega committed
Commit e0875e2 · verified · 1 parent: 357d91e

Update app.py

Files changed (1)
  1. app.py +41 -24
app.py CHANGED
@@ -14,7 +14,7 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Define the model name
-model_name = "microsoft/phi-2"
+model_name = "huggingface-projects/llama-2-7b-chat"
 
 try:
     logger.info("Starting model initialization...")
@@ -32,19 +32,21 @@ try:
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        use_auth_token=True
     )
     logger.info("Tokenizer loaded successfully")
 
-    # Load model
+    # Load model with 8-bit quantization
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        trust_remote_code=True
+        trust_remote_code=True,
+        use_auth_token=True,
+        load_in_8bit=True,
+        device_map="auto"
     )
-    if device == "cuda":
-        model = model.to(device)
     logger.info("Model loaded successfully")
 
     # Create pipeline
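Note: load_in_8bit=True relies on the bitsandbytes package and device_map="auto" on accelerate, so both must be installed in the Space. Newer transformers releases express the same load through BitsAndBytesConfig and prefer token= over the deprecated use_auth_token=. A sketch of the equivalent call, under those assumptions:

    # Sketch, assuming bitsandbytes and accelerate are available.
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    model = AutoModelForCausalLM.from_pretrained(
        "huggingface-projects/llama-2-7b-chat",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",          # accelerate spreads layers over devices
        torch_dtype=torch.float16,  # non-quantized layers stay in fp16
        token=True,                 # newer spelling of use_auth_token=True
    )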
@@ -58,7 +60,7 @@ try:
         temperature=0.7,
         top_p=0.9,
         repetition_penalty=1.1,
-        device=0 if device == "cuda" else -1
+        device_map="auto"
     )
     logger.info("Pipeline created successfully")
 
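Note: the pipeline's device argument switches from an explicit GPU index to device_map="auto" to match the quantized load. When the model object passed in has already been dispatched with device_map="auto", some transformers versions warn that the extra argument is redundant, and it can simply be omitted:

    # Sketch: an already-dispatched model carries its own placement,
    # so the pipeline can be built without any device argument.
    from transformers import pipeline

    model_gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )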
@@ -72,6 +74,10 @@ You help users with any topic while raising awareness about water consumption
 in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
 equivalent to the daily consumption of a city of 10,000 people?"""
 
+# Llama 2 specific tokens
+B_INST, E_INST = "[INST]", "[/INST]"
+B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
     "input_training": 0.0000309,
@@ -96,8 +102,19 @@ def calculate_water_consumption(text, is_input=True):
         return tokens * (WATER_PER_TOKEN["input_training"] + WATER_PER_TOKEN["input_inference"])
     return tokens * (WATER_PER_TOKEN["output_training"] + WATER_PER_TOKEN["output_inference"])
 
-def format_message(role, content):
-    return {"role": role, "content": content}
+def format_prompt(user_input, chat_history):
+    """
+    Format the prompt according to Llama 2 specific style
+    """
+    prompt = f"{B_INST}{B_SYS}{system_message}{E_SYS}"
+
+    if chat_history:
+        for user_msg, assistant_msg in chat_history:
+            prompt += f"{user_msg}{E_INST}{assistant_msg}{B_INST}"
+
+    prompt += f"{user_input}{E_INST}"
+
+    return prompt
 
 @spaces.GPU(duration=60)
 @torch.inference_mode()
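Note: format_prompt assembles the Llama 2 instruction format from the B_INST/B_SYS markers defined earlier. Meta's reference layout additionally separates consecutive turns with EOS/BOS tokens (</s><s>), which this version omits; the model usually still answers, but multi-turn formatting can drift. For comparison, the reference shape is:

    <s>[INST] <<SYS>>
    {system_message}
    <</SYS>>

    {user_1} [/INST] {answer_1} </s><s>[INST] {user_2} [/INST]

(The leading <s> is normally added by the tokenizer rather than written into the prompt string.)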
@@ -110,13 +127,8 @@ def generate_response(user_input, chat_history):
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
 
-        # Create prompt
-        conversation_history = ""
-        if chat_history:
-            for message in chat_history:
-                conversation_history += f"User: {message[0]}\nAssistant: {message[1]}\n"
-
-        prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"
+        # Format prompt for Llama 2
+        prompt = format_prompt(user_input, chat_history)
 
         logger.info("Generating model response...")
         outputs = model_gen(
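The model_gen call is truncated by this hunk. One detail worth noting for Llama-style prompts: a text-generation pipeline returns the prompt plus the completion by default, so the assistant text is usually recovered either by slicing the prompt off or by passing return_full_text=False. A sketch of the latter (standard pipeline arguments, not taken from this file):

    # Sketch: exclude the echoed prompt from the returned text.
    outputs = model_gen(prompt, return_full_text=False)
    assistant_response = outputs[0]["generated_text"].strip()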
@@ -133,7 +145,7 @@ def generate_response(user_input, chat_history):
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
 
-        # Update chat history with the new formatted messages
+        # Update chat history
         chat_history.append([user_input, assistant_response])
 
         # Prepare water consumption message
@@ -167,8 +179,8 @@ try:
         <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
             <h1 style="color: #2d333a;">AQuaBot</h1>
             <p style="color: #4a5568;">
-                Welcome to AQuaBot - An AI assistant that helps raise awareness about water
-                consumption in language models.
+                Welcome to AQuaBot - An AI assistant powered by Llama 2 that helps raise awareness
+                about water consumption in language models.
             </p>
         </div>
         """)
@@ -193,7 +205,7 @@ try:
         """)
         clear = gr.Button("Clear Chat")
 
-        # Add footer with citation and disclaimer
+        # Add footer with citation, disclaimer, and credits
         gr.HTML("""
         <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
              background-color: #f8f9fa; border-radius: 10px;">
@@ -207,10 +219,15 @@ try:
             </div>
             <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                 <p style="color: #666; font-size: 14px;">
-                    <strong>Important note:</strong> This application uses Microsoft's Phi-2 model
-                    instead of GPT-3 for availability and cost reasons. However,
-                    the water consumption calculations per token (input/output) are based on the
-                    conclusions from the cited paper.
+                    <strong>Model Information:</strong> This application uses Meta's Llama 2 (7B) model,
+                    a state-of-the-art language model fine-tuned for chat interactions. Water consumption
+                    calculations are based on the methodology from the cited paper.
+                </p>
+            </div>
+            <div style="border-top: 1px solid #ddd; margin-top: 15px; padding-top: 15px;">
+                <p style="color: #666; font-size: 14px;">
+                    Created by Camilo Vega - AI Consultant<br>
+                    <a href="https://github.com/vegadevs/aquabot" target="_blank">GitHub Repository</a>
                 </p>
             </div>
         </div>