Priyanshukr-1 committed
Commit 9ee58dc · verified · 1 Parent(s): be3fe73

Update app.py

Files changed (1)
  1. app.py +26 -19
app.py CHANGED
@@ -154,45 +154,52 @@ async def generate(request: Request):
     Generates a response from the LLM without retaining chat context.
     Expects a JSON body with 'prompt'.
     """
-    logger.info("➡️ /generate endpoint received a request.") # Log at the very beginning
+    logger.info("➡️ /generate endpoint received a request.")
     data = await request.json()
-    prompt = data.get("prompt", "").strip()
+    user_input = data.get("prompt", "").strip() # Renamed to user_input for clarity
 
-    if not prompt:
+    if not user_input:
         logger.warning("Prompt cannot be empty in /generate request.")
         return {"error": "Prompt cannot be empty"}, 400
 
     # Define the system prompt - sent with every request
     system_prompt_content = (
-        "You are a highly efficient and objective data analysis API. "
-        "Your sole function is to process the provided data and instructions, then output ONLY the requested analysis in the specified format. "
-        "**Crucially, do NOT include any conversational text, greetings, introductions (e.g., 'Here is the report', 'Below is the analysis'), conclusions, or any remarks about being an AI.** "
-        "Respond directly with the content. "
-        "Adhere strictly to all formatting requirements given in the user's prompt (e.g., 'summary:{}', 'introduction:{}', numbered lists, bullet points). "
-        "Focus exclusively on data insights, statistics, trends, influencing factors, and actionable recommendations. "
-        "Be concise, professional, and factual. "
-        "If a request cannot be fulfilled due to data limitations or model capabilities, respond with: 'STATUS: FAILED_ANALYSIS; REASON: Unable to process this specific analytical request due to limitations.' No other text should be included."
+        "You are a highly efficient and objective data analysis API. You are the 'assistant'. "
+        "Your sole function is to process the user's data and instructions, then output ONLY the requested analysis in the specified format. "
+        "**Crucially, do NOT include any conversational text, greetings, introductions, conclusions, or any remarks about being an AI.** "
+        "Respond directly with the content. Adhere strictly to all formatting requirements. "
+        "If a request cannot be fulfilled, respond ONLY with: 'STATUS: FAILED_ANALYSIS; REASON: Unable to process this specific analytical request due to limitations.'"
     )
 
+    # === FIX: Wrap user input in a clear instruction to prevent role confusion ===
+    # This frames the user's text as 'data' for the model to analyze.
+    user_content_template = f"""Please analyze the following data based on the instructions within it.
+Provide only the direct output as requested. Do not add any extra conversational text.
+
+--- DATA ---
+{user_input}
+"""
 
     # Construct messages for the current request only
     messages_for_llm = [
         {"role": "system", "content": system_prompt_content},
-        {"role": "user", "content": prompt}
+        {"role": "user", "content": user_content_template} # Use the new template
     ]
 
     # Calculate tokens in the user's prompt
-    prompt_tokens = count_tokens_in_text(prompt)
+    prompt_tokens = count_tokens_in_text(user_input)
 
-    logger.info(f"🧾 Prompt received: {prompt}")
+    logger.info(f"🧾 Original user input: {user_input}")
     logger.info(f"Tokens in prompt: {prompt_tokens}")
 
     try:
         response = llm.create_chat_completion(
             messages=messages_for_llm,
-            max_tokens=800, # Keep response length short for maximum speed
-            temperature=0.7, # Adjust temperature for creativity vs. coherence (0.0-1.0)
-            stop=["</s>"] # Stop sequence for TinyLlama Chat
+            max_tokens=800,
+            # === FIX: Lower temperature for more factual, less creative output ===
+            temperature=0.2,
+            # === FIX: Use the CORRECT stop token for the chatml format ===
+            stop=["<|im_end|>"]
         )
         ai_response_content = response["choices"][0]["message"]["content"].strip()
 
@@ -201,9 +208,9 @@ async def generate(request: Request):
         logger.info("✅ Response generated successfully.")
         return {
             "response": ai_response_content,
-            "prompt_tokens": prompt_tokens, # Return tokens in the prompt
+            "prompt_tokens": prompt_tokens,
             "response_token_count": response_token_count
         }
     except Exception as e:
-        logger.error(f"❌ Error during generation: {e}", exc_info=True) # Log exception details
+        logger.error(f"❌ Error during generation: {e}", exc_info=True)
         return {"error": f"Failed to generate response: {e}. Please try again."}, 500