Spaces:

juancamval
/

graph_generator

Sleeping

App Files Files Community

Enhance explanation via prompt engineering

by angelicaporto - opened Apr 30

base: refs/heads/main

←

from: refs/pr/5

Discussion Files changed

+64

-42

Files changed (1) hide show

app.py +64 -42

app.py CHANGED Viewed

@@ -8,50 +8,51 @@ import re
 import pandas as pd  # type: ignore
 from dotenv import load_dotenv  # type: ignore # Para cambios locales
 from supabase import create_client, Client  # type: ignore
-from pandasai import Agent
 # from pandasai import SmartDataframe  # type: ignore
-from pandasai.llm.local_llm import LocalLLM
 from pandasai import Agent
 import matplotlib.pyplot as plt
 # ---------------------------------------------------------------------------------
 # Funciones auxiliares
 # ---------------------------------------------------------------------------------
-# Ejemplo de prompt generado:
-# generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
 def generate_graph_prompt(user_query):
     prompt = f"""
-    You are a highly skilled data scientist working with European demographic data.
-    Given the user's request: "{user_query}"
-    1. Plot the relevant data according to the user's request.
-    2. After generating the plot, write a clear, human-readable explanation of the plot (no code).
-    3. Save the explanation in a variable called "explanation".
-    VERY IMPORTANT:
-    - Declare a result variable as a dictionary that includes:
-      - type = "plot"
-      - value = the path to the saved plot
-      - explanation = the explanation text you wrote
-    Example of expected result dictionary:
-    result = {{
-        "type": "plot",
-        "value": "temp_chart.png",
-        "explanation": explanation
-    }}
-    Only respond with valid Python code.
-    IMPORTANT: Stick strictly to using the data available in the database.
-    """
     return prompt
-# TODO: Mejorar prompt
 # ---------------------------------------------------------------------------------
 # Configuración de conexión a Supabase
@@ -101,20 +102,18 @@ def load_data(table):
 # Cargar datos iniciales
 # ---------------------------------------------------------------------------------
-# # Cargar datos desde la tabla "labor"
-data = load_data("labor")
 # TODO: La idea es luego usar todas las tablas, cuando ya funcione.
-# Se puede si el modelo funciona con las gráficas, sino que toca mejorarlo
-# porque serían consultas más complejas.
-# labor_data = load_data("labor")
-# fertility_data = load_data("fertility")
 # population_data = load_data("population")
-# predictions_data = load_data("predictions")
 # ---------------------------------------------------------------------------------
-# Inicializar modelo
 # ---------------------------------------------------------------------------------
 # ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
@@ -124,43 +123,66 @@ data = load_data("labor")
 lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
-agent = Agent([labor_data], config={"llm": lm_studio_llm}) # Inicializar agent
 # ---------------------------------------------------------------------------------
 # Configuración de la app en Streamlit
 # ---------------------------------------------------------------------------------
 # Título de la app
-st.title("_Europe GraphGen_  :blue[Graph generator] :flag-eu:")
 # TODO: Poner instrucciones al usuario sobre cómo hacer un muy buen prompt (sin tecnicismos, pensando en el usuario final)
 # Entrada de usuario para describir el gráfico
 user_input = st.text_input("What graphics do you have in mind")
 generate_button = st.button("Generate")
-# Procesar el input del usuario con PandasAI
 if generate_button and user_input:
     with st.spinner('Generating answer...'):
         try:
             prompt = generate_graph_prompt(user_input)
             answer = agent.chat(prompt)
-            explanation = agent.explain()
             print(f"\nAnswer type: {type(answer)}\n")  # Verificar tipo de objeto
             print(f"\nAnswer content: {answer}\n")  # Inspeccionar contenido de la respuesta
-            print(f"\n explanation type: {type(explanation)}\n")  # Verificar tipo de objeto
-            print(f"\n explanation content: {explanation}\n")
             if isinstance(answer, str) and os.path.isfile(answer):
                 # Si el output es una ruta válida a imagen
                 im = plt.imread(answer)
                 st.image(im)
                 os.remove(answer)  # Limpiar archivo temporal
-                st.markdown(str(explanation))
             else:
                 # Si no es una ruta válida, mostrar como texto
-                st.markdown(str(answer))
         except Exception as e:
             st.error(f"Error generating answer: {e}")

 import pandas as pd  # type: ignore
 from dotenv import load_dotenv  # type: ignore # Para cambios locales
 from supabase import create_client, Client  # type: ignore
 # from pandasai import SmartDataframe  # type: ignore
+from pandasai import SmartDatalake  # type: ignore # Porque ya usamos más de un df (más de una tabla de nuestra db)
+from pandasai.llm.local_llm import LocalLLM # type: ignore
 from pandasai import Agent
 import matplotlib.pyplot as plt
+import time
 # ---------------------------------------------------------------------------------
 # Funciones auxiliares
 # ---------------------------------------------------------------------------------
 def generate_graph_prompt(user_query):
     prompt = f"""
+            You are a senior data scientist analyzing European labor force data.
+            Given the user's request: "{user_query}"
+            1. Plot the relevant data using matplotlib:
+            - Use `df.query("geo == 'X'")` to filter the country, instead of chained comparisons.
+            - Avoid using filters like `df[df['geo'] == 'Germany']`.
+            - Include clear axis labels and a descriptive title.
+            - Save the plot as an image file (e.g., temp_chart.png).
+            2. After plotting, write a **concise analytical summary** of the trend based on those 5 years. The summary should:
+            - Identify the **year with the largest increase** and the percent change.
+            - Identify the **year with the largest decrease** and the percent change.
+            - Provide a **brief overall trend interpretation** (e.g., steady growth, fluctuating, recovery, etc.).
+            - Avoid listing every year individually, summarize intelligently.
+            3. Store the summary in a variable named `explanation`.
+            4. Return a result dictionary structured as follows:
+            result = {{
+                "type": "plot",
+                "value": "temp_chart.png",
+                "explanation": explanation
+            }}
+            IMPORTANT: Use only the data available in the input DataFrame.
+            """
     return prompt
+#TODO: Continuar mejorando el prompt
 # ---------------------------------------------------------------------------------
 # Configuración de conexión a Supabase
 # Cargar datos iniciales
 # ---------------------------------------------------------------------------------
 # TODO: La idea es luego usar todas las tablas, cuando ya funcione.
+# Se puede si el modelo funciona con las gráficas; si no, toca mejorarlo porque serían consultas más complejas.
+labor_data = load_data("labor")
+fertility_data = load_data("fertility")
 # population_data = load_data("population")
+# predictions_data = load_data("predictions")
+# TODO: Buscar la forma de disminuir la latencia (muchos datos = mucha latencia)
 # ---------------------------------------------------------------------------------
+# Inicializar LLM local (LM Studio) con PandasAI; la configuración de Ollama queda comentada
 # ---------------------------------------------------------------------------------
 # ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
 lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
+# sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
+# sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
+# agent = Agent([labor_data], config={"llm": lm_studio_llm}) # TODO: Probar Agent con multiples dfs
+agent = Agent(
+    [
+        labor_data,
+        fertility_data
+        ],
+    config={
+        "llm": lm_studio_llm,
+        "enable_cache": False,
+        "enable_filter_extraction": False  # evita errores de parseo
+    }
+)
 # ---------------------------------------------------------------------------------
 # Configuración de la app en Streamlit
 # ---------------------------------------------------------------------------------
 # Título de la app
+st.title("Europe GraphGen  :blue[Graph generator] :flag-eu:")
 # TODO: Poner instrucciones al usuario sobre cómo hacer un muy buen prompt (sin tecnicismos, pensando en el usuario final)
 # Entrada de usuario para describir el gráfico
 user_input = st.text_input("What graphics do you have in mind")
 generate_button = st.button("Generate")
 if generate_button and user_input:
     with st.spinner('Generating answer...'):
         try:
+            print(f"\nGenerating prompt...\n")
             prompt = generate_graph_prompt(user_input)
+            print(f"\nPrompt generated\n")
+            start_time = time.time()
             answer = agent.chat(prompt)
             print(f"\nAnswer type: {type(answer)}\n")  # Verificar tipo de objeto
             print(f"\nAnswer content: {answer}\n")  # Inspeccionar contenido de la respuesta
+            print(f"\nFull result: {agent.last_result}\n")
+            full_result = agent.last_result
+            explanation = full_result.get("explanation", "")
+            elapsed_time = time.time() - start_time
+            print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
             if isinstance(answer, str) and os.path.isfile(answer):
                 # Si el output es una ruta válida a imagen
                 im = plt.imread(answer)
                 st.image(im)
                 os.remove(answer)  # Limpiar archivo temporal
+                if explanation:
+                    st.markdown(f"**Explanation:** {explanation}")
             else:
                 # Si no es una ruta válida, mostrar como texto
+                st.markdown(str(answer))
         except Exception as e:
             st.error(f"Error generating answer: {e}")