# ---------------------------------------------------------------------------------
# Main application: load the model, generate prompts, and explain the data
# ---------------------------------------------------------------------------------
import streamlit as st  # type: ignore
import os
import re
import pandas as pd  # type: ignore
from dotenv import load_dotenv  # type: ignore  # For local development
from supabase import create_client, Client  # type: ignore
# from transformers import pipeline
from pandasai import SmartDataframe  # type: ignore
from pandasai.llm.local_llm import LocalLLM


# ---------------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------------

def extract_code(llm_output):
    """Extract the Python code from a ```python ... ``` fence in LLM output.

    Args:
        llm_output: Raw text returned by the model.

    Returns:
        The code inside the first fenced block (without the fences),
        or None when no fenced Python block is present.
    """
    code_match = re.search(r"```python\n(.*?)\n```", llm_output, re.DOTALL)
    if code_match:
        return code_match.group(1)
    return None


def generate_graph_prompt(country1, country2, metric, start_year, end_year):
    """Build a prompt asking the LLM for matplotlib code comparing two countries.

    Example:
        generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)

    Returns:
        The formatted prompt string.
    """
    prompt = f"""
You have access to a database of European countries with data on {metric}, labor force participation, population, and their predictions for future years. Generate Python code using matplotlib to create a line graph showing the trend of {metric} for {country1} and {country2} from {start_year} to {end_year}. Also, provide a concise explanation of what this graph represents for an end user who might not be familiar with the data. 
"""
    return prompt


# ---------------------------------------------------------------------------------
# Supabase connection configuration
# ---------------------------------------------------------------------------------

# Load environment variables from a .env file
load_dotenv()

# Supabase credentials (stored under "Secrets" on Streamlit)
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Create the Supabase client only when both credentials are present.
# BUG FIX: create_client() was called unconditionally, so a missing .env
# crashed the whole app at import time even though load_data() already
# guards against an uninitialized client.
supabase: Client = (
    create_client(SUPABASE_URL, SUPABASE_KEY)
    if SUPABASE_URL and SUPABASE_KEY
    else None
)


def load_data(table):
    """Fetch every row of *table* from Supabase as a pandas DataFrame.

    Known tables: fertility, geo data, labor, population, predictions.

    Args:
        table: Name of the Supabase table to read.

    Returns:
        A DataFrame with the table contents, or an empty DataFrame on any
        error — callers can always rely on getting a DataFrame back.
    """
    try:
        if not supabase:
            st.error("Supabase client not initialized. Check environment variables.")
            return pd.DataFrame()

        response = supabase.from_(table).select("*").execute()
        print(f"Response object: {response}")        # inspect the full object
        print(f"Response type: {type(response)}")    # verify the object type

        # Probe for data / error attributes across client versions.
        if hasattr(response, 'data'):
            print(f"Response data: {response.data}")
            return pd.DataFrame(response.data)
        if hasattr(response, 'status_code'):
            print(f"Response status code: {response.status_code}")
            # BUG FIX: this branch previously fell through and returned
            # None; always return a DataFrame so callers never get None.
            return pd.DataFrame()
        if hasattr(response, '_error'):  # older client versions
            print(f"Older error attribute: {response._error}")
            st.error(f"Error fetching data: {response._error}")
            return pd.DataFrame()

        st.info("Response object does not have 'data' or known error attributes. Check the logs.")
        return pd.DataFrame()
    except Exception as e:
        st.error(f"An error occurred during data loading: {e}")
        return pd.DataFrame()


# ---------------------------------------------------------------------------------
# Initial data load
# ---------------------------------------------------------------------------------

# Load data from the "labor" table
data = load_data("labor")

# TODO: The idea is to use all the tables later, once this works.
# Possible if the model handles the graphs well; otherwise it needs
# improvement, because these would be more complex queries.
# labor_data = load_data("labor")
# fertility_data = load_data("fertility")
# population_data = load_data("population")
# predictions_data = load_data("predictions")

"""
# E.g.:
# import os
# import pandas as pd
# from pandasai import SmartDatalake
# employees_data = {
#     'EmployeeID': [1, 2, 3, 4, 5],
#     'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
#     'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
# }
# salaries_data = {
#     'EmployeeID': [1, 2, 3, 4, 5],
#     'Salary': [5000, 6000, 4500, 7000, 5500]
# }
# employees_df = pd.DataFrame(employees_data)
# salaries_df = pd.DataFrame(salaries_data)
# # By default, unless you choose a different LLM, it will use BambooLLM.
# # You can get your free API key signing up at https://pandabi.ai (you can also configure it in your .env file)
# os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY"
# lake = SmartDatalake([employees_df, salaries_df])
# lake.chat("Who gets paid the most?")
# # Output: Olivia gets paid the most
"""

# ---------------------------------------------------------------------------------
# Initialize the LLM
# ---------------------------------------------------------------------------------
# TODO: change API keys depending on which model we pick
# model_name = "google/flan-t5-small"  # trying out models
# generator = pipeline("text-generation", model=model_name)

# ---------------------------------------------------------------------------------
# Initialize PandasAI with a local model served by Ollama
# ---------------------------------------------------------------------------------
# Previous attempt: StarCoder from Hugging Face
# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# llm = Starcoder(api_token=huggingface_token)

# Local model served by Ollama at its OpenAI-compatible endpoint.
ollama_llm = LocalLLM(
    api_base="http://localhost:11434/v1",
    model="gemma3:12b",
    temperature=0.1,
    max_tokens=8000,
)

# PandasAI-ready DataFrame wrapper around the loaded data.
sdf = SmartDataframe(data, config={"llm": ollama_llm})

# ---------------------------------------------------------------------------------
# Streamlit app configuration
# ---------------------------------------------------------------------------------

# App title
st.title("_Europe GraphGen_ :blue[Graph generator] :flag-eu:")

# User input describing the desired graph
user_input = st.text_input("What graphics do you have in mind")
generate_button = st.button("Generate")

# Process the user's request with PandasAI
if generate_button and user_input:
    st.dataframe(data.head())
    with st.spinner('Generating answer...'):
        try:
            answer = sdf.chat(user_input)
            st.write(answer)
        except Exception as e:
            st.error(f"Error generating answer: {e}")
elif generate_button:
    # BUG FIX: clicking Generate with an empty prompt previously did
    # nothing silently; guide the user instead.
    st.warning("Please describe the graph you want before clicking Generate.")

# TODO: Structured output if we find it necessary.