Spaces:

zakivibes
/

zaki

Sleeping

App Files Files Community

Abdullah Zaki committed 28 days ago

Commit

fd00e59

1 Parent(s): 574b1b5

Add plotly t

Browse files

Files changed (1) hide show

app.py +73 -145

app.py CHANGED Viewed

@@ -3,122 +3,80 @@ import pandas as pd
 import numpy as np
 import torch
 from chronos import ChronosPipeline
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from supabase import create_client, Client
-import os
 import plotly.express as px
 # Initialize Chronos-T5-Large for forecasting
-# These models are loaded once at the start of the Gradio app for efficiency.
 # device_map is set explicitly below: "cuda" when a GPU is available, otherwise "cpu".
 chronos_pipeline = ChronosPipeline.from_pretrained(
     "amazon/chronos-t5-large",
     device_map="cuda" if torch.cuda.is_available() else "cpu",
     torch_dtype=torch.bfloat16
 )
-# Initialize Prophet-Qwen3-4B-SFT for Arabic reports
-# These models are also loaded once at the start.
-qwen_tokenizer = AutoTokenizer.from_pretrained("radm/prophet-qwen3-4b-sft")
-qwen_model = AutoModelForCausalLM.from_pretrained(
-    "radm/prophet-qwen3-4b-sft",
-    device_map="cuda" if torch.cuda.is_available() else "cpu",
-    torch_dtype=torch.bfloat16
-)
-def fetch_supabase_data(supabase_url: str, supabase_key: str, table_name: str = "sentiment_data") -> pd.DataFrame:
     """
-    Fetches time series data from Supabase using the provided URL and API key.
     Args:
-        supabase_url (str): The URL of your Supabase project.
-        supabase_key (str): Your Supabase API key (anon key).
-        table_name (str): The name of the table to fetch data from.
     Returns:
-        pd.DataFrame: A DataFrame containing 'date' and 'sentiment' columns.
-    Raises:
-        Exception: If there's an error connecting to Supabase or no data is found.
     """
-    if not supabase_url or not supabase_key:
-        raise ValueError("Supabase URL and Key must be provided to fetch data from Supabase.")
     try:
-        # Create a new Supabase client instance for each call, using the provided URL and key.
-        # This allows the user to input different keys/URLs without restarting the app.
-        supabase_client: Client = create_client(supabase_url, supabase_key)
-        response = supabase_client.table(table_name).select("date, sentiment").order("date", desc=False).execute()
-        if response.data:
-            df = pd.DataFrame(response.data)
-            # Ensure 'date' column is in datetime format
-            df['date'] = pd.to_datetime(df['date'])
-            # Ensure 'sentiment' column is numeric for forecasting
-            df['sentiment'] = pd.to_numeric(df['sentiment'])
-            return df
-        else:
-            raise ValueError(f"No data found in Supabase table '{table_name}'.")
-    except Exception as e:
-        raise Exception(f"Error fetching Supabase data: {str(e)}")
-def forecast_and_report(
-    data_source: str,
-    supabase_url: str,  # New input for Supabase URL
-    supabase_key: str,   # New input for Supabase Key
-    csv_file=None,
-    prediction_length: int = 30,
-    table_name: str = "sentiment_data"
-):
-    """
-    Runs forecasting with Chronos-T5-Large and generates an Arabic report with Qwen3-4B-SFT.
-    Args:
-        data_source (str): Specifies whether to use "Supabase" or "CSV Upload".
-        supabase_url (str): The Supabase project URL (used if data_source is "Supabase").
-        supabase_key (str): The Supabase API key (used if data_source is "Supabase").
-        csv_file: The uploaded CSV file (used if data_source is "CSV Upload").
-        prediction_length (int): The number of days to forecast.
-        table_name (str): The name of the Supabase table.
-    Returns:
-        tuple: A tuple containing:
-            - dict: Forecast results as a dictionary.
-            - plotly.graph_objects.Figure: A Plotly figure of the forecast.
-            - str: The generated Arabic report.
-            - str: An error message if an error occurs.
-    """
-    try:
-        # Load data based on selected source
-        df = pd.DataFrame() # Initialize df to avoid UnboundLocalError
-        if data_source == "Supabase":
-            df = fetch_supabase_data(supabase_url, supabase_key, table_name)
-        else: # data_source == "CSV Upload"
-            if csv_file is None:
-                return {"error": "Please upload a CSV file when 'CSV Upload' is selected."}, None, None, "Error: CSV file not provided."
-            df = pd.read_csv(csv_file.name) # Access the file path
-            # Basic validation for required columns in CSV
-            if "sentiment" not in df.columns or "date" not in df.columns:
-                return {"error": "CSV must contain 'date' and 'sentiment' columns."}, None, None, "Error: Missing 'date' or 'sentiment' columns in CSV."
-            df['date'] = pd.to_datetime(df['date'])
-            df['sentiment'] = pd.to_numeric(df['sentiment'])
-        # Ensure there's data to process
         if df.empty:
-            return {"error": "No data available for forecasting or reporting."}, None, None, "Error: No data available."
         # Prepare time series data for Chronos
-        # Ensure sentiment is float32 for the model
         context = torch.tensor(df["sentiment"].values, dtype=torch.float32)
         # Run forecast using Chronos-T5-Large pipeline
-        forecast = chronos_pipeline.predict(context, prediction_length)
-        # Calculate quantiles for low, median, and high predictions
-        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)
-        # Format forecast results into a DataFrame
-        # Generate future dates starting from the day after the last historical date
-        forecast_dates = pd.date_range(start=df["date"].iloc[-1] + pd.Timedelta(days=1), periods=prediction_length, freq="D")
         forecast_df = pd.DataFrame({
             "date": forecast_dates,
             "low": low,
@@ -127,73 +85,43 @@ def forecast_and_report(
         })
         # Create forecast plot using Plotly
-        # Combine historical data for plotting if desired, but here we plot only forecast
         fig = px.line(forecast_df, x="date", y=["median", "low", "high"], title="Sentiment Forecast")
-        fig.update_traces(line=dict(color="blue"), selector=dict(name="median"))
         fig.update_traces(line=dict(color="red", dash="dash"), selector=dict(name="low"))
         fig.update_traces(line=dict(color="green", dash="dash"), selector=dict(name="high"))
-        # Generate Arabic report using Prophet-Qwen3-4B-SFT
-        # Construct the prompt with relevant forecast snippets
-        prompt = (
-            "اكتب تقريراً رسمياً بالعربية يلخص توقعات المشاعر للأيام الثلاثين القادمة بناءً على البيانات التالية:\n"
-            f"- متوسط التوقعات: {median[:5].tolist()} (أول 5 أيام)...\n"
-            f"- الحد الأدنى (10%): {low[:5].tolist()}...\n"
-            f"- الحد الأعلى (90%): {high[:5].tolist()}...\n"
-            "التقرير يجب أن يكون موجزاً (200-300 كلمة)، يشرح الاتجاهات، ويستخدم لغة رسمية."
-        )
-        # Tokenize the prompt and move to the model's device (CPU/GPU)
-        inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
-        # Generate the report text
-        outputs = qwen_model.generate(
-            inputs["input_ids"],
-            max_new_tokens=500, # Max length for the generated report
-            do_sample=True,     # Enable sampling for more diverse text
-            temperature=0.7,    # Control randomness (lower for less random)
-            top_p=0.9           # Nucleus sampling parameter
-        )
-        # Decode the generated tokens back to text, skipping special tokens
-        report = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return forecast_df.to_dict(), fig, report, "Success" # Return success message
     except Exception as e:
-        # Catch any exceptions and return an error message
-        return {}, None, None, f"An error occurred: {str(e)}"
 # Gradio interface definition
 with gr.Blocks() as demo:
-    gr.Markdown("# Sentiment Forecasting and Arabic Reporting")
-    # Input components for Supabase credentials and data source selection
     with gr.Row():
-        data_source = gr.Radio(["Supabase", "CSV Upload"], label="Data Source", value="Supabase")
-        supabase_url = gr.Textbox(label="Supabase URL", placeholder="e.g., https://your-project-ref.supabase.co", interactive=True)
-        supabase_key = gr.Textbox(label="Supabase Key", placeholder="Your Supabase anon key", type="password", interactive=True)
-    csv_file = gr.File(label="Upload CSV (if CSV selected)")
-    table_name = gr.Textbox(label="Supabase Table Name", value="sentiment_data")
-    prediction_length = gr.Slider(1, 60, value=30, step=1, label="Prediction Length (days)")
-    submit = gr.Button("Run Forecast and Generate Report")
-    # Output components for results
-    output = gr.JSON(label="Forecast Results")
-    plot = gr.Plot(label="Forecast Plot")
-    report = gr.Textbox(label="Arabic Report", lines=10, rtl=True, show_copy_button=True) # Added rtl=True for Arabic display
-    status_message = gr.Textbox(label="Status", interactive=False) # For displaying success/error messages
-    # Define the click event handler for the submit button
-    submit.click(
-        fn=forecast_and_report,
-        inputs=[
-            data_source,
-            supabase_url,
-            supabase_key,
-            csv_file,
-            prediction_length,
-            table_name
-        ],
-        outputs=[output, plot, report, status_message]
     )
 # Launch the Gradio application

 import numpy as np
 import torch
 from chronos import ChronosPipeline
 import plotly.express as px
 # Initialize Chronos-T5-Large for forecasting
+# This model is loaded once at the start of the Gradio app for efficiency.
 # device_map is set explicitly below: "cuda" when a GPU is available, otherwise "cpu".
+# torch_dtype=torch.bfloat16 is applied unconditionally, on CPU as well as on GPU.
 chronos_pipeline = ChronosPipeline.from_pretrained(
     "amazon/chronos-t5-large",
     device_map="cuda" if torch.cuda.is_available() else "cpu",
     torch_dtype=torch.bfloat16
 )
+def run_chronos_forecast(
+    csv_file: gr.File,
+    prediction_length: int = 30
+) -> tuple[pd.DataFrame, px.line, str]:
     """
+    Runs time series forecasting using the Chronos-T5-Large model.
     Args:
+        csv_file (gr.File): The uploaded CSV file containing historical data.
+                            Must have 'date' and 'sentiment' columns.
+        prediction_length (int): The number of future periods (days) to forecast.
     Returns:
+        tuple: A tuple containing:
+            - pd.DataFrame: A DataFrame of the forecast results (date, low, median, high).
+            - plotly.graph_objects.Figure: A Plotly figure visualizing the forecast.
+            - str: A status message (e.g., "Success" or an error message).
     """
+    if csv_file is None:
+        return pd.DataFrame(), None, "Error: Please upload a CSV file."
     try:
+        # Read the uploaded CSV file into a pandas DataFrame
+        df = pd.read_csv(csv_file.name)
+        # Validate required columns
+        if "date" not in df.columns or "sentiment" not in df.columns:
+            return pd.DataFrame(), None, "Error: CSV must contain 'date' and 'sentiment' columns."
+        # Convert 'date' column to datetime objects
+        df['date'] = pd.to_datetime(df['date'])
+        # Convert 'sentiment' column to numeric, handling potential errors
+        df['sentiment'] = pd.to_numeric(df['sentiment'], errors='coerce')
+        # Drop rows where sentiment could not be converted (e.g., NaN values)
+        df.dropna(subset=['sentiment'], inplace=True)
         if df.empty:
+            return pd.DataFrame(), None, "Error: No valid sentiment data found in the CSV."
+        # Sort data by date to ensure correct time series order
+        df = df.sort_values(by='date').reset_index(drop=True)
         # Prepare time series data for Chronos
+        # Chronos expects a 1D tensor of the time series values
         context = torch.tensor(df["sentiment"].values, dtype=torch.float32)
         # Run forecast using Chronos-T5-Large pipeline
+        # The predict method returns a tensor of forecasts
+        forecast_tensor = chronos_pipeline.predict(context, prediction_length)
+        # Calculate quantiles (10%, 50% (median), 90%) for the forecast
+        # forecast_tensor[0] selects the first (and usually only) batch of predictions
+        low, median, high = np.quantile(forecast_tensor[0].numpy(), [0.1, 0.5, 0.9], axis=0)
+        # Generate future dates for the forecast results
+        # Start from the day after the last historical date
+        last_historical_date = df["date"].iloc[-1]
+        forecast_dates = pd.date_range(start=last_historical_date + pd.Timedelta(days=1),
+                                       periods=prediction_length,
+                                       freq="D")
+        # Create a DataFrame for the forecast results
         forecast_df = pd.DataFrame({
             "date": forecast_dates,
             "low": low,
         })
         # Create forecast plot using Plotly
         fig = px.line(forecast_df, x="date", y=["median", "low", "high"], title="Sentiment Forecast")
+        fig.update_traces(line=dict(color="blue", width=3), selector=dict(name="median"))
         fig.update_traces(line=dict(color="red", dash="dash"), selector=dict(name="low"))
         fig.update_traces(line=dict(color="green", dash="dash"), selector=dict(name="high"))
+        fig.update_layout(hovermode="x unified", title_x=0.5) # Improve hover interactivity and center title
+        return forecast_df, fig, "Forecast generated successfully!"
     except Exception as e:
+        # Catch any exceptions and return an error message to the user
+        return pd.DataFrame(), None, f"An error occurred: {str(e)}"
 # Gradio interface definition
 with gr.Blocks() as demo:
+    gr.Markdown("# Chronos Time Series Forecasting")
+    gr.Markdown("Upload a CSV file containing historical data with 'date' and 'sentiment' columns to get a sentiment forecast.")
     with gr.Row():
+        csv_input = gr.File(label="Upload Historical Data (CSV)")
+        prediction_length_slider = gr.Slider(
+            1, 60, value=30, step=1, label="Prediction Length (days)"
+        )
+    run_button = gr.Button("Generate Forecast")
+    with gr.Tab("Forecast Plot"):
+        forecast_plot_output = gr.Plot(label="Sentiment Forecast Plot")
+    with gr.Tab("Forecast Data"):
+        forecast_json_output = gr.DataFrame(label="Raw Forecast Data") # Changed to DataFrame for better readability
+    status_message_output = gr.Textbox(label="Status", interactive=False)
+    # Define the click event handler for the run button
+    run_button.click(
+        fn=run_chronos_forecast,
+        inputs=[csv_input, prediction_length_slider],
+        outputs=[forecast_json_output, forecast_plot_output, status_message_output]
     )
 # Launch the Gradio application