Spaces:

zakivibes
/

zaki

Sleeping

App Files Files Community

Abdullah Zaki committed on Jun 12

Commit

574b1b5

1 Parent(s): 7aa3e0f

Add plotly t

Browse files

Files changed (1) hide show

app.py +116 -40

app.py CHANGED Viewed

@@ -8,14 +8,9 @@ from supabase import create_client, Client
 import os
 import plotly.express as px
-# Initialize Supabase client with API key from environment variables
-SUPABASE_URL = os.getenv("SUPABASE_URL")
-SUPABASE_KEY = os.getenv("SUPABASE_KEY")
-if not SUPABASE_URL or not SUPABASE_KEY:
-    raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set as environment variables.")
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
 # Initialize Chronos-T5-Large for forecasting
 chronos_pipeline = ChronosPipeline.from_pretrained(
     "amazon/chronos-t5-large",
     device_map="cuda" if torch.cuda.is_available() else "cpu",
@@ -23,6 +18,7 @@ chronos_pipeline = ChronosPipeline.from_pretrained(
 )
 # Initialize Prophet-Qwen3-4B-SFT for Arabic reports
 qwen_tokenizer = AutoTokenizer.from_pretrained("radm/prophet-qwen3-4b-sft")
 qwen_model = AutoModelForCausalLM.from_pretrained(
     "radm/prophet-qwen3-4b-sft",
@@ -30,41 +26,98 @@ qwen_model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16
 )
-def fetch_supabase_data(table_name: str = "sentiment_data") -> pd.DataFrame:
-    """Fetch time series data from Supabase using the provided API key."""
     try:
-        response = supabase.table(table_name).select("date, sentiment").order("date", desc=False).execute()
         if response.data:
             df = pd.DataFrame(response.data)
             df['date'] = pd.to_datetime(df['date'])
             return df
         else:
-            raise ValueError("No data found in Supabase table.")
     except Exception as e:
         raise Exception(f"Error fetching Supabase data: {str(e)}")
-def forecast_and_report(data_source: str, csv_file=None, prediction_length: int = 30, table_name: str = "sentiment_data"):
-    """Run forecasting with Chronos-T5-Large and generate Arabic report with Qwen3-4B-SFT."""
     try:
-        # Load data
         if data_source == "Supabase":
-            df = fetch_supabase_data(table_name)
-        else:
-            if not csv_file:
-                return {"error": "Please upload a CSV file."}, None, None
-            df = pd.read_csv(csv_file)
             if "sentiment" not in df.columns or "date" not in df.columns:
-                return {"error": "CSV must contain 'date' and 'sentiment' columns."}, None, None
             df['date'] = pd.to_datetime(df['date'])
-        # Prepare time series
         context = torch.tensor(df["sentiment"].values, dtype=torch.float32)
-        # Run forecast
         forecast = chronos_pipeline.predict(context, prediction_length)
         low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)
-        # Format forecast results
         forecast_dates = pd.date_range(start=df["date"].iloc[-1] + pd.Timedelta(days=1), periods=prediction_length, freq="D")
         forecast_df = pd.DataFrame({
             "date": forecast_dates,
@@ -73,14 +126,15 @@ def forecast_and_report(data_source: str, csv_file=None, prediction_length: int
             "high": high
         })
-        # Create forecast plot
-        plot_df = forecast_df.copy()
-        fig = px.line(plot_df, x="date", y=["median", "low", "high"], title="Sentiment Forecast")
         fig.update_traces(line=dict(color="blue"), selector=dict(name="median"))
         fig.update_traces(line=dict(color="red", dash="dash"), selector=dict(name="low"))
         fig.update_traces(line=dict(color="green", dash="dash"), selector=dict(name="high"))
-        # Generate Arabic report
         prompt = (
             "اكتب تقريراً رسمياً بالعربية يلخص توقعات المشاعر للأيام الثلاثين القادمة بناءً على البيانات التالية:\n"
             f"- متوسط التوقعات: {median[:5].tolist()} (أول 5 أيام)...\n"
@@ -88,37 +142,59 @@ def forecast_and_report(data_source: str, csv_file=None, prediction_length: int
             f"- الحد الأعلى (90%): {high[:5].tolist()}...\n"
             "التقرير يجب أن يكون موجزاً (200-300 كلمة)، يشرح الاتجاهات، ويستخدم لغة رسمية."
         )
         inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
         outputs = qwen_model.generate(
             inputs["input_ids"],
-            max_new_tokens=500,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9
         )
         report = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return forecast_df.to_dict(), fig, report
     except Exception as e:
-        return {"error": f"An error occurred: {str(e)}"}, None, None
-# Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Sentiment Forecasting and Arabic Reporting")
-    data_source = gr.Radio(["Supabase", "CSV Upload"], label="Data Source", value="Supabase")
     csv_file = gr.File(label="Upload CSV (if CSV selected)")
     table_name = gr.Textbox(label="Supabase Table Name", value="sentiment_data")
     prediction_length = gr.Slider(1, 60, value=30, step=1, label="Prediction Length (days)")
     submit = gr.Button("Run Forecast and Generate Report")
     output = gr.JSON(label="Forecast Results")
     plot = gr.Plot(label="Forecast Plot")
-    report = gr.Textbox(label="Arabic Report", lines=10)
     submit.click(
         fn=forecast_and_report,
-        inputs=[data_source, csv_file, prediction_length, table_name],
-        outputs=[output, plot, report]
     )
-demo.launch()

 import os
 import plotly.express as px
 # Initialize Chronos-T5-Large for forecasting
# Both models are instantiated at module import, so every Gradio request
# reuses the same in-memory weights instead of reloading them.
# Chronos is placed on "cuda" when torch reports CUDA as available, else "cpu".
_forecast_device = "cuda" if torch.cuda.is_available() else "cpu"
chronos_pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    device_map=_forecast_device,
)

# Prophet-Qwen3-4B-SFT generates the Arabic report text.
# The tokenizer and the model come from the same checkpoint; no device_map is
# passed here, and the weights are requested in bfloat16.
_qwen_checkpoint = "radm/prophet-qwen3-4b-sft"
qwen_tokenizer = AutoTokenizer.from_pretrained(_qwen_checkpoint)
qwen_model = AutoModelForCausalLM.from_pretrained(
    _qwen_checkpoint,
    torch_dtype=torch.bfloat16,
)
def fetch_supabase_data(supabase_url: str, supabase_key: str, table_name: str = "sentiment_data") -> pd.DataFrame:
    """Fetch the ``date``/``sentiment`` series from a Supabase table.

    A fresh client is created on every call from the URL and key passed in, so
    different credentials can be used without restarting the app.

    Args:
        supabase_url: URL of the Supabase project.
        supabase_key: Supabase API key.
        table_name: Table to read; rows are requested ordered by ``date``
            ascending.

    Returns:
        A DataFrame with ``date`` converted via ``pd.to_datetime`` and
        ``sentiment`` converted via ``pd.to_numeric``.

    Raises:
        ValueError: If the URL or key is missing or blank.
        Exception: If the request fails, the table returns no rows, or a
            column cannot be converted. The original error is attached as
            ``__cause__``.
    """
    # Values arrive from free-text boxes; treat whitespace-only input as missing
    # instead of letting it fail later inside the client with an opaque error.
    supabase_url = (supabase_url or "").strip()
    supabase_key = (supabase_key or "").strip()
    if not supabase_url or not supabase_key:
        raise ValueError("Supabase URL and Key must be provided to fetch data from Supabase.")

    try:
        supabase_client: Client = create_client(supabase_url, supabase_key)
        response = (
            supabase_client.table(table_name)
            .select("date, sentiment")
            .order("date", desc=False)
            .execute()
        )
        if not response.data:
            raise ValueError(f"No data found in Supabase table '{table_name}'.")

        df = pd.DataFrame(response.data)
        df["date"] = pd.to_datetime(df["date"])
        df["sentiment"] = pd.to_numeric(df["sentiment"])
        return df
    except Exception as e:
        # Keep the same message and exception type callers already see, but
        # chain the cause so the real traceback is not lost.
        raise Exception(f"Error fetching Supabase data: {str(e)}") from e
def forecast_and_report(
    data_source: str,
    supabase_url: str,
    supabase_key: str,
    csv_file=None,
    prediction_length: int = 30,
    table_name: str = "sentiment_data",
):
    """Forecast sentiment with Chronos and write an Arabic report with Qwen.

    Args:
        data_source: ``"Supabase"`` reads from Supabase; any other value is
            treated as a CSV upload.
        supabase_url: Supabase project URL (used only for ``"Supabase"``).
        supabase_key: Supabase API key (used only for ``"Supabase"``).
        csv_file: Uploaded CSV, either a file path string or an object
            exposing the path as ``.name``. Must contain ``date`` and
            ``sentiment`` columns.
        prediction_length: Number of daily steps to forecast.
        table_name: Supabase table to read.

    Returns:
        A 4-tuple ``(forecast, figure, report, status)``:
            - forecast: ``forecast_df.to_dict()`` on success; on failure either
              ``{"error": ...}`` (input validation) or ``{}`` (unexpected error).
            - figure: Plotly line figure of median/low/high, or ``None``.
            - report: Generated Arabic text, or ``None``.
            - status: ``"Success"`` or an error message.

        The function never raises; every exception is reported through
        ``status``.
    """
    try:
        if data_source == "Supabase":
            df = fetch_supabase_data(supabase_url, supabase_key, table_name)
        else:
            if csv_file is None:
                return (
                    {"error": "Please upload a CSV file when 'CSV Upload' is selected."},
                    None,
                    None,
                    "Error: CSV file not provided.",
                )
            # Gradio may hand over either a plain path string or a tempfile-like
            # wrapper whose path is in ``.name``; accept both.
            csv_path = getattr(csv_file, "name", csv_file)
            df = pd.read_csv(csv_path)
            if "sentiment" not in df.columns or "date" not in df.columns:
                return (
                    {"error": "CSV must contain 'date' and 'sentiment' columns."},
                    None,
                    None,
                    "Error: Missing 'date' or 'sentiment' columns in CSV.",
                )
            df["date"] = pd.to_datetime(df["date"])
            df["sentiment"] = pd.to_numeric(df["sentiment"])

        if df.empty:
            return (
                {"error": "No data available for forecasting or reporting."},
                None,
                None,
                "Error: No data available.",
            )

        # The forecast starts the day after the last row, so the rows must be in
        # chronological order (uploaded CSVs are not guaranteed to be).
        df = df.sort_values("date").reset_index(drop=True)
        horizon = int(prediction_length)

        context = torch.tensor(df["sentiment"].values, dtype=torch.float32)
        forecast = chronos_pipeline.predict(context, horizon)
        # Quantiles are taken over axis 0 of the first forecast entry
        # (presumably the sample paths for the single input series - confirm).
        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)

        forecast_dates = pd.date_range(
            start=df["date"].iloc[-1] + pd.Timedelta(days=1),
            periods=horizon,
            freq="D",
        )
        # All three series are required: the plot below selects them by name.
        forecast_df = pd.DataFrame({
            "date": forecast_dates,
            "median": median,
            "low": low,
            "high": high,
        })

        # Only the forecast horizon is plotted, not the historical series.
        fig = px.line(forecast_df, x="date", y=["median", "low", "high"], title="Sentiment Forecast")
        fig.update_traces(line=dict(color="blue"), selector=dict(name="median"))
        fig.update_traces(line=dict(color="red", dash="dash"), selector=dict(name="low"))
        fig.update_traces(line=dict(color="green", dash="dash"), selector=dict(name="high"))

        # The prompt states the actual horizon instead of a fixed "thirty days",
        # and quotes the first five values of each band.
        prompt = (
            f"اكتب تقريراً رسمياً بالعربية يلخص توقعات المشاعر للأيام الـ{horizon} القادمة بناءً على البيانات التالية:\n"
            f"- متوسط التوقعات: {median[:5].tolist()} (أول 5 أيام)...\n"
            f"- الحد الأدنى (10%): {low[:5].tolist()}...\n"
            f"- الحد الأعلى (90%): {high[:5].tolist()}...\n"
            "التقرير يجب أن يكون موجزاً (200-300 كلمة)، يشرح الاتجاهات، ويستخدم لغة رسمية."
        )

        inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
        # Pass the whole encoding so the attention mask reaches generate().
        outputs = qwen_model.generate(
            **inputs,
            max_new_tokens=500,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
        # The output sequence begins with the prompt tokens; decode only the
        # newly generated tail so the report does not echo the prompt.
        prompt_len = inputs["input_ids"].shape[1]
        report = qwen_tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

        return forecast_df.to_dict(), fig, report, "Success"
    except Exception as e:
        # UI boundary: surface any failure in the status box rather than crash.
        return {}, None, None, f"An error occurred: {str(e)}"
# Gradio UI: one page that collects the data source and forecast settings,
# then shows the forecast table, plot, Arabic report and a status line.
with gr.Blocks() as demo:
    gr.Markdown("# Sentiment Forecasting and Arabic Reporting")

    # Data source selector and Supabase credentials share a single row.
    with gr.Row():
        data_source = gr.Radio(
            ["Supabase", "CSV Upload"],
            label="Data Source",
            value="Supabase",
        )
        supabase_url = gr.Textbox(
            label="Supabase URL",
            placeholder="e.g., https://your-project-ref.supabase.co",
            interactive=True,
        )
        supabase_key = gr.Textbox(
            label="Supabase Key",
            placeholder="Your Supabase anon key",
            type="password",
            interactive=True,
        )

    csv_file = gr.File(label="Upload CSV (if CSV selected)")
    table_name = gr.Textbox(label="Supabase Table Name", value="sentiment_data")
    prediction_length = gr.Slider(1, 60, value=30, step=1, label="Prediction Length (days)")
    submit = gr.Button("Run Forecast and Generate Report")

    # Result widgets, in the order forecast_and_report returns its values.
    output = gr.JSON(label="Forecast Results")
    plot = gr.Plot(label="Forecast Plot")
    # rtl=True renders the Arabic report right-to-left.
    report = gr.Textbox(label="Arabic Report", lines=10, rtl=True, show_copy_button=True)
    status_message = gr.Textbox(label="Status", interactive=False)

    # Input order must match forecast_and_report's positional parameters.
    handler_inputs = [
        data_source,
        supabase_url,
        supabase_key,
        csv_file,
        prediction_length,
        table_name,
    ]
    handler_outputs = [output, plot, report, status_message]
    submit.click(
        fn=forecast_and_report,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )

# Start the Gradio server.
demo.launch()