“Transcendental-Programmer”
commited on
Commit
·
d773e1b
1
Parent(s):
1b807f7
feat: initial project files and Docker setup
Browse files- .gitignore +2 -0
- Dockerfile +12 -0
- README.md +37 -11
- __init__.py +1 -0
- app.py +76 -0
- chart_generator.py +45 -0
- data/readme +1 -0
- data/train_data.csv +100 -0
- data_processor.py +42 -0
- image_verifier.py +34 -0
- llm_agent.py +125 -0
- readme +1 -0
- requirements.txt +73 -0
- start.sh +8 -0
- train_model.py +64 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim

WORKDIR /code

# Install dependencies before copying the source tree so Docker's layer cache
# is reused whenever only application code (not requirements.txt) changes.
COPY requirements.txt .

RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source last.
COPY . .

# Hugging Face Spaces expects the app to listen on port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,11 +1,37 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Backend for Excel Plotter App
|
2 |
+
|
3 |
+
This backend is a Flask application serving the Excel Plotter API.
|
4 |
+
|
5 |
+
## Deployment on Hugging Face Spaces
|
6 |
+
|
7 |
+
- Ensure the fine-tuned BART large model files are included in the `backend/fine-tuned-bart-large/` directory or uploaded to Hugging Face Hub.
|
8 |
+
- The app runs on port 7860.
|
9 |
+
- To start the app, run:
|
10 |
+
|
11 |
+
```bash
|
12 |
+
bash start.sh
|
13 |
+
```
|
14 |
+
|
15 |
+
- The `requirements.txt` includes all necessary dependencies.
|
16 |
+
- Make sure to set any required environment variables in the Hugging Face Space settings.
|
17 |
+
|
18 |
+
## Using the Fine-tuned BART Large Model from Hugging Face Hub
|
19 |
+
|
20 |
+
You can load the fine-tuned BART large model directly from Hugging Face Hub in your backend code as follows:
|
21 |
+
|
22 |
+
```python
|
23 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
24 |
+
|
25 |
+
model_name = "ArchCoder/fine-tuned-bart-large"
|
26 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
27 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
28 |
+
```
|
29 |
+
|
30 |
+
Replace `"ArchCoder/fine-tuned-bart-large"` with your actual model repository name if different.
|
31 |
+
|
32 |
+
Make sure your backend code (e.g., in `llm_agent.py` or wherever the model is loaded) uses this method to load the model from the Hub instead of local files.
|
33 |
+
|
34 |
+
## Notes
|
35 |
+
|
36 |
+
- Static files are served from the `static` directory.
|
37 |
+
- Adjust API URLs in the frontend to point to the deployed backend URL.
|
__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# backend/__init__.py
|
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Flask backend for the Excel Plotter API.

Endpoints:
    /        -- welcome/health message
    /plot    -- POST a natural-language plotting query, returns chart info
    /upload  -- POST a CSV/Excel file, returns its columns and a preview
    /static/<path> -- serves generated chart images
"""
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from llm_agent import LLM_Agent
from data_processor import DataProcessor
import os
import logging
import time
from dotenv import load_dotenv
from werkzeug.utils import secure_filename

load_dotenv()


logging.basicConfig(level=logging.INFO)
# Silence noisy third-party debug logs.
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('PIL').setLevel(logging.WARNING)

app = Flask(__name__, static_folder=os.path.join(os.path.dirname(__file__), '..', 'static'))

CORS(app)
# Loaded once at startup: constructing LLM_Agent loads the BART and CLIP models.
agent = LLM_Agent()

UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok avoids the check-then-create race of the previous exists()/makedirs pair.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

def allowed_file(filename):
    """Return True if *filename* has an allowed spreadsheet extension."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

@app.route('/')
def index():
    """Simple welcome message doubling as a health check."""
    logging.info("Index route accessed")
    return "Welcome to the Excel Plotter API. Use the /plot endpoint to make requests."

@app.route('/plot', methods=['POST'])
def plot():
    """Forward the JSON request body to the LLM agent and return its response."""
    start_time = time.time()
    data = request.json
    logging.info(f"Received request data: {data}")

    response = agent.process_request(data)

    end_time = time.time()
    logging.info(f"Processed request in {end_time - start_time} seconds")

    return jsonify(response)


@app.route('/static/<path:filename>')
def serve_static(filename):
    """Serve a file from the configured static folder (e.g. generated charts)."""
    # Bug fix: previously logged a hard-coded placeholder instead of the filename.
    logging.info(f"Serving static file: {filename}")
    return send_from_directory(app.static_folder, filename)

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a CSV/Excel upload, save it, and return columns plus a preview."""
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(file_path)
        # Bug fix: use DataProcessor directly instead of instantiating a whole
        # LLM_Agent (which reloaded the BART and CLIP models on every upload).
        dp = DataProcessor(file_path)
        columns = dp.get_columns()
        preview = dp.preview(5)
        return jsonify({'message': 'File uploaded successfully', 'columns': columns, 'preview': preview, 'file_path': file_path})
    else:
        return jsonify({'error': 'Invalid file type'}), 400

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
|
chart_generator.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
import logging
|
5 |
+
import time
|
6 |
+
|
7 |
+
class ChartGenerator:
    """Renders line/bar charts from a DataFrame according to parsed plot args."""

    def __init__(self, data=None):
        """Use the supplied DataFrame, or fall back to the bundled sample workbook."""
        logging.info("Initializing ChartGenerator")
        if data is not None:
            self.data = data
        else:
            self.data = pd.read_excel(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx'))

    def generate_chart(self, plot_args):
        """Draw the requested chart and save it under static/images.

        plot_args keys: 'x' (column name), 'y' (list of column names),
        optional 'chart_type' ('line' default or 'bar') and 'color'.
        Returns the saved chart's path relative to the project root.
        """
        start_time = time.time()
        logging.info(f"Generating chart with arguments: {plot_args}")

        fig, ax = plt.subplots()
        # Hoist loop invariants: chart type and color apply to every series.
        chart_type = plot_args.get('chart_type', 'line')
        color = plot_args.get('color', None)
        for y in plot_args['y']:
            if chart_type == 'bar':
                ax.bar(self.data[plot_args['x']], self.data[y], label=y, color=color)
            else:
                ax.plot(self.data[plot_args['x']], self.data[y], label=y, color=color)

        ax.set_xlabel(plot_args['x'])
        ax.legend()

        chart_filename = 'chart.png'
        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'static', 'images')
        os.makedirs(output_dir, exist_ok=True)

        full_path = os.path.join(output_dir, chart_filename)

        try:
            # savefig overwrites an existing file, so the previous
            # exists()/remove() dance was unnecessary.
            fig.savefig(full_path)
        finally:
            # Bug fix: close the figure; matplotlib keeps figures alive until
            # closed, which leaks memory in a long-running server process.
            plt.close(fig)

        logging.info(f"Chart generated and saved to {full_path}")

        return os.path.join('static', 'images', chart_filename)
|
data/readme
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
data/train_data.csv
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
query,arguments
|
2 |
+
plot the sales in the years with red line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}"
|
3 |
+
show employee expenses and net profit over the years,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
4 |
+
display the EBITDA for each year with a blue bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}"
|
5 |
+
plot the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
6 |
+
show the interest payments each year with a green bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'green'}"
|
7 |
+
display the working capital percentage over the years,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
8 |
+
plot the EBIT for each year with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
9 |
+
show sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
10 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
11 |
+
plot the employee expenses each year with a red line,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line', 'color': 'red'}"
|
12 |
+
show the annual sales in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
13 |
+
display EBIT and EBITDA over the years,"{'x': 'Year', 'y': ['EBIT', 'EBITDA'], 'chart_type': 'line'}"
|
14 |
+
plot the RoCE for each year with a purple line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'purple'}"
|
15 |
+
show the interest and working capital percentage,"{'x': 'Year', 'y': ['interest', 'WC %'], 'chart_type': 'line'}"
|
16 |
+
display the annual net profit with a blue bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'blue'}"
|
17 |
+
plot the sales and employee expenses in a line chart,"{'x': 'Year', 'y': ['Sales', 'Employee expense'], 'chart_type': 'line'}"
|
18 |
+
show the EBITDA for each year with a green bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'green'}"
|
19 |
+
display the EBIT over time with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
20 |
+
plot the net profit each year with a red bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'red'}"
|
21 |
+
show the employee expenses in a line chart,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
22 |
+
display the annual interest payments with a blue line,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line', 'color': 'blue'}"
|
23 |
+
plot the RoCE and WC % over the years,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
24 |
+
show the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
25 |
+
display EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
26 |
+
plot the employee expenses and EBIT,"{'x': 'Year', 'y': ['Employee expense', 'EBIT'], 'chart_type': 'line'}"
|
27 |
+
show the working capital percentage each year,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
28 |
+
display the RoCE in a bar chart,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'bar'}"
|
29 |
+
plot the annual sales with a green line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'green'}"
|
30 |
+
show the EBIT and interest over time,"{'x': 'Year', 'y': ['EBIT', 'interest'], 'chart_type': 'line'}"
|
31 |
+
display the net profit each year with a purple bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'purple'}"
|
32 |
+
plot the employee expenses over the years,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
33 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
34 |
+
display EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
35 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
36 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
37 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
38 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
39 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
40 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
41 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
42 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
43 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
44 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
45 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
46 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
47 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
48 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
49 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
50 |
+
plot the employee expenses each year,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
51 |
+
show the EBITDA in a bar chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar'}"
|
52 |
+
display the EBIT with a red line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'red'}"
|
53 |
+
plot the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
54 |
+
show the employee expenses and net profit,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
55 |
+
display the RoCE and working capital percentage,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
56 |
+
plot the EBITDA and EBIT,"{'x': 'Year', 'y': ['EBITDA', 'EBIT'], 'chart_type': 'line'}"
|
57 |
+
show the sales in a line chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
|
58 |
+
display the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
59 |
+
plot the annual interest payments,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
60 |
+
show the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
61 |
+
display the working capital percentage,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
62 |
+
plot the EBITDA each year,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
63 |
+
show the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
64 |
+
display the EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
65 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
66 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
67 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
68 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
69 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
70 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
71 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
72 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
73 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
74 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
75 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
76 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
77 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
78 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
79 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
80 |
+
plot the EBIT and sales over time,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
81 |
+
show the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
82 |
+
display the employee expenses and EBITDA over the years,"{'x': 'Year', 'y': ['Employee expense', 'EBITDA'], 'chart_type': 'line'}"
|
83 |
+
plot the RoCE for each year with a red line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'red'}"
|
84 |
+
show the interest payments each year,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
85 |
+
display the working capital percentage over time,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
86 |
+
plot the EBIT for each year with a blue line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'blue'}"
|
87 |
+
show sales and EBITDA over time,"{'x': 'Year', 'y': ['Sales', 'EBITDA'], 'chart_type': 'line'}"
|
88 |
+
display the net profit and RoCE,"{'x': 'Year', 'y': ['Net profit', 'RoCE'], 'chart_type': 'line'}"
|
89 |
+
plot the employee expenses and net profit each year,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
90 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
91 |
+
display the sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
92 |
+
plot the working capital percentage with a red line,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line', 'color': 'red'}"
|
93 |
+
show the annual interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
94 |
+
display the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
95 |
+
plot the sales each year with a green bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'green'}"
|
96 |
+
show the employee expenses and net profit over time,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
97 |
+
display the RoCE and EBITDA,"{'x': 'Year', 'y': ['RoCE', 'EBITDA'], 'chart_type': 'line'}"
|
98 |
+
plot the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
99 |
+
show the EBIT and sales each year,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
100 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
data_processor.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import os
|
3 |
+
import logging
|
4 |
+
|
5 |
+
class DataProcessor:
    """Loads a tabular file (CSV/Excel) and exposes simple inspection helpers."""

    def __init__(self, data_path=None):
        """Prefer a caller-supplied path (e.g. a user upload); otherwise fall
        back to the bundled sample workbook."""
        logging.info("Initializing DataProcessor")
        default_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx')
        use_custom = bool(data_path) and os.path.exists(data_path)
        self.data_path = data_path if use_custom else default_path
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read *path* into a DataFrame; return an empty frame on any failure."""
        extension = os.path.splitext(path)[1].lower()
        try:
            if extension == '.csv':
                frame = pd.read_csv(path)
            elif extension in ['.xls', '.xlsx']:
                frame = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {extension}")
            logging.info(f"Loaded data from {path} with shape {frame.shape}")
            return frame
        except Exception as exc:
            # Deliberate best-effort: callers get an empty frame rather than a crash.
            logging.error(f"Failed to load data: {exc}")
            return pd.DataFrame()

    def validate_columns(self, required_columns):
        """Return (ok, missing): ok is True iff every required column is present."""
        missing = [name for name in required_columns if name not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
            return False, missing
        return True, []

    def get_columns(self):
        """Column names as a plain list."""
        return list(self.data.columns)

    def preview(self, n=5):
        """First *n* rows as a list of record dicts."""
        return self.data.head(n).to_dict(orient='records')
|
42 |
+
|
image_verifier.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
import torch
|
3 |
+
from transformers import CLIPProcessor, CLIPModel
|
4 |
+
import os
|
5 |
+
import logging
|
6 |
+
import time
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
class ImageVerifier:
    """Scores a generated chart image against the user's query using CLIP."""

    def __init__(self):
        # Downloads/loads the CLIP checkpoint once at construction time.
        logging.info("Initializing ImageVerifier")
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    def verify(self, image_path, query):
        """Return True if CLIP ranks *query* as the best caption for the image.

        image_path is interpreted relative to the project root (one directory
        above this module).
        """
        start_time = time.time()
        logging.info(f"Verifying image {image_path} with query: {query}")

        full_image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), image_path)

        image = Image.open(full_image_path)

        inputs = self.processor(text=[query], images=image, return_tensors="pt", padding=True)
        outputs = self.model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        # NOTE(review): only ONE text candidate is supplied, so softmax over
        # dim=1 produces a single probability of 1.0 and argmax() is always 0 —
        # this check appears to return True unconditionally. Confirm whether a
        # set of distractor captions (or a raw-logit threshold) was intended.
        verification_result = probs.argmax().item() == 0
        end_time = time.time()

        logging.info(f"Image verification result: {verification_result} in {end_time - start_time} seconds")
        return verification_result
|
llm_agent.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
+
from data_processor import DataProcessor
|
4 |
+
from chart_generator import ChartGenerator
|
5 |
+
from image_verifier import ImageVerifier
|
6 |
+
from huggingface_hub import login
|
7 |
+
import logging
|
8 |
+
import time
|
9 |
+
import os
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
import ast
|
12 |
+
import requests
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
class LLM_Agent:
    """Turns a natural-language plotting request into a rendered, verified chart.

    Pipeline: parse the query into plot arguments (via the local fine-tuned
    BART model or a hosted Flan model on the Hugging Face Inference API),
    generate the chart, then verify the image against the query with CLIP.
    """

    def __init__(self, data_path=None):
        """Load the data pipeline and the local fine-tuned BART checkpoint.

        data_path: optional path to a user-supplied data file; DataProcessor
        falls back to the bundled sample when absent.
        """
        logging.info("Initializing LLM_Agent")
        self.data_processor = DataProcessor(data_path)
        self.chart_generator = ChartGenerator(self.data_processor.data)
        self.image_verifier = ImageVerifier()

        # Local fine-tuned checkpoint shipped alongside this module.
        model_path = os.path.join(os.path.dirname(__file__), "fine-tuned-bart-large")
        self.query_tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.query_model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    @staticmethod
    def validate_plot_args(plot_args):
        """Return True if plot_args has the x/y/chart_type keys.

        Fix: the original def omitted both `self` and `@staticmethod`, which
        only worked because it was invoked via the class object. Note this
        coerces a scalar 'y' into a one-element list, mutating plot_args.
        """
        required_keys = ['x', 'y', 'chart_type']
        if not all(key in plot_args for key in required_keys):
            return False
        if not isinstance(plot_args['y'], list):
            plot_args['y'] = [plot_args['y']]
        return True

    def _generate_with_bart(self, query):
        """Run the local fine-tuned BART model on *query*; return decoded text."""
        inputs = self.query_tokenizer(query, return_tensors="pt", max_length=512, truncation=True)
        outputs = self.query_model.generate(**inputs, max_length=100, num_return_sequences=1)
        return self.query_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _generate_with_hf_api(self, api_url, prompt, model_label):
        """POST *prompt* to a hosted model on the Hugging Face Inference API.

        Returns the generated text, or a human-readable error string on HTTP
        failure or unexpected response shape (deliberately does not raise, so
        the caller's fallback plot-args path still runs).
        """
        headers = {
            "Authorization": f"Bearer {os.getenv('HUGGINGFACEHUB_API_TOKEN')}",
            "Content-Type": "application/json",
        }
        response = requests.post(api_url, headers=headers, json={"inputs": prompt})
        if response.status_code != 200:
            logging.error(f"Hugging Face API error: {response.status_code} {response.text}")
            return f"Error: Unable to get response from {model_label} API. Please try again later."
        try:
            resp_json = response.json()
            return resp_json[0]['generated_text'] if isinstance(resp_json, list) else resp_json.get('generated_text', '')
        except Exception as e:
            logging.error(f"Error parsing Hugging Face API response: {e}, raw: {response.text}")
            return f"Error: Unexpected response from {model_label} API."

    def process_request(self, data):
        """Handle one /plot request dict: {'query': ..., 'file_path'?, 'model'?}.

        Returns {'response': raw LLM text, 'chart_path': saved image path,
        'verified': CLIP verification bool}.
        """
        start_time = time.time()
        logging.info(f"Processing request data: {data}")
        query = data['query']
        data_path = data.get('file_path')
        model_choice = data.get('model', 'bart')

        # Few-shot + persona prompt used by the hosted Flan models.
        flan_prompt = (
            "You are VizBot, an expert data visualization assistant. "
            "Given a user's natural language request about plotting data, output ONLY a valid Python dictionary with keys: x, y, chart_type, and color (if specified). "
            "Do not include any explanation or extra text.\n\n"
            "Example 1:\n"
            "User: plot the sales in the years with red line\n"
            "Output: {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}\n\n"
            "Example 2:\n"
            "User: show employee expenses and net profit over the years\n"
            "Output: {'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}\n\n"
            "Example 3:\n"
            "User: display the EBITDA for each year with a blue bar\n"
            "Output: {'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}\n\n"
            f"User: {query}\nOutput:"
        )

        # Re-initialize data processor and chart generator if a file is specified.
        if data_path:
            self.data_processor = DataProcessor(data_path)
            self.chart_generator = ChartGenerator(self.data_processor.data)

        if model_choice == 'flan-t5-base':
            response_text = self._generate_with_hf_api(
                "https://api-inference.huggingface.co/models/google/flan-t5-base",
                flan_prompt,
                "Flan-T5-Base",
            )
        elif model_choice == 'flan-ul2':
            # google/flan-ul2 is not served by the free Inference API, so use
            # flan-t5-xxl as the best available substitute. (The original code
            # assigned the flan-ul2 URL and immediately overwrote it.)
            response_text = self._generate_with_hf_api(
                "https://api-inference.huggingface.co/models/google/flan-t5-xxl",
                flan_prompt,
                "Flan-T5-XXL",
            )
        else:
            # 'bart' (the default) and any unrecognized choice use the local model.
            response_text = self._generate_with_bart(query)

        logging.info(f"LLM response text: {response_text}")
        try:
            # literal_eval safely parses the dict-shaped model output.
            plot_args = ast.literal_eval(response_text)
        except (SyntaxError, ValueError):
            plot_args = {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}
            logging.warning(f"Invalid LLM response. Using default plot args: {plot_args}")
        if LLM_Agent.validate_plot_args(plot_args):
            chart_path = self.chart_generator.generate_chart(plot_args)
        else:
            logging.warning("Invalid plot arguments. Using default.")
            chart_path = self.chart_generator.generate_chart({'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'})
        verified = self.image_verifier.verify(chart_path, query)
        end_time = time.time()
        logging.info(f"Processed request in {end_time - start_time} seconds")
        return {
            "response": response_text,
            "chart_path": chart_path,
            "verified": verified
        }
|
readme
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate
|
2 |
+
aiohttp
|
3 |
+
aiosignal
|
4 |
+
attrs
|
5 |
+
blinker
|
6 |
+
certifi
|
7 |
+
charset-normalizer
|
8 |
+
click
|
9 |
+
colorama
|
10 |
+
coloredlogs
|
11 |
+
contourpy
|
12 |
+
cycler
|
13 |
+
datasets
|
14 |
+
dill
|
15 |
+
et-xmlfile
|
16 |
+
filelock
|
17 |
+
Flask
|
18 |
+
Flask-Cors
|
19 |
+
fonttools
|
20 |
+
frozenlist
|
21 |
+
fsspec
|
22 |
+
huggingface-hub
|
23 |
+
humanfriendly
|
24 |
+
idna
|
25 |
+
intel-openmp
|
26 |
+
itsdangerous
|
27 |
+
Jinja2
|
28 |
+
joblib
|
29 |
+
kiwisolver
|
30 |
+
MarkupSafe
|
31 |
+
matplotlib
|
32 |
+
mkl
|
33 |
+
mpmath
|
34 |
+
multidict
|
35 |
+
multiprocess
|
36 |
+
networkx
|
37 |
+
numpy
|
38 |
+
openpyxl
|
39 |
+
optimum
|
40 |
+
packaging
|
41 |
+
pandas
|
42 |
+
pillow
|
43 |
+
protobuf
|
44 |
+
psutil
|
45 |
+
pyarrow
|
46 |
+
pyarrow-hotfix
|
47 |
+
pyparsing
|
48 |
+
pyreadline3
|
49 |
+
python-dateutil
|
50 |
+
python-dotenv
|
51 |
+
pytz
|
52 |
+
PyYAML
|
53 |
+
regex
|
54 |
+
requests
|
55 |
+
safetensors
|
56 |
+
scikit-learn
|
57 |
+
scipy
|
58 |
+
sentencepiece
|
59 |
+
six
|
60 |
+
sympy
|
61 |
+
tbb
|
62 |
+
threadpoolctl
|
63 |
+
tokenizers
|
64 |
+
torch
|
65 |
+
torchvision
|
66 |
+
tqdm
|
67 |
+
transformers
|
68 |
+
typing_extensions
|
69 |
+
tzdata
|
70 |
+
urllib3
|
71 |
+
Werkzeug
|
72 |
+
xxhash
|
73 |
+
yarl
|
start.sh
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Start script for backend Flask app on Hugging Face Spaces.

# Fail fast if any command errors instead of silently continuing.
set -e

export FLASK_APP=app.py
export FLASK_ENV=production

# Run the Flask app on 0.0.0.0:7860 (the port Spaces expects).
# NOTE(review): the Dockerfile runs `python app.py` from /code, while this
# script assumes a `backend/` prefix — confirm which layout is deployed.
python backend/app.py
|
train_model.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Fine-tune facebook/bart-large to map plotting queries to plot-argument dicts.

Reads data/train_data.csv (columns: query, arguments), trains a seq2seq model,
and saves the result to ./fine-tuned-bart-large for use by llm_agent.py.
"""
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from sklearn.model_selection import train_test_split

# Source queries and their target argument-dict strings.
data = pd.read_csv('data/train_data.csv')
queries = data['query'].tolist()
arguments = data['arguments'].tolist()

# 80/20 train/eval split; fixed seed for reproducibility.
train_queries, eval_queries, train_arguments, eval_arguments = train_test_split(queries, arguments, test_size=0.2, random_state=42)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

train_encodings = tokenizer(train_queries, truncation=True, padding=True)
eval_encodings = tokenizer(eval_queries, truncation=True, padding=True)

# NOTE(review): as_target_tokenizer() is deprecated in recent transformers
# releases (replaced by the text_target= argument) — confirm the pinned version.
with tokenizer.as_target_tokenizer():
    train_labels = tokenizer(train_arguments, truncation=True, padding=True)
    eval_labels = tokenizer(eval_arguments, truncation=True, padding=True)

class PlotDataset(torch.utils.data.Dataset):
    # Wraps tokenized encodings/labels as a torch Dataset for the Trainer.
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # One example: input tensors plus the target token ids as 'labels'.
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = PlotDataset(train_encodings, train_labels)
eval_dataset = PlotDataset(eval_encodings, eval_labels)

# NOTE(review): evaluation_strategy was renamed eval_strategy in newer
# transformers versions — this script assumes a version that still accepts it.
training_args = Seq2SeqTrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="epoch",
    predict_with_generate=True,
    generation_max_length=100,
)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# Save both model weights and tokenizer so llm_agent.py can load the directory.
trainer.save_model("fine-tuned-bart-large")
tokenizer.save_pretrained("fine-tuned-bart-large")

print("Model and tokenizer fine-tuned and saved as 'fine-tuned-bart-large'")
|