“Transcendental-Programmer” committed on
Commit
d773e1b
·
1 Parent(s): 1b807f7

feat: initial project files and Docker setup

Browse files
Files changed (15) hide show
  1. .gitignore +2 -0
  2. Dockerfile +12 -0
  3. README.md +37 -11
  4. __init__.py +1 -0
  5. app.py +76 -0
  6. chart_generator.py +45 -0
  7. data/readme +1 -0
  8. data/train_data.csv +100 -0
  9. data_processor.py +42 -0
  10. image_verifier.py +34 -0
  11. llm_agent.py +125 -0
  12. readme +1 -0
  13. requirements.txt +73 -0
  14. start.sh +8 -0
  15. train_model.py +64 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /code

# Install dependencies before copying the application source so that this
# (slow) layer is reused from the build cache whenever only code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project into the image.
COPY . .

# app.py binds to port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,37 @@
1
- ---
2
- title: Llm Excel Plotter Agent
3
- emoji: 🌍
4
- colorFrom: purple
5
- colorTo: yellow
6
- sdk: docker
7
- pinned: false
8
- short_description: A Flask API for natural language-driven Excel data plotting,
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Backend for Excel Plotter App
2
+
3
+ This backend is a Flask application serving the Excel Plotter API.
4
+
5
+ ## Deployment on Hugging Face Spaces
6
+
7
+ - Ensure the fine-tuned BART large model files are included in the `backend/fine-tuned-bart-large/` directory or uploaded to Hugging Face Hub.
8
+ - The app runs on port 7860.
9
+ - To start the app, run:
10
+
11
+ ```bash
12
+ bash start.sh
13
+ ```
14
+
15
+ - The `requirements.txt` includes all necessary dependencies.
16
+ - Make sure to set any required environment variables in the Hugging Face Space settings.
17
+
18
+ ## Using the Fine-tuned BART Large Model from Hugging Face Hub
19
+
20
+ You can load the fine-tuned BART large model directly from Hugging Face Hub in your backend code as follows:
21
+
22
+ ```python
23
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
24
+
25
+ model_name = "ArchCoder/fine-tuned-bart-large"
26
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
27
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
28
+ ```
29
+
30
+ Replace `"ArchCoder/fine-tuned-bart-large"` with your actual model repository name if different.
31
+
32
+ Make sure your backend code (e.g., in `llm_agent.py` or wherever the model is loaded) uses this method to load the model from the Hub instead of local files.
33
+
34
+ ## Notes
35
+
36
+ - Static files are served from the `static` directory.
37
+ - Adjust API URLs in the frontend to point to the deployed backend URL.
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # backend/__init__.py
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory
2
+ from flask_cors import CORS
3
+ from llm_agent import LLM_Agent
4
+ import os
5
+ import logging
6
+ import time
7
+ from dotenv import load_dotenv
8
+ from werkzeug.utils import secure_filename
9
+
10
+ load_dotenv()
11
+
12
+
13
# Application logs at INFO; the matplotlib and PIL loggers are raised to
# WARNING so their output does not drown out request logs.
logging.basicConfig(level=logging.INFO)
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('PIL').setLevel(logging.WARNING)

# Static assets are looked up in a "static" directory one level above this file.
app = Flask(__name__, static_folder=os.path.join(os.path.dirname(__file__), '..', 'static'))

CORS(app)
# One agent is built at import time and shared by every request handler.
agent = LLM_Agent()

# Uploaded spreadsheets are stored in "data/uploads" one level above this file.
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok=True replaces the previous "check, then create" sequence, which
# could raise FileExistsError if another process created the directory in
# between the two calls.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
28
+
29
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
31
+
32
@app.route('/')
def index():
    """Return a plain-text greeting that points callers at the /plot endpoint."""
    logging.info("Index route accessed")
    greeting = "Welcome to the Excel Plotter API. Use the /plot endpoint to make requests."
    return greeting
36
+
37
@app.route('/plot', methods=['POST'])
def plot():
    """Handle a plotting request and return the agent's result as JSON.

    The request body must be a JSON object containing a string ``query``.
    The whole object is forwarded to ``agent.process_request``.

    Returns:
        The JSON-encoded agent response, or a 400 response with an ``error``
        message when the body is missing, is not a JSON object, or has no
        string ``query``.
    """
    start_time = time.time()
    # silent=True yields None instead of raising when the body is absent or
    # is not valid JSON, so the problem can be reported as a clean 400 below.
    data = request.get_json(silent=True)
    logging.info(f"Received request data: {data}")

    # process_request indexes data['query'] directly; reject bad input here
    # rather than letting it surface as an unhandled exception (HTTP 500).
    if not isinstance(data, dict) or not isinstance(data.get('query'), str):
        return jsonify({'error': "Request body must be a JSON object with a string 'query' field"}), 400

    response = agent.process_request(data)

    end_time = time.time()
    logging.info(f"Processed request in {end_time - start_time} seconds")

    return jsonify(response)
49
+
50
+
51
@app.route('/static/<path:filename>')
def serve_static(filename):
    """Send *filename* from the application's configured static folder."""
    logging.info(f"Serving static file: {filename}")
    static_root = app.static_folder
    return send_from_directory(static_root, filename)
55
+
56
@app.route('/upload', methods=['POST'])
def upload_file():
    """Store an uploaded spreadsheet and describe its contents.

    Expects a multipart form upload under the field name ``file`` whose
    extension is accepted by ``allowed_file``.

    Returns:
        On success, JSON with a confirmation message, the column names, a
        preview of the first five rows and the path the file was saved to.
        Otherwise a 400 response with an ``error`` message.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if not allowed_file(file.filename):
        return jsonify({'error': 'Invalid file type'}), 400

    filename = secure_filename(file.filename)
    # Sanitising can strip characters and, with them, the extension; check
    # the name that will actually be written to disk as well.
    if not allowed_file(filename):
        return jsonify({'error': 'Invalid file type'}), 400

    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    # Reuse the class of the shared agent's data processor. The previous code
    # built a brand-new LLM_Agent on every upload, which reloads its models
    # just to reach this class.
    dp = type(agent.data_processor)(file_path)
    columns = dp.get_columns()
    preview = dp.preview(5)
    # NOTE(review): the response exposes the server-side path of the stored
    # file; confirm that clients are meant to see it.
    return jsonify({'message': 'File uploaded successfully', 'columns': columns, 'preview': preview, 'file_path': file_path})
74
+
75
+ if __name__ == '__main__':
76
+ app.run(host='0.0.0.0', port=7860)
chart_generator.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import os
4
+ import logging
5
+ import time
6
+
7
class ChartGenerator:
    """Draw line or bar charts from a tabular dataset and save them as PNG."""

    def __init__(self, data=None):
        """Store the dataset to plot.

        Args:
            data: Table whose columns are plotted. When None, the sample
                workbook ``data/sample_data.xlsx`` (located one directory
                above this file's directory) is loaded instead.
        """
        logging.info("Initializing ChartGenerator")
        if data is not None:
            self.data = data
        else:
            self.data = pd.read_excel(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx'))

    def generate_chart(self, plot_args):
        """Render the chart described by *plot_args* and write it to disk.

        Args:
            plot_args: Mapping with ``x`` (column for the x axis) and ``y``
                (iterable of columns to draw). Optional keys: ``chart_type``
                (``'bar'`` draws bars, anything else draws lines; defaults to
                ``'line'``) and ``color`` (applied to every series).

        Returns:
            The relative path ``static/images/chart.png``. The file itself is
            written to the ``static/images`` directory one level above this
            file's directory, replacing any previous chart.

        Raises:
            KeyError: If ``x``/``y`` are missing from *plot_args* or name
                columns that are not present in the data.
        """
        logging.info(f"Generating chart with arguments: {plot_args}")

        # These do not change between series, so read them once.
        x_column = plot_args['x']
        color = plot_args.get('color', None)
        draw_bars = plot_args.get('chart_type', 'line') == 'bar'

        chart_filename = 'chart.png'
        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'static', 'images')
        full_path = os.path.join(output_dir, chart_filename)

        fig, ax = plt.subplots()
        try:
            for y in plot_args['y']:
                draw = ax.bar if draw_bars else ax.plot
                draw(self.data[x_column], self.data[y], label=y, color=color)

            ax.set_xlabel(x_column)
            ax.legend()

            os.makedirs(output_dir, exist_ok=True)
            # Save this specific figure (not pyplot's "current" one); savefig
            # overwrites an existing file, so no prior delete is needed.
            fig.savefig(full_path)
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # a long-running server accumulates one figure per request.
            plt.close(fig)

        logging.info(f"Chart generated and saved to {full_path}")

        return os.path.join('static', 'images', chart_filename)
data/readme ADDED
@@ -0,0 +1 @@
 
 
1
+
data/train_data.csv ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ query,arguments
2
+ plot the sales in the years with red line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}"
3
+ show employee expenses and net profit over the years,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
4
+ display the EBITDA for each year with a blue bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}"
5
+ plot the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
6
+ show the interest payments each year with a green bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'green'}"
7
+ display the working capital percentage over the years,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
8
+ plot the EBIT for each year with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
9
+ show sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
10
+ display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
11
+ plot the employee expenses each year with a red line,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line', 'color': 'red'}"
12
+ show the annual sales in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
13
+ display EBIT and EBITDA over the years,"{'x': 'Year', 'y': ['EBIT', 'EBITDA'], 'chart_type': 'line'}"
14
+ plot the RoCE for each year with a purple line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'purple'}"
15
+ show the interest and working capital percentage,"{'x': 'Year', 'y': ['interest', 'WC %'], 'chart_type': 'line'}"
16
+ display the annual net profit with a blue bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'blue'}"
17
+ plot the sales and employee expenses in a line chart,"{'x': 'Year', 'y': ['Sales', 'Employee expense'], 'chart_type': 'line'}"
18
+ show the EBITDA for each year with a green bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'green'}"
19
+ display the EBIT over time with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
20
+ plot the net profit each year with a red bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'red'}"
21
+ show the employee expenses in a line chart,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
22
+ display the annual interest payments with a blue line,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line', 'color': 'blue'}"
23
+ plot the RoCE and WC % over the years,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
24
+ show the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
25
+ display EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
26
+ plot the employee expenses and EBIT,"{'x': 'Year', 'y': ['Employee expense', 'EBIT'], 'chart_type': 'line'}"
27
+ show the working capital percentage each year,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
28
+ display the RoCE in a bar chart,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'bar'}"
29
+ plot the annual sales with a green line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'green'}"
30
+ show the EBIT and interest over time,"{'x': 'Year', 'y': ['EBIT', 'interest'], 'chart_type': 'line'}"
31
+ display the net profit each year with a purple bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'purple'}"
32
+ plot the employee expenses over the years,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
33
+ show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
34
+ display EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
35
+ plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
36
+ show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
37
+ display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
38
+ plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
39
+ show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
40
+ display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
41
+ plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
42
+ show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
43
+ display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
44
+ plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
45
+ show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
46
+ display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
47
+ plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
48
+ show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
49
+ display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
50
+ plot the employee expenses each year,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
51
+ show the EBITDA in a bar chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar'}"
52
+ display the EBIT with a red line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'red'}"
53
+ plot the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
54
+ show the employee expenses and net profit,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
55
+ display the RoCE and working capital percentage,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
56
+ plot the EBITDA and EBIT,"{'x': 'Year', 'y': ['EBITDA', 'EBIT'], 'chart_type': 'line'}"
57
+ show the sales in a line chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
58
+ display the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
59
+ plot the annual interest payments,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
60
+ show the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
61
+ display the working capital percentage,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
62
+ plot the EBITDA each year,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
63
+ show the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
64
+ display the EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
65
+ plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
66
+ show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
67
+ display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
68
+ plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
69
+ show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
70
+ display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
71
+ plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
72
+ show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
73
+ display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
74
+ plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
75
+ show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
76
+ display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
77
+ plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
78
+ show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
79
+ display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
80
+ plot the EBIT and sales over time,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
81
+ show the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
82
+ display the employee expenses and EBITDA over the years,"{'x': 'Year', 'y': ['Employee expense', 'EBITDA'], 'chart_type': 'line'}"
83
+ plot the RoCE for each year with a red line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'red'}"
84
+ show the interest payments each year,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
85
+ display the working capital percentage over time,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
86
+ plot the EBIT for each year with a blue line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'blue'}"
87
+ show sales and EBITDA over time,"{'x': 'Year', 'y': ['Sales', 'EBITDA'], 'chart_type': 'line'}"
88
+ display the net profit and RoCE,"{'x': 'Year', 'y': ['Net profit', 'RoCE'], 'chart_type': 'line'}"
89
+ plot the employee expenses and net profit each year,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
90
+ show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
91
+ display the sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
92
+ plot the working capital percentage with a red line,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line', 'color': 'red'}"
93
+ show the annual interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
94
+ display the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
95
+ plot the sales each year with a green bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'green'}"
96
+ show the employee expenses and net profit over time,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
97
+ display the RoCE and EBITDA,"{'x': 'Year', 'y': ['RoCE', 'EBITDA'], 'chart_type': 'line'}"
98
+ plot the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
99
+ show the EBIT and sales each year,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
100
+ display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
data_processor.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import logging
4
+
5
class DataProcessor:
    """Load a CSV/Excel file into a DataFrame and expose simple inspections."""

    def __init__(self, data_path=None):
        """Load the dataset at *data_path*, or the bundled sample workbook.

        Args:
            data_path: Path to a ``.csv``, ``.xls`` or ``.xlsx`` file. When it
                is None/empty or does not exist, ``data/sample_data.xlsx``
                (one directory above this file's directory) is used instead.
        """
        logging.info("Initializing DataProcessor")
        # Allow dynamic data path (for user uploads), fallback to default
        if data_path and os.path.exists(data_path):
            self.data_path = data_path
        else:
            if data_path:
                # Make the fallback visible instead of silently plotting
                # different data than the caller asked for.
                logging.warning(f"Data path not found, using sample data instead: {data_path}")
            self.data_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx')
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read *path* into a DataFrame, chosen by file extension.

        Loading is best-effort: any failure (including an unsupported
        extension) is logged and an empty DataFrame is returned.
        """
        ext = os.path.splitext(path)[1].lower()
        try:
            if ext == '.csv':
                data = pd.read_csv(path)
            elif ext in ['.xls', '.xlsx']:
                data = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {ext}")
            logging.info(f"Loaded data from {path} with shape {data.shape}")
            return data
        except Exception as e:
            logging.error(f"Failed to load data: {e}")
            return pd.DataFrame()

    def validate_columns(self, required_columns):
        """Check that every name in *required_columns* is a column of the data.

        Returns:
            ``(True, [])`` when all are present, otherwise
            ``(False, missing)`` where *missing* lists the absent names.
        """
        missing = [col for col in required_columns if col not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
            return False, missing
        return True, []

    def get_columns(self):
        """Return the column labels of the loaded data as a list."""
        return list(self.data.columns)

    def preview(self, n=5):
        """Return the first *n* rows as a list of ``{column: value}`` dicts.

        Missing values (float NaN and ``pd.NaT``) are returned as None so the
        result can be encoded as standard JSON; NaN would otherwise be
        emitted as the non-standard token ``NaN``.
        """
        records = self.data.head(n).to_dict(orient='records')
        return [
            {column: self._json_safe(value) for column, value in row.items()}
            for row in records
        ]

    @staticmethod
    def _json_safe(value):
        """Map missing-value markers to None; return other values unchanged."""
        if value is pd.NaT:
            return None
        # NaN is the only float that is not equal to itself.
        if isinstance(value, float) and value != value:
            return None
        return value
42
+
image_verifier.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import torch
3
+ from transformers import CLIPProcessor, CLIPModel
4
+ import os
5
+ import logging
6
+ import time
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
class ImageVerifier:
    """Score a generated chart image against text using a CLIP model."""

    def __init__(self):
        """Load the ``openai/clip-vit-base-patch32`` model and processor."""
        logging.info("Initializing ImageVerifier")
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    def verify(self, image_path, query, contrast_prompts=None):
        """Report whether *query* is the best-matching text for the image.

        Args:
            image_path: Image location relative to the directory one level
                above this file's directory.
            query: Text the image is expected to match.
            contrast_prompts: Optional iterable of alternative texts to
                compare against. The result is True only when *query* scores
                higher than every alternative.

        Returns:
            bool: True when *query* has the highest image-text probability.

        NOTE(review): with no contrast prompts the softmax runs over a single
        text, whose probability is always 1.0, so the result is always True.
        Callers that want a meaningful check must pass ``contrast_prompts``.
        """
        start_time = time.time()
        logging.info(f"Verifying image {image_path} with query: {query}")

        full_image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), image_path)

        # The query stays at index 0 so "argmax == 0" means "query wins".
        candidate_texts = [query] + list(contrast_prompts or [])

        # The context manager closes the underlying file once the processor
        # has read the pixels.
        with Image.open(full_image_path) as image:
            inputs = self.processor(
                text=candidate_texts,
                images=image,
                return_tensors="pt",
                padding=True,
                # Keep over-long queries within the text encoder's length limit.
                truncation=True,
            )

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        verification_result = probs.argmax().item() == 0
        end_time = time.time()

        logging.info(f"Image verification result: {verification_result} in {end_time - start_time} seconds")
        return verification_result
llm_agent.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ from data_processor import DataProcessor
4
+ from chart_generator import ChartGenerator
5
+ from image_verifier import ImageVerifier
6
+ from huggingface_hub import login
7
+ import logging
8
+ import time
9
+ import os
10
+ from dotenv import load_dotenv
11
+ import ast
12
+ import requests
13
+
14
+ load_dotenv()
15
+
16
class LLM_Agent:
    """Turn a natural-language plotting request into a chart.

    A request is translated into plot arguments by a language model (a local
    fine-tuned BART model, or a hosted Flan-T5 model), the chart is rendered
    by ``ChartGenerator`` and the result is passed to ``ImageVerifier``.
    """

    # Plot arguments used whenever the model output cannot be parsed or
    # does not contain the required keys.
    DEFAULT_PLOT_ARGS = {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}

    # Hosted models keyed by the request's ``model`` value:
    # (model repository id, name used in error messages).
    # 'flan-ul2' is served by google/flan-t5-xxl.
    REMOTE_MODELS = {
        'flan-t5-base': ('google/flan-t5-base', 'Flan-T5-Base'),
        'flan-ul2': ('google/flan-t5-xxl', 'Flan-T5-XXL'),
    }

    # Seconds to wait for the hosted inference API before giving up, so a
    # stalled connection cannot block a request indefinitely.
    API_TIMEOUT = 60

    def __init__(self, data_path=None):
        """Create the helper objects and load the local BART model.

        Args:
            data_path: Optional dataset path handed to ``DataProcessor``.
        """
        logging.info("Initializing LLM_Agent")
        self.data_processor = DataProcessor(data_path)
        self.chart_generator = ChartGenerator(self.data_processor.data)
        self.image_verifier = ImageVerifier()

        model_path = os.path.join(os.path.dirname(__file__), "fine-tuned-bart-large")
        self.query_tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.query_model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    @staticmethod
    def validate_plot_args(plot_args):
        """Check that *plot_args* is a dict with ``x``, ``y`` and ``chart_type``.

        A non-list ``y`` is wrapped in a list in place.

        Returns:
            bool: True when the arguments are usable, False otherwise
            (including when *plot_args* is not a dict at all).
        """
        # The model output is parsed with literal_eval, which can yield any
        # literal (list, number, string, ...), not just a dict.
        if not isinstance(plot_args, dict):
            return False
        required_keys = ['x', 'y', 'chart_type']
        if not all(key in plot_args for key in required_keys):
            return False
        if not isinstance(plot_args['y'], list):
            plot_args['y'] = [plot_args['y']]
        return True

    @staticmethod
    def _build_flan_prompt(query):
        """Return the few-shot prompt sent to the hosted Flan models."""
        return (
            "You are VizBot, an expert data visualization assistant. "
            "Given a user's natural language request about plotting data, output ONLY a valid Python dictionary with keys: x, y, chart_type, and color (if specified). "
            "Do not include any explanation or extra text.\n\n"
            "Example 1:\n"
            "User: plot the sales in the years with red line\n"
            "Output: {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}\n\n"
            "Example 2:\n"
            "User: show employee expenses and net profit over the years\n"
            "Output: {'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}\n\n"
            "Example 3:\n"
            "User: display the EBITDA for each year with a blue bar\n"
            "Output: {'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}\n\n"
            f"User: {query}\nOutput:"
        )

    def _generate_with_bart(self, query):
        """Run *query* through the local fine-tuned BART model and return its text."""
        inputs = self.query_tokenizer(query, return_tensors="pt", max_length=512, truncation=True)
        outputs = self.query_model.generate(**inputs, max_length=100, num_return_sequences=1)
        return self.query_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _query_inference_api(self, model_id, label, prompt):
        """POST *prompt* to the hosted inference API and return the generated text.

        Failures never raise: an ``"Error: ..."`` string naming *label* is
        returned instead, which the caller treats as unparseable output.
        """
        api_url = f"https://api-inference.huggingface.co/models/{model_id}"
        headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACEHUB_API_TOKEN')}", "Content-Type": "application/json"}
        unavailable = f"Error: Unable to get response from {label} API. Please try again later."
        try:
            response = requests.post(api_url, headers=headers, json={"inputs": prompt}, timeout=self.API_TIMEOUT)
        except requests.RequestException as e:
            # Connection failures and timeouts are reported like a bad status.
            logging.error(f"Hugging Face API request failed: {e}")
            return unavailable
        if response.status_code != 200:
            logging.error(f"Hugging Face API error: {response.status_code} {response.text}")
            return unavailable
        try:
            resp_json = response.json()
            return resp_json[0]['generated_text'] if isinstance(resp_json, list) else resp_json.get('generated_text', '')
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
            logging.error(f"Error parsing Hugging Face API response: {e}, raw: {response.text}")
            return f"Error: Unexpected response from {label} API."

    def _parse_plot_args(self, response_text):
        """Parse model output as a Python literal, falling back to the defaults."""
        try:
            return ast.literal_eval(response_text)
        except (SyntaxError, ValueError, TypeError):
            plot_args = dict(self.DEFAULT_PLOT_ARGS)
            logging.warning(f"Invalid LLM response. Using default plot args: {plot_args}")
            return plot_args

    def process_request(self, data):
        """Generate and verify a chart for one request.

        Args:
            data: Mapping with a required ``query`` and optional ``file_path``
                (dataset to plot) and ``model`` (``'bart'``, ``'flan-t5-base'``
                or ``'flan-ul2'``; any other value uses the local BART model).

        Returns:
            dict with ``response`` (raw model text), ``chart_path`` and
            ``verified``.

        Raises:
            KeyError: If *data* has no ``query`` entry.
        """
        start_time = time.time()
        logging.info(f"Processing request data: {data}")
        query = data['query']
        data_path = data.get('file_path')
        model_choice = data.get('model', 'bart')

        # Re-initialize data processor and chart generator if a file is specified
        # NOTE(review): this replaces the helpers on the agent itself, so later
        # requests without a file_path keep using this file. The path also
        # comes straight from the request; confirm it is restricted to
        # uploaded files by the caller.
        if data_path:
            self.data_processor = DataProcessor(data_path)
            self.chart_generator = ChartGenerator(self.data_processor.data)

        # isinstance guard: a non-string (e.g. a JSON list) cannot be used as
        # a dict key and should simply fall through to the local model.
        remote = self.REMOTE_MODELS.get(model_choice) if isinstance(model_choice, str) else None
        if remote is not None:
            model_id, label = remote
            response_text = self._query_inference_api(model_id, label, self._build_flan_prompt(query))
        else:
            # 'bart' and every unrecognised choice use the local model.
            response_text = self._generate_with_bart(query)

        logging.info(f"LLM response text: {response_text}")
        plot_args = self._parse_plot_args(response_text)
        if not self.validate_plot_args(plot_args):
            logging.warning("Invalid plot arguments. Using default.")
            plot_args = dict(self.DEFAULT_PLOT_ARGS)
        chart_path = self.chart_generator.generate_chart(plot_args)
        verified = self.image_verifier.verify(chart_path, query)
        end_time = time.time()
        logging.info(f"Processed request in {end_time - start_time} seconds")
        return {
            "response": response_text,
            "chart_path": chart_path,
            "verified": verified
        }
readme ADDED
@@ -0,0 +1 @@
 
 
1
+
requirements.txt ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ aiohttp
3
+ aiosignal
4
+ attrs
5
+ blinker
6
+ certifi
7
+ charset-normalizer
8
+ click
9
+ colorama
10
+ coloredlogs
11
+ contourpy
12
+ cycler
13
+ datasets
14
+ dill
15
+ et-xmlfile
16
+ filelock
17
+ Flask
18
+ Flask-Cors
19
+ fonttools
20
+ frozenlist
21
+ fsspec
22
+ huggingface-hub
23
+ humanfriendly
24
+ idna
25
+ intel-openmp
26
+ itsdangerous
27
+ Jinja2
28
+ joblib
29
+ kiwisolver
30
+ MarkupSafe
31
+ matplotlib
32
+ mkl
33
+ mpmath
34
+ multidict
35
+ multiprocess
36
+ networkx
37
+ numpy
38
+ openpyxl
39
+ optimum
40
+ packaging
41
+ pandas
42
+ pillow
43
+ protobuf
44
+ psutil
45
+ pyarrow
46
+ pyarrow-hotfix
47
+ pyparsing
48
+ pyreadline3
49
+ python-dateutil
50
+ python-dotenv
51
+ pytz
52
+ PyYAML
53
+ regex
54
+ requests
55
+ safetensors
56
+ scikit-learn
57
+ scipy
58
+ sentencepiece
59
+ six
60
+ sympy
61
+ tbb
62
+ threadpoolctl
63
+ tokenizers
64
+ torch
65
+ torchvision
66
+ tqdm
67
+ transformers
68
+ typing_extensions
69
+ tzdata
70
+ urllib3
71
+ Werkzeug
72
+ xxhash
73
+ yarl
start.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Start script for backend Flask app on Hugging Face Spaces

export FLASK_APP=app.py
export FLASK_ENV=production

# Run the Flask app on 0.0.0.0:7860
# app.py sits next to this script, so resolve it relative to the script's own
# location; this works regardless of the directory the script is invoked from.
# exec replaces the shell so signals reach the Python process directly.
exec python "$(dirname "$0")/app.py"
train_model.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
4
+ from sklearn.model_selection import train_test_split
5
+
6
# Load the (query, arguments) pairs used for fine-tuning.
data = pd.read_csv('data/train_data.csv')
queries = data['query'].tolist()
arguments = data['arguments'].tolist()

# Hold out 20% of the pairs for evaluation; the fixed seed keeps the split
# reproducible between runs.
train_queries, eval_queries, train_arguments, eval_arguments = train_test_split(queries, arguments, test_size=0.2, random_state=42)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

train_encodings = tokenizer(train_queries, truncation=True, padding=True)
eval_encodings = tokenizer(eval_queries, truncation=True, padding=True)


def _tokenize_targets(texts):
    """Tokenize target strings and mask padding so it is ignored by the loss.

    Padded positions are replaced with -100, the label value the
    cross-entropy loss skips. Without this the model is also trained to
    predict the padding tokens.
    """
    # NOTE(review): the deprecated ``as_target_tokenizer()`` context manager
    # was dropped here on the assumption that BART tokenizes source and target
    # text identically; confirm if the base model is ever changed.
    encoded = tokenizer(texts, truncation=True, padding=True)
    pad_id = tokenizer.pad_token_id
    encoded['input_ids'] = [
        [-100 if token == pad_id else token for token in sequence]
        for sequence in encoded['input_ids']
    ]
    return encoded


train_labels = _tokenize_targets(train_arguments)
eval_labels = _tokenize_targets(eval_arguments)
21
+
22
class PlotDataset(torch.utils.data.Dataset):
    """Dataset pairing tokenized queries with their tokenized target labels."""

    def __init__(self, encodings, labels):
        """Keep references to the tokenized inputs and the tokenized targets."""
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the input ids."""
        return len(self.encodings.input_ids)

    def __getitem__(self, idx):
        """Return example *idx* as tensors: every encoding field plus ``labels``."""
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        # Labels are the token ids of the corresponding target string.
        sample['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return sample
34
+
35
# Wrap the tokenized splits so the trainer can index them.
train_dataset = PlotDataset(train_encodings, train_labels)
eval_dataset = PlotDataset(eval_encodings, eval_labels)

# Training configuration: evaluation runs once per epoch and generates
# predictions of up to 100 tokens; at most two checkpoints are kept.
training_settings = dict(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    evaluation_strategy="epoch",
    predict_with_generate=True,
    generation_max_length=100,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
)
training_args = Seq2SeqTrainingArguments(**training_settings)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# Persist the fine-tuned weights and the tokenizer side by side.
save_dir = "fine-tuned-bart-large"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

print("Model and tokenizer fine-tuned and saved as 'fine-tuned-bart-large'")