“Transcendental-Programmer”
commited on
Commit
·
d773e1b
1
Parent(s):
1b807f7
feat: initial project files and Docker setup
Browse files- .gitignore +2 -0
- Dockerfile +12 -0
- README.md +37 -11
- __init__.py +1 -0
- app.py +76 -0
- chart_generator.py +45 -0
- data/readme +1 -0
- data/train_data.csv +100 -0
- data_processor.py +42 -0
- image_verifier.py +34 -0
- llm_agent.py +125 -0
- readme +1 -0
- requirements.txt +73 -0
- start.sh +8 -0
- train_model.py +64 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim

WORKDIR /code

# Install dependencies before copying the source tree so Docker's layer cache
# is reused whenever only application code (not requirements.txt) changes.
COPY requirements.txt .

RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source last.
COPY . .

# Hugging Face Spaces expects the app to listen on port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
|
README.md
CHANGED
@@ -1,11 +1,37 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Backend for Excel Plotter App
|
2 |
+
|
3 |
+
This backend is a Flask application serving the Excel Plotter API.
|
4 |
+
|
5 |
+
## Deployment on Hugging Face Spaces
|
6 |
+
|
7 |
+
- Ensure the fine-tuned BART large model files are included in the `backend/fine-tuned-bart-large/` directory or uploaded to Hugging Face Hub.
|
8 |
+
- The app runs on port 7860.
|
9 |
+
- To start the app, run:
|
10 |
+
|
11 |
+
```bash
|
12 |
+
bash start.sh
|
13 |
+
```
|
14 |
+
|
15 |
+
- The `requirements.txt` includes all necessary dependencies.
|
16 |
+
- Make sure to set any required environment variables in the Hugging Face Space settings.
|
17 |
+
|
18 |
+
## Using the Fine-tuned BART Large Model from Hugging Face Hub
|
19 |
+
|
20 |
+
You can load the fine-tuned BART large model directly from Hugging Face Hub in your backend code as follows:
|
21 |
+
|
22 |
+
```python
|
23 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
24 |
+
|
25 |
+
model_name = "ArchCoder/fine-tuned-bart-large"
|
26 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
27 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
28 |
+
```
|
29 |
+
|
30 |
+
Replace `"ArchCoder/fine-tuned-bart-large"` with your actual model repository name if different.
|
31 |
+
|
32 |
+
Make sure your backend code (e.g., in `llm_agent.py` or wherever the model is loaded) uses this method to load the model from the Hub instead of local files.
|
33 |
+
|
34 |
+
## Notes
|
35 |
+
|
36 |
+
- Static files are served from the `static` directory.
|
37 |
+
- Adjust API URLs in the frontend to point to the deployed backend URL.
|
__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# backend/__init__.py
|
app.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Flask backend for the Excel Plotter API.

Endpoints:
    /        -- welcome/health message
    /plot    -- POST a natural-language plotting query, returns chart info
    /upload  -- POST a CSV/Excel file, returns its columns and a preview
    /static/<path> -- serves generated chart images
"""
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from llm_agent import LLM_Agent
from data_processor import DataProcessor
import os
import logging
import time
from dotenv import load_dotenv
from werkzeug.utils import secure_filename

load_dotenv()


logging.basicConfig(level=logging.INFO)
# Silence noisy third-party debug logs.
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('PIL').setLevel(logging.WARNING)

app = Flask(__name__, static_folder=os.path.join(os.path.dirname(__file__), '..', 'static'))

CORS(app)
# Loaded once at startup: constructing LLM_Agent loads the BART and CLIP models.
agent = LLM_Agent()

UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# exist_ok avoids the check-then-create race of the previous exists()/makedirs pair.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

def allowed_file(filename):
    """Return True if *filename* has an allowed spreadsheet extension."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

@app.route('/')
def index():
    """Simple welcome message doubling as a health check."""
    logging.info("Index route accessed")
    return "Welcome to the Excel Plotter API. Use the /plot endpoint to make requests."

@app.route('/plot', methods=['POST'])
def plot():
    """Forward the JSON request body to the LLM agent and return its response."""
    start_time = time.time()
    data = request.json
    logging.info(f"Received request data: {data}")

    response = agent.process_request(data)

    end_time = time.time()
    logging.info(f"Processed request in {end_time - start_time} seconds")

    return jsonify(response)


@app.route('/static/<path:filename>')
def serve_static(filename):
    """Serve a file from the configured static folder (e.g. generated charts)."""
    # Bug fix: previously logged a hard-coded placeholder instead of the filename.
    logging.info(f"Serving static file: {filename}")
    return send_from_directory(app.static_folder, filename)

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a CSV/Excel upload, save it, and return columns plus a preview."""
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(file_path)
        # Bug fix: use DataProcessor directly instead of instantiating a whole
        # LLM_Agent (which reloaded the BART and CLIP models on every upload).
        dp = DataProcessor(file_path)
        columns = dp.get_columns()
        preview = dp.preview(5)
        return jsonify({'message': 'File uploaded successfully', 'columns': columns, 'preview': preview, 'file_path': file_path})
    else:
        return jsonify({'error': 'Invalid file type'}), 400

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
|
chart_generator.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
import logging
|
5 |
+
import time
|
6 |
+
|
7 |
+
class ChartGenerator:
    """Renders line/bar charts from a DataFrame according to parsed plot args."""

    def __init__(self, data=None):
        """Use the supplied DataFrame, or fall back to the bundled sample workbook."""
        logging.info("Initializing ChartGenerator")
        if data is not None:
            self.data = data
        else:
            self.data = pd.read_excel(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx'))

    def generate_chart(self, plot_args):
        """Draw the requested chart and save it under static/images.

        plot_args keys: 'x' (column name), 'y' (list of column names),
        optional 'chart_type' ('line' default or 'bar') and 'color'.
        Returns the saved chart's path relative to the project root.
        """
        start_time = time.time()
        logging.info(f"Generating chart with arguments: {plot_args}")

        fig, ax = plt.subplots()
        # Hoist loop invariants: chart type and color apply to every series.
        chart_type = plot_args.get('chart_type', 'line')
        color = plot_args.get('color', None)
        for y in plot_args['y']:
            if chart_type == 'bar':
                ax.bar(self.data[plot_args['x']], self.data[y], label=y, color=color)
            else:
                ax.plot(self.data[plot_args['x']], self.data[y], label=y, color=color)

        ax.set_xlabel(plot_args['x'])
        ax.legend()

        chart_filename = 'chart.png'
        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'static', 'images')
        os.makedirs(output_dir, exist_ok=True)

        full_path = os.path.join(output_dir, chart_filename)

        try:
            # savefig overwrites an existing file, so the previous
            # exists()/remove() dance was unnecessary.
            fig.savefig(full_path)
        finally:
            # Bug fix: close the figure; matplotlib keeps figures alive until
            # closed, which leaks memory in a long-running server process.
            plt.close(fig)

        logging.info(f"Chart generated and saved to {full_path}")

        return os.path.join('static', 'images', chart_filename)
|
data/readme
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
data/train_data.csv
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
query,arguments
|
2 |
+
plot the sales in the years with red line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}"
|
3 |
+
show employee expenses and net profit over the years,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
4 |
+
display the EBITDA for each year with a blue bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}"
|
5 |
+
plot the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
6 |
+
show the interest payments each year with a green bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'green'}"
|
7 |
+
display the working capital percentage over the years,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
8 |
+
plot the EBIT for each year with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
9 |
+
show sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
10 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
11 |
+
plot the employee expenses each year with a red line,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line', 'color': 'red'}"
|
12 |
+
show the annual sales in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
13 |
+
display EBIT and EBITDA over the years,"{'x': 'Year', 'y': ['EBIT', 'EBITDA'], 'chart_type': 'line'}"
|
14 |
+
plot the RoCE for each year with a purple line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'purple'}"
|
15 |
+
show the interest and working capital percentage,"{'x': 'Year', 'y': ['interest', 'WC %'], 'chart_type': 'line'}"
|
16 |
+
display the annual net profit with a blue bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'blue'}"
|
17 |
+
plot the sales and employee expenses in a line chart,"{'x': 'Year', 'y': ['Sales', 'Employee expense'], 'chart_type': 'line'}"
|
18 |
+
show the EBITDA for each year with a green bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'green'}"
|
19 |
+
display the EBIT over time with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
20 |
+
plot the net profit each year with a red bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'red'}"
|
21 |
+
show the employee expenses in a line chart,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
22 |
+
display the annual interest payments with a blue line,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line', 'color': 'blue'}"
|
23 |
+
plot the RoCE and WC % over the years,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
24 |
+
show the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
25 |
+
display EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
26 |
+
plot the employee expenses and EBIT,"{'x': 'Year', 'y': ['Employee expense', 'EBIT'], 'chart_type': 'line'}"
|
27 |
+
show the working capital percentage each year,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
28 |
+
display the RoCE in a bar chart,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'bar'}"
|
29 |
+
plot the annual sales with a green line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'green'}"
|
30 |
+
show the EBIT and interest over time,"{'x': 'Year', 'y': ['EBIT', 'interest'], 'chart_type': 'line'}"
|
31 |
+
display the net profit each year with a purple bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'purple'}"
|
32 |
+
plot the employee expenses over the years,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
33 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
34 |
+
display EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
35 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
36 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
37 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
38 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
39 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
40 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
41 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
42 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
43 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
44 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
45 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
46 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
47 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
48 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
49 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
50 |
+
plot the employee expenses each year,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
51 |
+
show the EBITDA in a bar chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar'}"
|
52 |
+
display the EBIT with a red line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'red'}"
|
53 |
+
plot the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
54 |
+
show the employee expenses and net profit,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
55 |
+
display the RoCE and working capital percentage,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
56 |
+
plot the EBITDA and EBIT,"{'x': 'Year', 'y': ['EBITDA', 'EBIT'], 'chart_type': 'line'}"
|
57 |
+
show the sales in a line chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
|
58 |
+
display the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
59 |
+
plot the annual interest payments,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
60 |
+
show the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
61 |
+
display the working capital percentage,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
62 |
+
plot the EBITDA each year,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
63 |
+
show the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
64 |
+
display the EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
65 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
66 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
67 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
68 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
69 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
70 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
71 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
72 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
73 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
74 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
75 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
76 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
77 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
78 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
79 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
80 |
+
plot the EBIT and sales over time,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
81 |
+
show the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
82 |
+
display the employee expenses and EBITDA over the years,"{'x': 'Year', 'y': ['Employee expense', 'EBITDA'], 'chart_type': 'line'}"
|
83 |
+
plot the RoCE for each year with a red line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'red'}"
|
84 |
+
show the interest payments each year,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
85 |
+
display the working capital percentage over time,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
86 |
+
plot the EBIT for each year with a blue line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'blue'}"
|
87 |
+
show sales and EBITDA over time,"{'x': 'Year', 'y': ['Sales', 'EBITDA'], 'chart_type': 'line'}"
|
88 |
+
display the net profit and RoCE,"{'x': 'Year', 'y': ['Net profit', 'RoCE'], 'chart_type': 'line'}"
|
89 |
+
plot the employee expenses and net profit each year,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
90 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
91 |
+
display the sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
92 |
+
plot the working capital percentage with a red line,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line', 'color': 'red'}"
|
93 |
+
show the annual interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
94 |
+
display the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
95 |
+
plot the sales each year with a green bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'green'}"
|
96 |
+
show the employee expenses and net profit over time,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
97 |
+
display the RoCE and EBITDA,"{'x': 'Year', 'y': ['RoCE', 'EBITDA'], 'chart_type': 'line'}"
|
98 |
+
plot the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
99 |
+
show the EBIT and sales each year,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
100 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
data_processor.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import os
|
3 |
+
import logging
|
4 |
+
|
5 |
+
class DataProcessor:
    """Loads a tabular file (CSV/Excel) and exposes simple inspection helpers."""

    def __init__(self, data_path=None):
        """Prefer a caller-supplied path (e.g. a user upload); otherwise fall
        back to the bundled sample workbook."""
        logging.info("Initializing DataProcessor")
        default_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data', 'sample_data.xlsx')
        use_custom = bool(data_path) and os.path.exists(data_path)
        self.data_path = data_path if use_custom else default_path
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read *path* into a DataFrame; return an empty frame on any failure."""
        extension = os.path.splitext(path)[1].lower()
        try:
            if extension == '.csv':
                frame = pd.read_csv(path)
            elif extension in ['.xls', '.xlsx']:
                frame = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {extension}")
            logging.info(f"Loaded data from {path} with shape {frame.shape}")
            return frame
        except Exception as exc:
            # Deliberate best-effort: callers get an empty frame rather than a crash.
            logging.error(f"Failed to load data: {exc}")
            return pd.DataFrame()

    def validate_columns(self, required_columns):
        """Return (ok, missing): ok is True iff every required column is present."""
        missing = [name for name in required_columns if name not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
            return False, missing
        return True, []

    def get_columns(self):
        """Column names as a plain list."""
        return list(self.data.columns)

    def preview(self, n=5):
        """First *n* rows as a list of record dicts."""
        return self.data.head(n).to_dict(orient='records')
|
42 |
+
|
image_verifier.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
import torch
|
3 |
+
from transformers import CLIPProcessor, CLIPModel
|
4 |
+
import os
|
5 |
+
import logging
|
6 |
+
import time
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
class ImageVerifier:
    """Scores a generated chart image against the user's query using CLIP."""

    def __init__(self):
        # Downloads/loads the CLIP checkpoint once at construction time.
        logging.info("Initializing ImageVerifier")
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    def verify(self, image_path, query):
        """Return True if CLIP ranks *query* as the best caption for the image.

        image_path is interpreted relative to the project root (one directory
        above this module).
        """
        start_time = time.time()
        logging.info(f"Verifying image {image_path} with query: {query}")

        full_image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), image_path)

        image = Image.open(full_image_path)

        inputs = self.processor(text=[query], images=image, return_tensors="pt", padding=True)
        outputs = self.model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        # NOTE(review): only ONE text candidate is supplied, so softmax over
        # dim=1 produces a single probability of 1.0 and argmax() is always 0 —
        # this check appears to return True unconditionally. Confirm whether a
        # set of distractor captions (or a raw-logit threshold) was intended.
        verification_result = probs.argmax().item() == 0
        end_time = time.time()

        logging.info(f"Image verification result: {verification_result} in {end_time - start_time} seconds")
        return verification_result
|
llm_agent.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
+
from data_processor import DataProcessor
|
4 |
+
from chart_generator import ChartGenerator
|
5 |
+
from image_verifier import ImageVerifier
|
6 |
+
from huggingface_hub import login
|
7 |
+
import logging
|
8 |
+
import time
|
9 |
+
import os
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
import ast
|
12 |
+
import requests
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
class LLM_Agent:
    """Turns a natural-language plotting request into a rendered, verified chart.

    Pipeline: parse the query into plot arguments (via the local fine-tuned
    BART model or a hosted Flan model on the Hugging Face Inference API),
    generate the chart, then verify the image against the query with CLIP.
    """

    def __init__(self, data_path=None):
        """Load the data pipeline and the local fine-tuned BART checkpoint.

        data_path: optional path to a user-supplied data file; DataProcessor
        falls back to the bundled sample when absent.
        """
        logging.info("Initializing LLM_Agent")
        self.data_processor = DataProcessor(data_path)
        self.chart_generator = ChartGenerator(self.data_processor.data)
        self.image_verifier = ImageVerifier()

        # Local fine-tuned checkpoint shipped alongside this module.
        model_path = os.path.join(os.path.dirname(__file__), "fine-tuned-bart-large")
        self.query_tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.query_model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

    @staticmethod
    def validate_plot_args(plot_args):
        """Return True if plot_args has the x/y/chart_type keys.

        Fix: the original def omitted both `self` and `@staticmethod`, which
        only worked because it was invoked via the class object. Note this
        coerces a scalar 'y' into a one-element list, mutating plot_args.
        """
        required_keys = ['x', 'y', 'chart_type']
        if not all(key in plot_args for key in required_keys):
            return False
        if not isinstance(plot_args['y'], list):
            plot_args['y'] = [plot_args['y']]
        return True

    def _generate_with_bart(self, query):
        """Run the local fine-tuned BART model on *query*; return decoded text."""
        inputs = self.query_tokenizer(query, return_tensors="pt", max_length=512, truncation=True)
        outputs = self.query_model.generate(**inputs, max_length=100, num_return_sequences=1)
        return self.query_tokenizer.decode(outputs[0], skip_special_tokens=True)

    def _generate_with_hf_api(self, api_url, prompt, model_label):
        """POST *prompt* to a hosted model on the Hugging Face Inference API.

        Returns the generated text, or a human-readable error string on HTTP
        failure or unexpected response shape (deliberately does not raise, so
        the caller's fallback plot-args path still runs).
        """
        headers = {
            "Authorization": f"Bearer {os.getenv('HUGGINGFACEHUB_API_TOKEN')}",
            "Content-Type": "application/json",
        }
        response = requests.post(api_url, headers=headers, json={"inputs": prompt})
        if response.status_code != 200:
            logging.error(f"Hugging Face API error: {response.status_code} {response.text}")
            return f"Error: Unable to get response from {model_label} API. Please try again later."
        try:
            resp_json = response.json()
            return resp_json[0]['generated_text'] if isinstance(resp_json, list) else resp_json.get('generated_text', '')
        except Exception as e:
            logging.error(f"Error parsing Hugging Face API response: {e}, raw: {response.text}")
            return f"Error: Unexpected response from {model_label} API."

    def process_request(self, data):
        """Handle one /plot request dict: {'query': ..., 'file_path'?, 'model'?}.

        Returns {'response': raw LLM text, 'chart_path': saved image path,
        'verified': CLIP verification bool}.
        """
        start_time = time.time()
        logging.info(f"Processing request data: {data}")
        query = data['query']
        data_path = data.get('file_path')
        model_choice = data.get('model', 'bart')

        # Few-shot + persona prompt used by the hosted Flan models.
        flan_prompt = (
            "You are VizBot, an expert data visualization assistant. "
            "Given a user's natural language request about plotting data, output ONLY a valid Python dictionary with keys: x, y, chart_type, and color (if specified). "
            "Do not include any explanation or extra text.\n\n"
            "Example 1:\n"
            "User: plot the sales in the years with red line\n"
            "Output: {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}\n\n"
            "Example 2:\n"
            "User: show employee expenses and net profit over the years\n"
            "Output: {'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}\n\n"
            "Example 3:\n"
            "User: display the EBITDA for each year with a blue bar\n"
            "Output: {'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}\n\n"
            f"User: {query}\nOutput:"
        )

        # Re-initialize data processor and chart generator if a file is specified.
        if data_path:
            self.data_processor = DataProcessor(data_path)
            self.chart_generator = ChartGenerator(self.data_processor.data)

        if model_choice == 'flan-t5-base':
            response_text = self._generate_with_hf_api(
                "https://api-inference.huggingface.co/models/google/flan-t5-base",
                flan_prompt,
                "Flan-T5-Base",
            )
        elif model_choice == 'flan-ul2':
            # google/flan-ul2 is not served by the free Inference API, so use
            # flan-t5-xxl as the best available substitute. (The original code
            # assigned the flan-ul2 URL and immediately overwrote it.)
            response_text = self._generate_with_hf_api(
                "https://api-inference.huggingface.co/models/google/flan-t5-xxl",
                flan_prompt,
                "Flan-T5-XXL",
            )
        else:
            # 'bart' (the default) and any unrecognized choice use the local model.
            response_text = self._generate_with_bart(query)

        logging.info(f"LLM response text: {response_text}")
        try:
            # literal_eval safely parses the dict-shaped model output.
            plot_args = ast.literal_eval(response_text)
        except (SyntaxError, ValueError):
            plot_args = {'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}
            logging.warning(f"Invalid LLM response. Using default plot args: {plot_args}")
        if LLM_Agent.validate_plot_args(plot_args):
            chart_path = self.chart_generator.generate_chart(plot_args)
        else:
            logging.warning("Invalid plot arguments. Using default.")
            chart_path = self.chart_generator.generate_chart({'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'})
        verified = self.image_verifier.verify(chart_path, query)
        end_time = time.time()
        logging.info(f"Processed request in {end_time - start_time} seconds")
        return {
            "response": response_text,
            "chart_path": chart_path,
            "verified": verified
        }
|
readme
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate
|
2 |
+
aiohttp
|
3 |
+
aiosignal
|
4 |
+
attrs
|
5 |
+
blinker
|
6 |
+
certifi
|
7 |
+
charset-normalizer
|
8 |
+
click
|
9 |
+
colorama
|
10 |
+
coloredlogs
|
11 |
+
contourpy
|
12 |
+
cycler
|
13 |
+
datasets
|
14 |
+
dill
|
15 |
+
et-xmlfile
|
16 |
+
filelock
|
17 |
+
Flask
|
18 |
+
Flask-Cors
|
19 |
+
fonttools
|
20 |
+
frozenlist
|
21 |
+
fsspec
|
22 |
+
huggingface-hub
|
23 |
+
humanfriendly
|
24 |
+
idna
|
25 |
+
intel-openmp
|
26 |
+
itsdangerous
|
27 |
+
Jinja2
|
28 |
+
joblib
|
29 |
+
kiwisolver
|
30 |
+
MarkupSafe
|
31 |
+
matplotlib
|
32 |
+
mkl
|
33 |
+
mpmath
|
34 |
+
multidict
|
35 |
+
multiprocess
|
36 |
+
networkx
|
37 |
+
numpy
|
38 |
+
openpyxl
|
39 |
+
optimum
|
40 |
+
packaging
|
41 |
+
pandas
|
42 |
+
pillow
|
43 |
+
protobuf
|
44 |
+
psutil
|
45 |
+
pyarrow
|
46 |
+
pyarrow-hotfix
|
47 |
+
pyparsing
|
48 |
+
pyreadline3
|
49 |
+
python-dateutil
|
50 |
+
python-dotenv
|
51 |
+
pytz
|
52 |
+
PyYAML
|
53 |
+
regex
|
54 |
+
requests
|
55 |
+
safetensors
|
56 |
+
scikit-learn
|
57 |
+
scipy
|
58 |
+
sentencepiece
|
59 |
+
six
|
60 |
+
sympy
|
61 |
+
tbb
|
62 |
+
threadpoolctl
|
63 |
+
tokenizers
|
64 |
+
torch
|
65 |
+
torchvision
|
66 |
+
tqdm
|
67 |
+
transformers
|
68 |
+
typing_extensions
|
69 |
+
tzdata
|
70 |
+
urllib3
|
71 |
+
Werkzeug
|
72 |
+
xxhash
|
73 |
+
yarl
|
start.sh
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Start script for backend Flask app on Hugging Face Spaces.

# Fail fast if any command errors instead of silently continuing.
set -e

export FLASK_APP=app.py
export FLASK_ENV=production

# Run the Flask app on 0.0.0.0:7860 (the port Spaces expects).
# NOTE(review): the Dockerfile runs `python app.py` from /code, while this
# script assumes a `backend/` prefix — confirm which layout is deployed.
python backend/app.py
|
train_model.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Fine-tune facebook/bart-large to map plotting queries to plot-argument dicts.

Reads data/train_data.csv (columns: query, arguments), trains a seq2seq model,
and saves the result to ./fine-tuned-bart-large for use by llm_agent.py.
"""
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from sklearn.model_selection import train_test_split

# Source queries and their target argument-dict strings.
data = pd.read_csv('data/train_data.csv')
queries = data['query'].tolist()
arguments = data['arguments'].tolist()

# 80/20 train/eval split; fixed seed for reproducibility.
train_queries, eval_queries, train_arguments, eval_arguments = train_test_split(queries, arguments, test_size=0.2, random_state=42)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

train_encodings = tokenizer(train_queries, truncation=True, padding=True)
eval_encodings = tokenizer(eval_queries, truncation=True, padding=True)

# NOTE(review): as_target_tokenizer() is deprecated in recent transformers
# releases (replaced by the text_target= argument) — confirm the pinned version.
with tokenizer.as_target_tokenizer():
    train_labels = tokenizer(train_arguments, truncation=True, padding=True)
    eval_labels = tokenizer(eval_arguments, truncation=True, padding=True)

class PlotDataset(torch.utils.data.Dataset):
    # Wraps tokenized encodings/labels as a torch Dataset for the Trainer.
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # One example: input tensors plus the target token ids as 'labels'.
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = PlotDataset(train_encodings, train_labels)
eval_dataset = PlotDataset(eval_encodings, eval_labels)

# NOTE(review): evaluation_strategy was renamed eval_strategy in newer
# transformers versions — this script assumes a version that still accepts it.
training_args = Seq2SeqTrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="epoch",
    predict_with_generate=True,
    generation_max_length=100,
)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# Save both model weights and tokenizer so llm_agent.py can load the directory.
trainer.save_model("fine-tuned-bart-large")
tokenizer.save_pretrained("fine-tuned-bart-large")

print("Model and tokenizer fine-tuned and saved as 'fine-tuned-bart-large'")
|