Update app.py
Browse files
app.py
CHANGED
@@ -1,17 +1,9 @@
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
ai_csv_editor_hf.py — AI-powered CSV editor using a Hugging Face model on CPU.
|
4 |
-
|
5 |
-
Features:
|
6 |
-
- Upload one or more CSV files (main + optional lookup tables)
|
7 |
-
- Type spreadsheet-style commands: CONCAT, VLOOKUP, XLOOKUP, SUMIF
|
8 |
-
- LLM (google/flan-t5-base) converts commands → JSON “edit plan”
|
9 |
-
- pandas applies each action in sequence
|
10 |
-
- Preview first 20 rows & download modified CSV
|
11 |
"""
|
12 |
|
13 |
import json
|
14 |
-
import io
|
15 |
import tempfile
|
16 |
import textwrap
|
17 |
import pathlib
|
@@ -31,14 +23,18 @@ TEMPERATURE = 0.0
|
|
31 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
32 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
33 |
MODEL_NAME,
|
34 |
-
device_map="cpu", # force CPU
|
35 |
torch_dtype="auto"
|
36 |
)
|
|
|
|
|
37 |
generator = pipeline(
|
38 |
"text2text-generation",
|
39 |
model=model,
|
40 |
tokenizer=tokenizer,
|
41 |
-
|
|
|
|
|
42 |
)
|
43 |
|
44 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
@@ -76,12 +72,7 @@ into JSON edit plans. Respond with ONLY valid JSON matching this schema:
|
|
76 |
|
77 |
def plan_from_command(cmd: str) -> Dict[str, Any]:
|
78 |
prompt = f"{SYSTEM_PROMPT}\n\nUser: {cmd}\nJSON:"
|
79 |
-
output = generator(
|
80 |
-
prompt,
|
81 |
-
max_new_tokens=MAX_NEW_TOK,
|
82 |
-
temperature=TEMPERATURE,
|
83 |
-
do_sample=False,
|
84 |
-
)[0]["generated_text"]
|
85 |
try:
|
86 |
return json.loads(output)
|
87 |
except json.JSONDecodeError as e:
|
@@ -105,7 +96,6 @@ def apply_action(df: pd.DataFrame,
|
|
105 |
|
106 |
elif op in {"vlookup", "xlookup"}:
|
107 |
lookup_df = uploads[act["lookup_file"]]
|
108 |
-
# select only the two relevant columns and rename for merging
|
109 |
right = lookup_df[[act["lookup_column"], act["return_column"]]] \
|
110 |
.rename(columns={
|
111 |
act["lookup_column"]: act["lookup_value"],
|
@@ -130,29 +120,24 @@ def run_editor(files: List[gr.File], command: str):
|
|
130 |
if not files:
|
131 |
return None, "β οΈ Please upload at least one CSV file.", None
|
132 |
|
133 |
-
# Load uploaded CSVs into a dictionary
|
134 |
uploads = {
|
135 |
pathlib.Path(f.name).name: pd.read_csv(f.name)
|
136 |
for f in files
|
137 |
}
|
138 |
-
# Treat the first file as the main dataset
|
139 |
main_name = list(uploads.keys())[0]
|
140 |
df = uploads[main_name]
|
141 |
|
142 |
-
# Generate plan
|
143 |
try:
|
144 |
plan = plan_from_command(command)
|
145 |
except Exception as e:
|
146 |
return None, f"β LLM error: {e}", None
|
147 |
|
148 |
-
# Apply actions
|
149 |
try:
|
150 |
for act in plan["actions"]:
|
151 |
df = apply_action(df, uploads, act)
|
152 |
except Exception as e:
|
153 |
return None, f"β Execution error: {e}", None
|
154 |
|
155 |
-
# Write modified CSV to a temp file and return
|
156 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
|
157 |
df.to_csv(tmp.name, index=False)
|
158 |
return df.head(20), "β
Success! Download below.", tmp.name
|
@@ -170,9 +155,9 @@ with gr.Blocks(title="AI CSV Editor (HF, CPU)") as demo:
|
|
170 |
download = gr.File(label="Download Result")
|
171 |
|
172 |
run_btn.click(
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
)
|
177 |
|
178 |
if __name__ == "__main__":
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
ai_csv_editor_hf.py — AI-powered CSV editor using a Hugging Face model on CPU.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
"""
|
5 |
|
6 |
import json
|
|
|
7 |
import tempfile
|
8 |
import textwrap
|
9 |
import pathlib
|
|
|
23 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
24 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
25 |
MODEL_NAME,
|
26 |
+
device_map="cpu", # force CPU placement
|
27 |
torch_dtype="auto"
|
28 |
)
|
29 |
+
|
30 |
+
# **Important change**: no `device=` argument here!
|
31 |
generator = pipeline(
|
32 |
"text2text-generation",
|
33 |
model=model,
|
34 |
tokenizer=tokenizer,
|
35 |
+
max_new_tokens=MAX_NEW_TOK,
|
36 |
+
temperature=TEMPERATURE,
|
37 |
+
do_sample=False,
|
38 |
)
|
39 |
|
40 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
72 |
|
73 |
def plan_from_command(cmd: str) -> Dict[str, Any]:
|
74 |
prompt = f"{SYSTEM_PROMPT}\n\nUser: {cmd}\nJSON:"
|
75 |
+
output = generator(prompt)[0]["generated_text"]
|
|
|
|
|
|
|
|
|
|
|
76 |
try:
|
77 |
return json.loads(output)
|
78 |
except json.JSONDecodeError as e:
|
|
|
96 |
|
97 |
elif op in {"vlookup", "xlookup"}:
|
98 |
lookup_df = uploads[act["lookup_file"]]
|
|
|
99 |
right = lookup_df[[act["lookup_column"], act["return_column"]]] \
|
100 |
.rename(columns={
|
101 |
act["lookup_column"]: act["lookup_value"],
|
|
|
120 |
if not files:
|
121 |
return None, "β οΈ Please upload at least one CSV file.", None
|
122 |
|
|
|
123 |
uploads = {
|
124 |
pathlib.Path(f.name).name: pd.read_csv(f.name)
|
125 |
for f in files
|
126 |
}
|
|
|
127 |
main_name = list(uploads.keys())[0]
|
128 |
df = uploads[main_name]
|
129 |
|
|
|
130 |
try:
|
131 |
plan = plan_from_command(command)
|
132 |
except Exception as e:
|
133 |
return None, f"β LLM error: {e}", None
|
134 |
|
|
|
135 |
try:
|
136 |
for act in plan["actions"]:
|
137 |
df = apply_action(df, uploads, act)
|
138 |
except Exception as e:
|
139 |
return None, f"β Execution error: {e}", None
|
140 |
|
|
|
141 |
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
|
142 |
df.to_csv(tmp.name, index=False)
|
143 |
return df.head(20), "β
Success! Download below.", tmp.name
|
|
|
155 |
download = gr.File(label="Download Result")
|
156 |
|
157 |
run_btn.click(
|
158 |
+
run_editor,
|
159 |
+
[csv_files, cmd_box],
|
160 |
+
[preview, status, download]
|
161 |
)
|
162 |
|
163 |
if __name__ == "__main__":
|