wifix199 committed on
Commit
37839fd
·
verified ·
1 Parent(s): 91c6887

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -25
app.py CHANGED
@@ -1,17 +1,9 @@
1
  #!/usr/bin/env python3
2
  """
3
  ai_csv_editor_hf.py ── AI-powered CSV editor using a Hugging Face model on CPU.
4
-
5
- Features:
6
- - Upload one or more CSV files (main + optional lookup tables)
7
- - Type spreadsheet-style commands: CONCAT, VLOOKUP, XLOOKUP, SUMIF
8
- - LLM (google/flan-t5-base) converts commands → JSON “edit plan”
9
- - pandas applies each action in sequence
10
- - Preview first 20 rows & download modified CSV
11
  """
12
 
13
  import json
14
- import io
15
  import tempfile
16
  import textwrap
17
  import pathlib
@@ -31,14 +23,18 @@ TEMPERATURE = 0.0
31
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
32
  model = AutoModelForSeq2SeqLM.from_pretrained(
33
  MODEL_NAME,
34
- device_map="cpu", # force CPU
35
  torch_dtype="auto"
36
  )
 
 
37
  generator = pipeline(
38
  "text2text-generation",
39
  model=model,
40
  tokenizer=tokenizer,
41
- device=-1, # -1 = CPU
 
 
42
  )
43
 
44
  # ──────────────────────────────────────────────────────────
@@ -76,12 +72,7 @@ into JSON edit plans. Respond with ONLY valid JSON matching this schema:
76
 
77
  def plan_from_command(cmd: str) -> Dict[str, Any]:
78
  prompt = f"{SYSTEM_PROMPT}\n\nUser: {cmd}\nJSON:"
79
- output = generator(
80
- prompt,
81
- max_new_tokens=MAX_NEW_TOK,
82
- temperature=TEMPERATURE,
83
- do_sample=False,
84
- )[0]["generated_text"]
85
  try:
86
  return json.loads(output)
87
  except json.JSONDecodeError as e:
@@ -105,7 +96,6 @@ def apply_action(df: pd.DataFrame,
105
 
106
  elif op in {"vlookup", "xlookup"}:
107
  lookup_df = uploads[act["lookup_file"]]
108
- # select only the two relevant columns and rename for merging
109
  right = lookup_df[[act["lookup_column"], act["return_column"]]] \
110
  .rename(columns={
111
  act["lookup_column"]: act["lookup_value"],
@@ -130,29 +120,24 @@ def run_editor(files: List[gr.File], command: str):
130
  if not files:
131
  return None, "⚠️ Please upload at least one CSV file.", None
132
 
133
- # Load uploaded CSVs into a dictionary
134
  uploads = {
135
  pathlib.Path(f.name).name: pd.read_csv(f.name)
136
  for f in files
137
  }
138
- # Treat the first file as the main dataset
139
  main_name = list(uploads.keys())[0]
140
  df = uploads[main_name]
141
 
142
- # Generate plan
143
  try:
144
  plan = plan_from_command(command)
145
  except Exception as e:
146
  return None, f"❌ LLM error: {e}", None
147
 
148
- # Apply actions
149
  try:
150
  for act in plan["actions"]:
151
  df = apply_action(df, uploads, act)
152
  except Exception as e:
153
  return None, f"❌ Execution error: {e}", None
154
 
155
- # Write modified CSV to a temp file and return
156
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
157
  df.to_csv(tmp.name, index=False)
158
  return df.head(20), "βœ… Success! Download below.", tmp.name
@@ -170,9 +155,9 @@ with gr.Blocks(title="AI CSV Editor (HF, CPU)") as demo:
170
  download = gr.File(label="Download Result")
171
 
172
  run_btn.click(
173
- fn=run_editor,
174
- inputs=[csv_files, cmd_box],
175
- outputs=[preview, status, download]
176
  )
177
 
178
  if __name__ == "__main__":
 
1
  #!/usr/bin/env python3
2
  """
3
  ai_csv_editor_hf.py ── AI-powered CSV editor using a Hugging Face model on CPU.
 
 
 
 
 
 
 
4
  """
5
 
6
  import json
 
7
  import tempfile
8
  import textwrap
9
  import pathlib
 
23
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
24
  model = AutoModelForSeq2SeqLM.from_pretrained(
25
  MODEL_NAME,
26
+ device_map="cpu", # force CPU placement
27
  torch_dtype="auto"
28
  )
29
+
30
+ # **Important change**: no `device=` argument here!
31
  generator = pipeline(
32
  "text2text-generation",
33
  model=model,
34
  tokenizer=tokenizer,
35
+ max_new_tokens=MAX_NEW_TOK,
36
+ temperature=TEMPERATURE,
37
+ do_sample=False,
38
  )
39
 
40
  # ──────────────────────────────────────────────────────────
 
72
 
73
  def plan_from_command(cmd: str) -> Dict[str, Any]:
74
  prompt = f"{SYSTEM_PROMPT}\n\nUser: {cmd}\nJSON:"
75
+ output = generator(prompt)[0]["generated_text"]
 
 
 
 
 
76
  try:
77
  return json.loads(output)
78
  except json.JSONDecodeError as e:
 
96
 
97
  elif op in {"vlookup", "xlookup"}:
98
  lookup_df = uploads[act["lookup_file"]]
 
99
  right = lookup_df[[act["lookup_column"], act["return_column"]]] \
100
  .rename(columns={
101
  act["lookup_column"]: act["lookup_value"],
 
120
  if not files:
121
  return None, "⚠️ Please upload at least one CSV file.", None
122
 
 
123
  uploads = {
124
  pathlib.Path(f.name).name: pd.read_csv(f.name)
125
  for f in files
126
  }
 
127
  main_name = list(uploads.keys())[0]
128
  df = uploads[main_name]
129
 
 
130
  try:
131
  plan = plan_from_command(command)
132
  except Exception as e:
133
  return None, f"❌ LLM error: {e}", None
134
 
 
135
  try:
136
  for act in plan["actions"]:
137
  df = apply_action(df, uploads, act)
138
  except Exception as e:
139
  return None, f"❌ Execution error: {e}", None
140
 
 
141
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
142
  df.to_csv(tmp.name, index=False)
143
  return df.head(20), "βœ… Success! Download below.", tmp.name
 
155
  download = gr.File(label="Download Result")
156
 
157
  run_btn.click(
158
+ run_editor,
159
+ [csv_files, cmd_box],
160
+ [preview, status, download]
161
  )
162
 
163
  if __name__ == "__main__":