Update app.py
Browse files
app.py
CHANGED
@@ -1,64 +1,179 @@
|
|
1 |
-
|
2 |
-
from huggingface_hub import InferenceClient
|
3 |
-
|
4 |
"""
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
"""
|
7 |
-
client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
system_message,
|
14 |
-
max_tokens,
|
15 |
-
temperature,
|
16 |
-
top_p,
|
17 |
-
):
|
18 |
-
messages = [{"role": "system", "content": system_message}]
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
25 |
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
top_p=top_p,
|
36 |
-
):
|
37 |
-
token = message.choices[0].delta.content
|
38 |
|
39 |
-
|
40 |
-
|
|
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
"""
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
]
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
if __name__ == "__main__":
|
64 |
demo.launch()
|
|
|
1 |
+
#!/usr/bin/env python3
|
|
|
|
|
2 |
"""
|
3 |
+
ai_csv_editor_hf.py ββ AI-powered CSV editor using a Hugging Face model on CPU.
|
4 |
+
|
5 |
+
Features:
|
6 |
+
- Upload one or more CSV files (main + optional lookup tables)
|
7 |
+
- Type spreadsheet-style commands: CONCAT, VLOOKUP, XLOOKUP, SUMIF
|
8 |
+
- LLM (google/flan-t5-base) converts commands β JSON βedit planβ
|
9 |
+
- pandas applies each action in sequence
|
10 |
+
- Preview first 20 rows & download modified CSV
|
11 |
"""
|
|
|
12 |
|
13 |
+
import json
|
14 |
+
import io
|
15 |
+
import tempfile
|
16 |
+
import textwrap
|
17 |
+
import pathlib
|
18 |
+
from typing import List, Dict, Any
|
19 |
|
20 |
+
import pandas as pd
|
21 |
+
import gradio as gr
|
22 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# ----------------------------------------------------------
# 1. LOAD A SMALL INSTRUCTION-FOLLOWING MODEL (CPU only)
# ----------------------------------------------------------
MODEL_NAME = "google/flan-t5-base"  # small seq2seq model; fits in CPU RAM
MAX_NEW_TOK = 256                   # cap on tokens generated per edit plan
TEMPERATURE = 0.0                   # deterministic decoding

# Load tokenizer + model once at import time so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_NAME,
    device_map="cpu",  # force CPU
    torch_dtype="auto"
)
# text2text-generation pipeline wraps tokenize -> generate -> decode.
generator = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # -1 = CPU
)
|
43 |
|
44 |
# ----------------------------------------------------------
# 2. PROMPT -> JSON "EDIT PLAN"
# ----------------------------------------------------------
SYSTEM_PROMPT = textwrap.dedent("""\
    You are an assistant that converts natural-language spreadsheet commands
    into JSON edit plans. Respond with ONLY valid JSON matching this schema:

    {
      "actions": [
        {
          "operation": "concat | vlookup | xlookup | sumif",
          "target": "string",

          # For CONCAT:
          "columns": ["colA","colB"],
          "separator": " ",

          # For VLOOKUP / XLOOKUP:
          "lookup_value": "KeyInMain",
          "lookup_file": "other.csv",
          "lookup_column": "KeyInOther",
          "return_column": "Value",
          "exact": true,

          # For SUMIF:
          "criteria_column": "Category",
          "criteria": "Foo",
          "sum_column": "Amount"
        }
      ]
    }
    """)

def plan_from_command(cmd: str, gen=None, max_new_tokens=None) -> Dict[str, Any]:
    """Convert a natural-language spreadsheet command into a JSON edit plan.

    Parameters
    ----------
    cmd : str
        The user's instruction (e.g. "concat First Last -> FullName").
    gen : callable, optional
        A text2text-generation callable; defaults to the module-level
        ``generator`` pipeline.  Injectable for testing.
    max_new_tokens : int, optional
        Generation cap; defaults to the module-level ``MAX_NEW_TOK``.

    Returns
    -------
    dict
        The parsed edit plan (``{"actions": [...]}``).

    Raises
    ------
    ValueError
        If the model output contains no parseable JSON object.
    """
    if gen is None:
        gen = generator  # module-level HF pipeline
    if max_new_tokens is None:
        max_new_tokens = MAX_NEW_TOK

    prompt = f"{SYSTEM_PROMPT}\n\nUser: {cmd}\nJSON:"
    # do_sample=False already makes decoding greedy/deterministic; passing
    # temperature alongside it is contradictory and triggers warnings (or
    # errors for temperature=0.0) in recent transformers releases, so the
    # temperature kwarg is intentionally omitted.
    output = gen(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=False,
    )[0]["generated_text"]

    try:
        return json.loads(output)
    except json.JSONDecodeError as e:
        # Small instruction models often wrap the JSON in extra prose;
        # salvage the outermost {...} span before giving up.
        start, end = output.find("{"), output.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(output[start:end + 1])
            except json.JSONDecodeError:
                pass
        raise ValueError(f"Model returned invalid JSON:\n{output}") from e
|
89 |
+
|
90 |
# ----------------------------------------------------------
# 3. DATA OPERATIONS
# ----------------------------------------------------------
def apply_action(df: pd.DataFrame,
                 uploads: Dict[str, pd.DataFrame],
                 act: Dict[str, Any]) -> pd.DataFrame:
    """Apply one edit-plan action to ``df`` and return the resulting frame.

    Parameters
    ----------
    df : pd.DataFrame
        The main dataframe being edited.
    uploads : dict
        Filename -> dataframe map of every uploaded CSV; vlookup/xlookup
        use it to locate the lookup table.
    act : dict
        One action from the LLM plan; ``act["operation"]`` selects the
        behavior (concat / vlookup / xlookup / sumif).

    Raises
    ------
    ValueError
        For an unrecognized operation.
    KeyError
        For a lookup file that was never uploaded (with a helpful message).
    """
    # Users type commands in caps (CONCAT, VLOOKUP, ...) and the LLM may
    # echo that capitalization, so normalize before dispatching.
    op = str(act["operation"]).lower()

    if op == "concat":
        sep = act.get("separator", "")
        # Join the selected columns row-wise as strings.
        df[act["target"]] = (
            df[act["columns"]]
            .astype(str)
            .agg(sep.join, axis=1)
        )

    elif op in {"vlookup", "xlookup"}:
        try:
            lookup_df = uploads[act["lookup_file"]]
        except KeyError:
            raise KeyError(
                f"Lookup file {act['lookup_file']!r} was not uploaded; "
                f"available: {sorted(uploads)}"
            ) from None
        # Select only the two relevant columns and rename them so the
        # left merge keys on the main frame's column directly.
        right = lookup_df[[act["lookup_column"], act["return_column"]]].rename(
            columns={
                act["lookup_column"]: act["lookup_value"],
                act["return_column"]: act["target"],
            }
        )
        df = df.merge(right, on=act["lookup_value"], how="left")

    elif op == "sumif":
        # Sum `sum_column` over the matching rows, then broadcast the
        # scalar into the target column (spreadsheet SUMIF semantics).
        mask = df[act["criteria_column"]] == act["criteria"]
        total = df.loc[mask, act["sum_column"]].sum()
        df[act["target"]] = total

    else:
        raise ValueError(f"Unsupported operation: {op}")

    return df
|
125 |
+
|
126 |
# ----------------------------------------------------------
# 4. GRADIO UI
# ----------------------------------------------------------
def run_editor(files, command: str):
    """Gradio callback: load CSVs, run the LLM edit plan, return results.

    Parameters
    ----------
    files : list
        Uploaded file objects from gr.Files (each exposes a ``.name`` path).
    command : str
        The user's spreadsheet-style instruction.

    Returns
    -------
    tuple
        (preview_dataframe, status_markdown, download_path); the first and
        last elements are None on failure.
    """
    if not files:
        return None, "⚠️ Please upload at least one CSV file.", None
    # Guard before touching the files or the model: an empty command would
    # otherwise waste an LLM call and produce a confusing error.
    if not command or not command.strip():
        return None, "⚠️ Please type a command first.", None

    # Load uploaded CSVs into a dictionary keyed by basename.
    # NOTE(review): two uploads sharing a basename overwrite each other
    # here — confirm that is acceptable for this UI.
    uploads = {
        pathlib.Path(f.name).name: pd.read_csv(f.name)
        for f in files
    }
    # Treat the first file as the main dataset.
    main_name = next(iter(uploads))
    df = uploads[main_name]

    # Ask the LLM for an edit plan.
    try:
        plan = plan_from_command(command)
    except Exception as e:
        return None, f"❌ LLM error: {e}", None

    # Apply each planned action in sequence.
    try:
        for act in plan["actions"]:
            df = apply_action(df, uploads, act)
    except Exception as e:
        return None, f"❌ Execution error: {e}", None

    # Write the modified CSV to a temp file; gradio serves it by path.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    df.to_csv(tmp.name, index=False)
    tmp.close()  # release the handle so the file can be re-opened (Windows)
    return df.head(20), "✅ Success! Download below.", tmp.name
|
159 |
+
|
160 |
# Build the UI declaratively; component creation order defines the layout.
with gr.Blocks(title="AI CSV Editor (HF, CPU)") as demo:
    # Header / usage instructions (trailing two spaces force Markdown line breaks).
    gr.Markdown("## AI-powered CSV Editor  \n"
                "1. Upload one main CSV (first) plus any lookup tables  \n"
                "2. Type a spreadsheet-style instruction  \n"
                "3. Download the modified CSV")
    # Inputs: one or more CSVs plus a free-text command box.
    csv_files = gr.Files(file_types=[".csv"], label="Upload CSV file(s)")
    cmd_box = gr.Textbox(lines=2, placeholder="e.g. concat First Last → FullName")
    run_btn = gr.Button("Apply")
    # Outputs: preview table, status message, and a downloadable result file.
    preview = gr.Dataframe(label="Preview (first 20 rows)")
    status = gr.Markdown()
    download = gr.File(label="Download Result")

    # Wire the button to the processing callback defined above.
    run_btn.click(
        fn=run_editor,
        inputs=[csv_files, cmd_box],
        outputs=[preview, status, download]
    )

if __name__ == "__main__":
    demo.launch()
|