Update app.py
app.py CHANGED
@@ -74,18 +74,18 @@ def train_ui_tars(file):
         # Step 1: Load and preprocess the uploaded JSON file
         with open(file.name, "r", encoding="utf-8") as f:
             raw_data = json.load(f)
-
+
         # Extract training pairs or use flat structure
         training_data = raw_data.get("training_pairs", raw_data)
-
+
         # Save fixed JSON to avoid issues
         fixed_json_path = "fixed_fraud_data.json"
         with open(fixed_json_path, "w", encoding="utf-8") as f:
             json.dump(training_data, f, indent=4)
-
+
         # Load dataset
         dataset = datasets.load_dataset("json", data_files=fixed_json_path)
-
+
         # Step 2: Tokenize dataset with Llama-compatible context length
         def tokenize_data(example):
             # Format input for Llama (instruction-following style)
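
For context on the hunk above: train_ui_tars reads the upload through raw_data.get("training_pairs", raw_data), and the UI later in this file describes the records as 'input'/'output' pairs. A minimal sketch of a compatible upload, built from Python; the record values are invented placeholders, and any schema beyond those two keys is an assumption:

import json

# Hypothetical upload for the fine-tuning UI. The "training_pairs" wrapper is
# optional, since the loader falls back to the flat structure; the records
# themselves are made-up placeholders, not real fraud data.
example = {
    "training_pairs": [
        {"input": "Transaction: $9,800 wire transfer to a new overseas account at 3 AM.",
         "output": "fraud"},
        {"input": "Transaction: $42 grocery purchase at the cardholder's usual store.",
         "output": "legitimate"},
    ]
}

with open("fraud_data.json", "w", encoding="utf-8") as f:
    json.dump(example, f, indent=4)
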
@@ -99,9 +99,9 @@ def train_ui_tars(file):
             )
             inputs["labels"] = inputs["input_ids"].clone()
             return {k: v.squeeze(0) for k, v in inputs.items()}
-
+
         tokenized_dataset = dataset["train"].map(tokenize_data, batched=True, remove_columns=dataset["train"].column_names)
-
+
         # Step 3: Training setup
         training_args = TrainingArguments(
             output_dir="./fine_tuned_llama",
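
The prompt-building lines of tokenize_data (original lines 92-98) fall between the hunks and are not part of this diff, so the template, context length, and tokenizer arguments below are assumptions rather than the actual code. This is only a sketch of what an instruction-style Llama tokenization step commonly looks like, kept consistent with the visible tail (labels cloned from input_ids, batch dimension squeezed):

# Sketch only; assumes `tokenizer` is the Llama tokenizer loaded earlier in app.py.
def tokenize_data(example):
    # Hypothetical instruction-style prompt; the real template is in the lines
    # elided from the diff above.
    prompt = f"### Instruction:\n{example['input']}\n\n### Response:\n{example['output']}"
    inputs = tokenizer(
        prompt,
        max_length=2048,        # assumed context length
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    inputs["labels"] = inputs["input_ids"].clone()
    return {k: v.squeeze(0) for k, v in inputs.items()}
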
@@ -120,7 +120,7 @@ def train_ui_tars(file):
             optim="adamw_torch",
             warmup_steps=100,
         )
-
+
         # Custom data collator for Llama
         def custom_data_collator(features):
             batch = {
@@ -136,16 +136,16 @@ def train_ui_tars(file):
             train_dataset=tokenized_dataset,
             data_collator=custom_data_collator,
         )
-
+
         # Step 4: Start training
         trainer.train()
-
+
         # Step 5: Save the model
         model.save_pretrained("./fine_tuned_llama")
         tokenizer.save_pretrained("./fine_tuned_llama")
-
+
         return "Training completed successfully! Model saved to ./fine_tuned_llama"
-
+
     except Exception as e:
         return f"Error: {str(e)}"
 
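
The body of custom_data_collator (original lines 127-135) and the opening keywords of the Trainer(...) call sit between the two hunks above and are not shown. As an illustration only, a collator of this shape usually stacks the per-example tensors into a batch, and the Trainer is assembled from the pieces the diff already defines; the stacked keys and the model=/args= keywords below are assumptions:

import torch
from transformers import Trainer

# Hypothetical collator body: stack per-example tensors into batch tensors.
def custom_data_collator(features):
    batch = {
        "input_ids": torch.stack([torch.as_tensor(f["input_ids"]) for f in features]),
        "attention_mask": torch.stack([torch.as_tensor(f["attention_mask"]) for f in features]),
        "labels": torch.stack([torch.as_tensor(f["labels"]) for f in features]),
    }
    return batch

# Hypothetical Trainer assembly; `model` is assumed to be the Llama model
# loaded earlier in app.py.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=custom_data_collator,
)
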
@@ -153,11 +153,11 @@ def train_ui_tars(file):
 with gr.Blocks(title="Model Fine-Tuning Interface") as demo:
     gr.Markdown("# Llama Fraud Detection Fine-Tuning UI")
     gr.Markdown("Upload a JSON file with 'input' and 'output' pairs to fine-tune the Llama model on your fraud dataset.")
-
+
     file_input = gr.File(label="Upload Fraud Dataset (JSON)")
     train_button = gr.Button("Start Fine-Tuning")
     output = gr.Textbox(label="Training Status")
-
+
     train_button.click(fn=train_ui_tars, inputs=file_input, outputs=output)
 
 demo.launch()