alohaboy committed
Commit 2185d4b · 1 Parent(s): 449e8c9

Fix indentation error in guided mitigation methods

Files changed (1)
  1. app.py +0 -18
app.py CHANGED
@@ -404,15 +404,6 @@ Mitigated sentence:"""
         if hate_tokens:
             hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
         prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nPlease remove hate speech or aggressive expressions, while maintaining the original intent (criticism, complaint, opinion, etc.).\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\n[Important] All offensive, derogatory, and explicit hate expressions (e.g., 씨발, 좆, 병신) must be deleted.\n\nMitigated sentence:"""
-        label_desc = {
-            "offensive": "Aggressive",
-            "L1_hate": "Mild Hate",
-            "L2_hate": "Severe Hate"
-        }
-        hate_tokens_str = ""
-        if hate_tokens:
-            hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
-        prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nPlease remove hate speech or aggressive expressions, while maintaining the original intent (criticism, complaint, opinion, etc.).\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\n[Important] All offensive, derogatory, and explicit hate expressions (e.g., 씨발, 좆, 병신) must be deleted.\n\nMitigated sentence:"""
         # LLM inference
         inputs = self.llm_tokenizer(prompt, return_tensors="pt").to(self.llm_model.device)
         with torch.no_grad():
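
After this hunk, a single copy of the prompt construction flows directly into generation. The sketch below illustrates that de-duplicated flow as a standalone function; the name `mitigate_once`, the argument list, and the generation settings (`max_new_tokens`, decode slicing) are assumptions for illustration, not the exact code in app.py.

```python
import torch

def mitigate_once(llm_model, llm_tokenizer, text, label, hate_tokens):
    # Map classifier labels to human-readable severity descriptions (as in the diff).
    label_desc = {
        "offensive": "Aggressive",
        "L1_hate": "Mild Hate",
        "L2_hate": "Severe Hate",
    }
    # List up to five flagged spans so the LLM knows what to remove.
    hate_tokens_str = ""
    if hate_tokens:
        hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join(
            f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]
        )
    # Build the mitigation prompt once (the duplicated block removed by this commit
    # rebuilt the same prompt a second time).
    prompt = (
        f'The following sentence is classified as {label_desc.get(label, "harmful")} expression.\n'
        "Please remove hate speech or aggressive expressions, while maintaining the original intent.\n\n"
        f"Original: {text}\n"
        f'Classification: {label_desc.get(label, "harmful")} expression\n'
        f"{hate_tokens_str}\n\nMitigated sentence:"
    )
    # LLM inference: tokenize, move to the model's device, generate without gradients.
    inputs = llm_tokenizer(prompt, return_tensors="pt").to(llm_model.device)
    with torch.no_grad():
        output_ids = llm_model.generate(**inputs, max_new_tokens=128)
    # Return only the generated continuation, not the echoed prompt.
    generated = output_ids[0][inputs["input_ids"].shape[1]:]
    return llm_tokenizer.decode(generated, skip_special_tokens=True).strip()
```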
@@ -488,15 +479,6 @@ Mitigated sentence:"""
         if hate_tokens:
             hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
         initial_prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nExpressions containing offensive words (e.g., 좃, 씨발, 병신) must be deleted.\nOther aggressive or inappropriate expressions should be mitigated by expressing them more politely and inclusively.\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\nMitigated sentence:"""
-        label_desc = {
-            "offensive": "Aggressive",
-            "L1_hate": "Mild Hate",
-            "L2_hate": "Severe Hate"
-        }
-        hate_tokens_str = ""
-        if hate_tokens:
-            hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
-        initial_prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nExpressions containing offensive words (e.g., 좃, 씨발, 병신) must be deleted.\nOther aggressive or inappropriate expressions should be mitigated by expressing them more politely and inclusively.\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\nMitigated sentence:"""
         # Iterative mitigation and evaluation
         max_iter = 3  # Reduced from 5 to 3 for Space deployment
         metrics_history = []
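
The second hunk feeds `initial_prompt` into an iterative mitigate-and-evaluate loop bounded by `max_iter = 3`. The sketch below shows that loop shape only; `generate_fn`, `evaluate_fn`, the stopping threshold, and the re-prompt wording are hypothetical stand-ins, not the evaluation logic actually used in app.py.

```python
def guided_mitigation(generate_fn, evaluate_fn, initial_prompt, max_iter=3):
    """Iteratively mitigate a sentence, recording evaluation metrics per round."""
    metrics_history = []
    prompt = initial_prompt
    best_sentence = None
    for _ in range(max_iter):  # max_iter reduced from 5 to 3 for Space deployment, per the diff
        candidate = generate_fn(prompt)       # one LLM call -> mitigated sentence
        metrics = evaluate_fn(candidate)      # e.g., {"toxicity": score in [0, 1]} (hypothetical)
        metrics_history.append(metrics)
        best_sentence = candidate
        if metrics.get("toxicity", 0.0) < 0.1:  # hypothetical stopping threshold
            break
        # Feed the previous attempt back for another, stricter rewrite.
        prompt = (
            f"{initial_prompt}\n\n"
            f"Previous attempt (still problematic): {candidate}\nMitigated sentence:"
        )
    return best_sentence, metrics_history
```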
 