alohaboy committed
Commit · 2185d4b
Parent(s): 449e8c9

Fix indentation error in guided mitigation methods
app.py
CHANGED
@@ -404,15 +404,6 @@ Mitigated sentence:"""
         if hate_tokens:
             hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
         prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nPlease remove hate speech or aggressive expressions, while maintaining the original intent (criticism, complaint, opinion, etc.).\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\n[Important] All offensive, derogatory, and explicit hate expressions (e.g., 씨발, 좆, 병신) must be deleted.\n\nMitigated sentence:"""
-        label_desc = {
-            "offensive": "Aggressive",
-            "L1_hate": "Mild Hate",
-            "L2_hate": "Severe Hate"
-        }
-        hate_tokens_str = ""
-        if hate_tokens:
-            hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
-        prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nPlease remove hate speech or aggressive expressions, while maintaining the original intent (criticism, complaint, opinion, etc.).\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\n[Important] All offensive, derogatory, and explicit hate expressions (e.g., 씨발, 좆, 병신) must be deleted.\n\nMitigated sentence:"""
         # LLM inference
         inputs = self.llm_tokenizer(prompt, return_tensors="pt").to(self.llm_model.device)
         with torch.no_grad():
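For reference, the code kept by this hunk builds one guided prompt from the classification label and the top BIO-tagged hate tokens, then tokenizes it for the mitigation LLM. The diff stops at `torch.no_grad()`, so the sketch below fills in a plausible generation and decoding tail; `max_new_tokens`, greedy decoding, and the decode-only-new-tokens step are assumptions, not code taken from app.py.

```python
# Minimal sketch of the de-duplicated guided-mitigation step. The prompt
# construction and the tokenizer call mirror the diff; the generate()/decode
# settings are assumptions.
import torch


def build_guided_prompt(text, label, hate_tokens):
    label_desc = {"offensive": "Aggressive", "L1_hate": "Mild Hate", "L2_hate": "Severe Hate"}
    hate_tokens_str = ""
    if hate_tokens:  # list of (span, token, bio_label) tuples from the BIO tagger
        hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join(
            f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]
        )
    return (
        f"The following sentence is classified as {label_desc.get(label, 'harmful')} expression.\n"
        "Please remove hate speech or aggressive expressions, while maintaining the original intent "
        "(criticism, complaint, opinion, etc.).\n\n"
        f"Original: {text}\n"
        f"Classification: {label_desc.get(label, 'harmful')} expression\n"
        f"{hate_tokens_str}\n\n"
        "[Important] All offensive, derogatory, and explicit hate expressions "
        "(e.g., 씨발, 좆, 병신) must be deleted.\n\n"
        "Mitigated sentence:"
    )


def generate_mitigation(llm_model, llm_tokenizer, prompt, max_new_tokens=128):
    # Hypothetical generation tail: the commit only shows tokenization and no_grad().
    inputs = llm_tokenizer(prompt, return_tensors="pt").to(llm_model.device)
    with torch.no_grad():
        output_ids = llm_model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
    # Decode only the newly generated tokens after the prompt.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return llm_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
```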
@@ -488,15 +479,6 @@ Mitigated sentence:"""
         if hate_tokens:
             hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
         initial_prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nExpressions containing offensive words (e.g., 좃, 씨발, 병신) must be deleted.\nOther aggressive or inappropriate expressions should be mitigated by expressing them more politely and inclusively.\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\nMitigated sentence:"""
-        label_desc = {
-            "offensive": "Aggressive",
-            "L1_hate": "Mild Hate",
-            "L2_hate": "Severe Hate"
-        }
-        hate_tokens_str = ""
-        if hate_tokens:
-            hate_tokens_str = "\nExpressions causing issues:\n" + "\n".join([f"• {token} ({bio_label})" for _, token, bio_label in hate_tokens[:5]])
-        initial_prompt = f"""The following sentence is classified as {label_desc.get(label, "harmful")} expression. \nExpressions containing offensive words (e.g., 좃, 씨발, 병신) must be deleted.\nOther aggressive or inappropriate expressions should be mitigated by expressing them more politely and inclusively.\n\nOriginal: {text}\nClassification: {label_desc.get(label, "harmful")} expression\n{hate_tokens_str}\n\nMitigated sentence:"""
         # Iterative mitigation and evaluation
         max_iter = 3  # Reduced from 5 to 3 for Space deployment
         metrics_history = []
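Similarly, this second hunk feeds `initial_prompt` into an iterative mitigate-and-evaluate loop capped at `max_iter = 3`. Only `max_iter`, `metrics_history`, and the initial prompt appear in the diff; `generate`, `evaluate_mitigation`, the stopping threshold, and the feedback prompt in the sketch below are hypothetical placeholders showing one way such a loop can be wired.

```python
# Hypothetical shape of the iterative mitigation loop; only max_iter,
# metrics_history, and initial_prompt come from the diff.
def iterative_mitigation(generate, evaluate_mitigation, text, initial_prompt, max_iter=3):
    metrics_history = []
    prompt = initial_prompt
    mitigated = None
    for _ in range(max_iter):
        mitigated = generate(prompt)                    # LLM call (assumed interface)
        metrics = evaluate_mitigation(text, mitigated)  # e.g., residual-hate score (assumed)
        metrics_history.append(metrics)
        if metrics.get("residual_hate", 1.0) < 0.1:     # assumed stopping rule
            break
        # Feed the previous attempt back so the next pass can soften it further.
        prompt = f"{initial_prompt}\n\nPrevious attempt: {mitigated}\nPlease mitigate further:"
    return mitigated, metrics_history
```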