|
from types import ModuleType
|
|
import math
|
|
from langchain.llms import OpenAI
|
|
import requests
|
|
import requests.models
|
|
|
|
|
|
import requests
|
|
from hamilton.function_modifiers import config
|
|
|
|
|
|
|
|
|
|
def enforcement_prompt(user_input: str, violation_context: dict) -> str:
    """Build the prompt sent to the LLM to pick an enforcement action.

    Combines the user's content with moderation context (DIO match, extremism
    flag, aspect sentiment) and the platform's DIO policy summary, then asks
    the model to choose one of four enforcement actions.

    Parameters
    ----------
    user_input:
        The raw user content under review.
    violation_context:
        Moderation context dict. Recognized keys (all optional, with
        fallbacks): ``entity_name``, ``entity_info`` (a dict with ``Summary``
        and ``Policy Category``), ``extremism_detected``, ``aspect_sentiment``.

    Returns
    -------
    str
        The fully assembled prompt text.
    """
    entity_info = violation_context.get("entity_info", {})
    dio_name = violation_context.get("entity_name", "None")
    # NOTE: fetched for completeness/parity with the context schema; not
    # currently interpolated into the prompt text.
    dio_details = entity_info.get("Summary", "Unknown")
    dio_category = entity_info.get("Policy Category", "Unknown")
    radicalization_flag = violation_context.get("extremism_detected", False)
    dio_sentiment = violation_context.get("aspect_sentiment", "None")

    prompt = (
        # Bug fix: these segments previously used "\\n" (a literal
        # backslash-n in the output) instead of real newlines.
        f"Based on the following content and context, determine the appropriate enforcement action:\n\n"
        f"Content: '{user_input}'\n"
        f"Radicalization detected: {radicalization_flag}\n"
        f"DIO Mentioned: {dio_name} (Category: {dio_category})\n"
        # Bug fix: a separating blank line was missing here, so the sentiment
        # value ran directly into "Policy Summary:".
        f"System's Classification of Content Sentiment Towards DIO: {dio_sentiment}\n\n"
        "Policy Summary: Any entity mentioned in the 'DIO Mentioned' field is from the Dangerous Individuals & Organizations (DIO) blacklist. "
        "If this entity is mentioned in the user content in a supportive, glorifying, or representative manner, it constitutes a violation of our platform's community standards. "
        "Our platform does not allow content that glorifies, supports, or represents individuals or organizations involved in violence, hate, or terrorism. "
        "These entities include terrorist groups, hate groups, violent non-state actors, and others who advocate or engage in violent activities. "
        "Any content that endorses or supports these entities or their actions is a policy violation. "
        "Users may discuss these topics neutrally or in a condemnatory manner, but any positive portrayal is prohibited. "
        "Based on this policy, choose one of the following enforcement actions: Warning, Limited Visibility, Temporary Suspension, Permanent Ban. "
        "Provide a brief explanation for your choice."
    )
    return prompt
|
|
|
|
def get_enforcement_decision(
    enforcement_prompt: str,
    mistral_public_url: str,
    *,
    timeout: float = 60.0,
) -> dict:
    """Send the enforcement prompt to the Mistral model server and return its decision.

    Parameters
    ----------
    enforcement_prompt:
        The prompt text produced by :func:`enforcement_prompt`.
    mistral_public_url:
        Base URL of the Mistral inference server (no trailing slash expected).
    timeout:
        Seconds to wait for the server before failing. Bug fix: the original
        call had no timeout and could hang the pipeline indefinitely on a
        stalled server.

    Returns
    -------
    dict
        ``{"enforcement_action": <raw model reply, stripped>, "prompt": <prompt sent>}``.

    Raises
    ------
    requests.exceptions.RequestException
        On connection failure or timeout.
    """
    input_text = {
        "context": enforcement_prompt,
        "question": "What is the appropriate enforcement action?",
    }

    response = requests.post(
        f"{mistral_public_url}/mistral-inference",
        json=input_text,
        stream=False,
        timeout=timeout,
    )

    # NOTE(review): non-2xx responses are not raised on — the error body would
    # be returned as the "action". Consider response.raise_for_status(); left
    # unchanged to preserve existing caller behavior.
    return {
        "enforcement_action": response.text.strip(),
        "prompt": enforcement_prompt,
    }
|
|
|
|
|
|
|