# Source: Hugging Face Space stefanjwojcik/misinfo_detection_app (status: Running)
# File: misinfo_detection_app/scripts/expansive_claims_with_LLM.jl (6.81 kB)
# Commit: 143b0d4 "add scripts" by stefanjwojcik (verified, 5 months ago)

	## We're going to use OpenAI.jl to expand upon very simple claims we already have
	using OpenAI, Dates, DataFrames, CSV,ProgressMeter, JSON3

	## First, we need to set the API key
	## It is read from the OPENAI_API_KEY environment variable; indexing ENV throws a
	## KeyError when the variable is unset, so loading this script fails early without it.
	## NOTE(review): create_expansive_claim reads ENV["OPENAI_API_KEY"] itself on each call
	## rather than using this global.
	api_key = ENV["OPENAI_API_KEY"]

	## System prompt used as the default `systemprompt` argument of create_expansive_claim,
	## where it is sent as the "system" message. It asks the model to reply with a JSON object
	## holding "user_statements" and "fact_checker_responses" arrays of {"message": ...} items;
	## get_misinfo_claim reads the "message" of each "user_statements" entry.
	systemprompt = """
	Create a conversation between a misinformed user and a fact-checker. Given a misleading claim, expand on that claim to make it sound credible, then provide the fact-checker's response to correct it. Structure the conversation as alternating exchanges, with each misleading claim followed by a fact-checked response.

	# Steps

	1. Elaborate on the misleading claim, providing reasoning that a misinformed user might use to justify their belief.
	2. Construct a response from the fact-checker that addresses each erroneous point, correcting the misinformation using clear and reliable information.
	3. Alternate between "User" and "Fact-checker" dialogue, ensuring there are at least 2 exchanges per conversation.
	4. Present results such that each interaction is divided into separate payloads for an API response.

	# Output Format

	Result should be formatted as JSON without code blocks:
	{
	"user_statements": [
	{
	"message": "[First misinformed user statement]"
	},
	{
	"message": "[Second misinformed user statement if needed]"
	}
	],
	"fact_checker_responses": [
	{
	"message": "[Fact-checker's response to the first user statement]"
	},
	{
	"message": "[Fact-checker's response to the second user statement if needed]"
	}
	]
	}

	# Examples

	Input:

	The earth is flat

	Output:

	{
	"user_statements": [
	{
	"message": "I've heard that the Earth is flat because if it were round, we would all fall off. Plus, they say there's no real proof of a round Earth, just some photoshopped images by space agencies. It just makes sense when you think about it."
	}
	],
	"fact_checker_responses": [
	{
	"message": "Actually, the Earth isn't flat. Gravity keeps everything attached to the Earth's surface regardless of where we are on the globe, which explains why we don't fall off. Additionally, countless photos and scientific missions over decades have demonstrated that the Earth is round. The images of Earth from space are verified by experts worldwide and they come from many different agencies and companies, not just government entities. Private organizations, like SpaceX, have also provided evidence that the Earth is round."
	}
	]
	}

	Input:

	Vaccines are dangerous

	Output:

	{
	"user_statements": [
	{
	"message": "I read somewhere that vaccines are dangerous because they contain harmful chemicals like mercury, and they can cause severe diseases. Isn't that a huge risk to take?"
	}
	],
	"fact_checker_responses": [
	{
	"message": "Vaccines do contain ingredients to help enhance their effectiveness, but they are used in very small, safe amounts. For instance, mercury is found in the form of Thimerosal, which serves as a preservative to prevent contamination and has been repeatedly found to be safe in those minimal amounts. Moreover, most modern vaccines no longer contain any mercury at all. Decades of research have shown that vaccines are far safer than the dangerous diseases they prevent, protecting millions of lives worldwide."
	}
	]
	}

	# Notes

	- Ensure each claim is expanded to appear credible, using reasoning or information one might encounter from unreliable sources.
	- Fact-checking responses should be direct and supported with verified facts.
	- Keep each user statement clearly differentiated from the fact-checker's response to make it easy to parse through the API."""


	"""
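	    create_expansive_claim(claimprompt, systemprompt=systemprompt)

	Generate an expanded, credible-sounding version of a very simple claim by sending it to the OpenAI chat API.
	`claimprompt` is sent as the user message and `systemprompt` as the system message.

	# Example
	    claimprompt = "vaccines are dangerous"
	    response = create_expansive_claim(claimprompt, systemprompt)
	    println(response.response.choices[1].message.content)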
	"""
	function create_expansive_claim(claimprompt, systemprompt=systemprompt; model="gpt-4o")
	    # Send `claimprompt` (the short claim) as the user message and `systemprompt` as
	    # the system message of a single chat-completion request.
	    # `model` selects the chat model; it defaults to "gpt-4o", the value that used to
	    # be hard-coded here, so existing positional calls behave exactly as before.
	    # Returns whatever `OpenAI.create_chat` returns, unmodified.
	    messages = [
	        Dict("role" => "system", "content" => systemprompt),
	        Dict("role" => "user", "content" => claimprompt),
	    ]
	    # The API key is looked up on every call; ENV indexing throws a KeyError if
	    # OPENAI_API_KEY is not set.
	    return OpenAI.create_chat(ENV["OPENAI_API_KEY"], model, messages)
	end

	"""
	## get_misinfo_claim: parse the chat response produced by create_expansive_claim and return the misinformed-user statements it contains as a Vector{String}

	"""
	function get_misinfo_claim(response; kwargs...)
	    # Pull the raw text of the first choice out of the chat response.
	    # Extra keyword arguments are accepted but not used.
	    raw_content = response.response.choices[1].message.content
	    # Newline characters are removed (not escaped) before the text is parsed as JSON.
	    parsed = JSON3.read(replace(raw_content, "\n" => ""))
	    # Collect the "message" of every entry under "user_statements", in order.
	    return String[entry["message"] for entry in parsed["user_statements"]]
	end

	"""
	## Function to generate expansive claims based on a library of claims

	# Example
	expansive_claims_library = expansive_combined_library()
	query_categories = ["climate change", "jewish people", "black people",
	"immigration", "LGBTQ", "sexual and reproductive health"]
	replace_dict = Dict("Climate Change" => "climate change",
	"Anti-semitic" => "jewish people",
	"Black" => "black people",
	"Immigration" => "immigration",
	"LGBTQ" => "LGBTQ",
	"Reproductive health" => "sexual and reproductive health")
	## Use replace dict to generate category where .Model equal the dict key
	expansive_claims_library[!, :category] = [replace_dict[x] for x in expansive_claims_library.Model]
	expansive_claims_library[!, :text] = expansive_claims_library.ExpandedClaim
	CSV.write("data/expansive_claims_library.csv", expansive_claims_library)

	"""
	function expansive_combined_library(path::String="data/Combined Misinformation Library.csv")
	    # Read the claims CSV at `path`, expand every entry of its `Claims` column with
	    # the LLM, and store the first generated user statement in a new
	    # `ExpandedClaim` column. Returns the augmented DataFrame.
	    library = CSV.read(path, DataFrame)
	    # Create the output column up front, filled with empty strings.
	    library[!, :ExpandedClaim] .= ""
	    @showprogress for row in eachindex(library.Claims)
	        reply = create_expansive_claim(library.Claims[row])
	        statements = get_misinfo_claim(reply)
	        library[row, :ExpandedClaim] = statements[1]
	    end
	    return library
	end

	"""
	include("scripts/expansive_claims_with_LLM.jl")
	cl = CSV.read("data/expansive_claims_library.csv", DataFrame)
	fill_expansive_claims_library!(cl)
	CSV.write("data/expansive_claims_library_expanded.csv", cl)
	"""
	function fill_expansive_claims_library!(cl::DataFrame)
	    # Fill every row of `cl` whose `ExpandedClaim` is `missing` with the first user
	    # statement generated for that row's `Claims` text. Mutates `cl` and returns it
	    # (the return value was previously `nothing`; callers that ignore it are unaffected).
	    missing_claims = findall(ismissing, cl.ExpandedClaim)
	    isempty(missing_claims) && return cl

	    # A column read back from CSV may have element type `Missing` (every row empty)
	    # or a fixed-width string type; storing a generated `String` in it would throw.
	    # Widen the column once so every assignment below is valid. This replaces the
	    # column vector inside `cl`; values already present are carried over.
	    if !(String <: eltype(cl.ExpandedClaim))
	        cl[!, :ExpandedClaim] = Vector{Union{Missing,String}}(cl.ExpandedClaim)
	    end

	    @showprogress for i in missing_claims
	        response = create_expansive_claim(cl.Claims[i])
	        user_statements = get_misinfo_claim(response)
	        if isempty(user_statements)
	            # Nothing usable came back: keep the row missing so a later call retries it
	            # instead of aborting the whole run with a BoundsError.
	            @warn "No user statements returned; leaving ExpandedClaim missing" row = i
	        else
	            cl[i, :ExpandedClaim] = user_statements[1]
	        end
	    end
	    return cl
	end