# misinfo_detection_app / scripts /expansive_claims_with_LLM.jl
# stefanjwojcik's picture
# add scripts
# 143b0d4 verified
## We're going to use OpenAI.jl to expand upon very simple claims we already have
using OpenAI, Dates, DataFrames, CSV,ProgressMeter, JSON3
## First, we need to set the API key.
## Indexing `ENV` throws a `KeyError` when `OPENAI_API_KEY` is not set, so loading this
## script fails here without a key.
## NOTE(review): `api_key` is not referenced by the functions visible in this file;
## `create_expansive_claim` reads `ENV["OPENAI_API_KEY"]` itself.
api_key = ENV["OPENAI_API_KEY"]
## System prompt used as the default second argument of `create_expansive_claim`, where it
## is sent as the "system" message. It asks for a User / Fact-checker dialogue returned as
## bare JSON with `user_statements` and `fact_checker_responses` arrays of
## `{"message": ...}` objects; `get_misinfo_claim` reads the `user_statements` entries.
systemprompt = """
Create a conversation between a misinformed user and a fact-checker. Given a misleading claim, expand on that claim to make it sound credible, then provide the fact-checker's response to correct it. Structure the conversation as alternating exchanges, with each misleading claim followed by a fact-checked response.
# Steps
1. Elaborate on the misleading claim, providing reasoning that a misinformed user might use to justify their belief.
2. Construct a response from the fact-checker that addresses each erroneous point, correcting the misinformation using clear and reliable information.
3. Alternate between "User" and "Fact-checker" dialogue, ensuring there are **at least 2 exchanges** per conversation.
4. Present results such that each interaction is divided into separate payloads for an API response.
# Output Format
Result should be formatted as JSON without code blocks:
{
"user_statements": [
{
"message": "[First misinformed user statement]"
},
{
"message": "[Second misinformed user statement if needed]"
}
],
"fact_checker_responses": [
{
"message": "[Fact-checker's response to the first user statement]"
},
{
"message": "[Fact-checker's response to the second user statement if needed]"
}
]
}
# Examples
Input:
The earth is flat
Output:
{
"user_statements": [
{
"message": "I've heard that the Earth is flat because if it were round, we would all fall off. Plus, they say there's no real proof of a round Earth, just some photoshopped images by space agencies. It just makes sense when you think about it."
}
],
"fact_checker_responses": [
{
"message": "Actually, the Earth isn't flat. Gravity keeps everything attached to the Earth's surface regardless of where we are on the globe, which explains why we don't fall off. Additionally, countless photos and scientific missions over decades have demonstrated that the Earth is round. The images of Earth from space are verified by experts worldwide and they come from many different agencies and companies, not just government entities. Private organizations, like SpaceX, have also provided evidence that the Earth is round."
}
]
}
Input:
Vaccines are dangerous
Output:
{
"user_statements": [
{
"message": "I read somewhere that vaccines are dangerous because they contain harmful chemicals like mercury, and they can cause severe diseases. Isn't that a huge risk to take?"
}
],
"fact_checker_responses": [
{
"message": "Vaccines do contain ingredients to help enhance their effectiveness, but they are used in very small, safe amounts. For instance, mercury is found in the form of Thimerosal, which serves as a preservative to prevent contamination and has been repeatedly found to be safe in those minimal amounts. Moreover, most modern vaccines no longer contain any mercury at all. Decades of research have shown that vaccines are far safer than the dangerous diseases they prevent, protecting millions of lives worldwide."
}
]
}
# Notes
- Ensure each claim is expanded to appear credible, using reasoning or information one might encounter from unreliable sources.
- Fact-checking responses should be direct and supported with verified facts.
- Keep each user statement clearly differentiated from the fact-checker's response to make it easy to parse through the API."""
"""
    create_expansive_claim(claimprompt, systemprompt=systemprompt; model="gpt-4o")

Ask an OpenAI chat model to expand the short claim `claimprompt` into a conversation
between a misinformed user and a fact-checker.

# Arguments
- `claimprompt`: the simple claim; sent as the "user" message.
- `systemprompt`: instructions sent as the "system" message. Defaults to the global
  `systemprompt` defined in this script.

# Keywords
- `model`: identifier of the chat model to query. Defaults to "gpt-4o".

# Returns
Whatever `OpenAI.create_chat` returns, unmodified. The generated text is read from
`response.response.choices[1].message.content`.

# Throws
- `KeyError` if the `OPENAI_API_KEY` environment variable is not set.

# Example
```julia
claimprompt = "vaccines are dangerous"
response = create_expansive_claim(claimprompt, systemprompt)
println(response.response.choices[1].message.content)
```
"""
function create_expansive_claim(claimprompt, systemprompt=systemprompt; model="gpt-4o")
    messages = [
        Dict("role" => "system", "content" => systemprompt),
        Dict("role" => "user", "content" => claimprompt),
    ]
    # The key is looked up on every call, so a key exported after the script was loaded
    # is still picked up.
    return OpenAI.create_chat(ENV["OPENAI_API_KEY"], model, messages)
end
"""
    get_misinfo_claim(response; kwargs...)

Extract the misinformed-user statements from a chat `response` produced by
`create_expansive_claim`.

The JSON text is taken from `response.response.choices[1].message.content` and must hold
a `user_statements` array whose elements each have a `message` field.

# Arguments
- `response`: value whose `.response.choices[1].message.content` is the JSON text.

# Keywords
- `kwargs...`: accepted and ignored.

# Returns
A `Vector{String}` with one entry per element of `user_statements`, in order.
"""
function get_misinfo_claim(response; kwargs...)
    raw = response.response.choices[1].message.content
    # Models sometimes wrap the payload in a Markdown code fence even when told not to;
    # drop a leading/trailing fence so only the JSON remains.
    payload = replace(strip(raw), r"^```(?:json)?\s*|\s*```$"i => "")
    # Line breaks become spaces instead of being deleted: between JSON tokens a space is
    # still plain whitespace, and inside a string value neighbouring words stay separated.
    json_string = replace(payload, "\n" => " ")
    parsed = JSON3.read(json_string)
    return String[statement["message"] for statement in parsed["user_statements"]]
end
"""
    expansive_combined_library(path::String="data/Combined Misinformation Library.csv")

Read the claims library CSV at `path` and generate an expanded version of every claim.

The file must provide a `Claims` column. An `ExpandedClaim` column is added, initialised
to the empty string, and filled row by row with the first user statement returned by
`get_misinfo_claim(create_expansive_claim(claim))`. A row for which no user statement is
returned keeps the empty string and a warning is logged, so one bad response does not
discard the rows that were already generated.

# Arguments
- `path`: location of the CSV file to load.

# Returns
The loaded `DataFrame` with the additional `ExpandedClaim` column.

# Example
```julia
expansive_claims_library = expansive_combined_library()
query_categories = ["climate change", "jewish people", "black people",
    "immigration", "LGBTQ", "sexual and reproductive health"]
replace_dict = Dict("Climate Change" => "climate change",
    "Anti-semitic" => "jewish people",
    "Black" => "black people",
    "Immigration" => "immigration",
    "LGBTQ" => "LGBTQ",
    "Reproductive health" => "sexual and reproductive health")
## Use replace dict to generate category where .Model equal the dict key
expansive_claims_library[!, :category] = [replace_dict[x] for x in expansive_claims_library.Model]
expansive_claims_library[!, :text] = expansive_claims_library.ExpandedClaim
CSV.write("data/expansive_claims_library.csv", expansive_claims_library)
```
"""
function expansive_combined_library(path::String="data/Combined Misinformation Library.csv")
    ## Load the expansive claims library
    library = CSV.read(path, DataFrame)
    # Placeholder value; overwritten below for every row that yields a statement.
    library[!, :ExpandedClaim] .= ""
    @showprogress for (i, claim) in enumerate(library.Claims)
        response = create_expansive_claim(claim)
        user_statements = get_misinfo_claim(response)
        if isempty(user_statements)
            # Indexing an empty result would throw and lose all rows generated so far.
            @warn "No user statement returned; leaving ExpandedClaim blank" row = i
        else
            library[i, :ExpandedClaim] = first(user_statements)
        end
    end
    return library
end
"""
    fill_expansive_claims_library!(cl::DataFrame)

Generate expanded claims, in place, for every row of `cl` whose `ExpandedClaim` entry is
`missing`.

For each such row the text in the `Claims` column is sent through
`create_expansive_claim` and `get_misinfo_claim`, and the first returned user statement
is stored in `ExpandedClaim`. Rows for which no statement is returned stay `missing` and
a warning is logged.

# Arguments
- `cl`: table with `Claims` and `ExpandedClaim` columns. The `ExpandedClaim` column is
  replaced by a `Vector{Union{Missing,String}}` copy before filling.

# Returns
The mutated `cl`.

# Example
```julia
include("scripts/expansive_claims_with_LLM.jl")
cl = CSV.read("data/expansive_claims_library.csv", DataFrame)
fill_expansive_claims_library!(cl)
CSV.write("data/expansive_claims_library_expanded.csv", cl)
```
"""
function fill_expansive_claims_library!(cl::DataFrame)
    # A column read back from CSV can have eltype `Missing` (every cell empty) or a
    # fixed-width inline string type; neither can hold a long generated `String`.
    # Widen it once so the assignments below cannot fail on element conversion.
    cl[!, :ExpandedClaim] = Vector{Union{Missing,String}}(cl.ExpandedClaim)
    # Get all those with missing expanded claims
    pending = findall(ismissing, cl.ExpandedClaim)
    @showprogress for i in pending
        response = create_expansive_claim(cl.Claims[i])
        user_statements = get_misinfo_claim(response)
        if isempty(user_statements)
            # Indexing an empty result would throw and abort the remaining rows.
            @warn "No user statement returned; leaving ExpandedClaim missing" row = i
        else
            cl[i, :ExpandedClaim] = first(user_statements)
        end
    end
    return cl
end