Spaces:

stefanjwojcik
/

misinfo_detection_app

Running

File size: 6,813 Bytes

143b0d4

## We're going to use OpenAI.jl to expand upon the very simple claims we already have
using OpenAI, Dates, DataFrames, CSV,ProgressMeter, JSON3

## First, we need to set the API key
## The key is read from the OPENAI_API_KEY environment variable; indexing ENV with a
## name that is not set raises a KeyError, so this line fails early when the key is absent.
## NOTE(review): `api_key` is not referenced anywhere else in the visible code --
## create_expansive_claim reads ENV["OPENAI_API_KEY"] itself. Confirm whether this
## global is still needed.
api_key = ENV["OPENAI_API_KEY"]

## Default system prompt for create_expansive_claim. It instructs the model to expand a
## short misleading claim into a user / fact-checker dialogue and to answer with JSON
## holding two arrays, "user_statements" and "fact_checker_responses", whose entries are
## objects with a single "message" field. get_misinfo_claim relies on that layout.
## NOTE(review): step 3 asks for at least 2 exchanges per conversation, but both worked
## examples below contain a single exchange -- confirm which one is intended.
systemprompt = """
Create a conversation between a misinformed user and a fact-checker. Given a misleading claim, expand on that claim to make it sound credible, then provide the fact-checker's response to correct it. Structure the conversation as alternating exchanges, with each misleading claim followed by a fact-checked response.

# Steps

1. Elaborate on the misleading claim, providing reasoning that a misinformed user might use to justify their belief.
2. Construct a response from the fact-checker that addresses each erroneous point, correcting the misinformation using clear and reliable information.
3. Alternate between "User" and "Fact-checker" dialogue, ensuring there are **at least 2 exchanges** per conversation.
4. Present results such that each interaction is divided into separate payloads for an API response.

# Output Format

Result should be formatted as JSON without code blocks:
{
  "user_statements": [
    {
      "message": "[First misinformed user statement]"
    },
    {
      "message": "[Second misinformed user statement if needed]"
    }
  ],
  "fact_checker_responses": [
    {
      "message": "[Fact-checker's response to the first user statement]"
    },
    {
      "message": "[Fact-checker's response to the second user statement if needed]"
    }
  ]
}

# Examples

Input:

The earth is flat

Output:

{
  "user_statements": [
    {
      "message": "I've heard that the Earth is flat because if it were round, we would all fall off. Plus, they say there's no real proof of a round Earth, just some photoshopped images by space agencies. It just makes sense when you think about it."
    }
  ],
  "fact_checker_responses": [
    {
      "message": "Actually, the Earth isn't flat. Gravity keeps everything attached to the Earth's surface regardless of where we are on the globe, which explains why we don't fall off. Additionally, countless photos and scientific missions over decades have demonstrated that the Earth is round. The images of Earth from space are verified by experts worldwide and they come from many different agencies and companies, not just government entities. Private organizations, like SpaceX, have also provided evidence that the Earth is round."
    }
  ]
}

Input:

Vaccines are dangerous

Output:

{
  "user_statements": [
    {
      "message": "I read somewhere that vaccines are dangerous because they contain harmful chemicals like mercury, and they can cause severe diseases. Isn't that a huge risk to take?"
    }
  ],
  "fact_checker_responses": [
    {
      "message": "Vaccines do contain ingredients to help enhance their effectiveness, but they are used in very small, safe amounts. For instance, mercury is found in the form of Thimerosal, which serves as a preservative to prevent contamination and has been repeatedly found to be safe in those minimal amounts. Moreover, most modern vaccines no longer contain any mercury at all. Decades of research have shown that vaccines are far safer than the dangerous diseases they prevent, protecting millions of lives worldwide."
    }
  ]
}

# Notes

- Ensure each claim is expanded to appear credible, using reasoning or information one might encounter from unreliable sources.
- Fact-checking responses should be direct and supported with verified facts.
- Keep each user statement clearly differentiated from the fact-checker's response to make it easy to parse through the API."""


"""
    create_expansive_claim(claimprompt, systemprompt=systemprompt; model="gpt-4o")

Ask an OpenAI chat model to turn a very simple claim into an expanded, credible-sounding
misleading claim together with a fact-checker's reply.

# Arguments
- `claimprompt`: the short claim; sent as the `user` message.
- `systemprompt`: instructions sent as the `system` message. Defaults to the global
  `systemprompt`.

# Keywords
- `model`: identifier of the chat model handed to `OpenAI.create_chat`. Defaults to
  `"gpt-4o"`, the value that was previously hard-coded.

# Returns
The value returned by `OpenAI.create_chat`, unmodified.

# Throws
- `KeyError` when the `OPENAI_API_KEY` environment variable is not set.

# Example
```julia
claimprompt = "vaccines are dangerous"
response = create_expansive_claim(claimprompt, systemprompt)
println(response.response.choices[1].message.content)
```
"""
function create_expansive_claim(
    claimprompt, systemprompt=systemprompt; model::AbstractString="gpt-4o"
)
    # System message first, then the claim as the user turn.
    messages = [
        Dict("role" => "system", "content" => systemprompt),
        Dict("role" => "user", "content" => claimprompt),
    ]
    # The key is looked up on every call, so it only has to be set by the time a request is made.
    response = OpenAI.create_chat(ENV["OPENAI_API_KEY"], String(model), messages)
    return response
end

"""
    get_misinfo_claim(response; kwargs...)

Extract the misinformed-user statements from a chat completion such as the one returned
by `create_expansive_claim`.

The content of the first choice is parsed as JSON, and the `"message"` field of every
entry under `"user_statements"` is collected in order.

# Arguments
- `response`: object whose `response.choices[1].message.content` is the JSON text
  produced by the model.

# Keywords
- `kwargs...`: accepted for call-site compatibility and ignored.

# Returns
A `Vector{String}` with one element per user statement.

# Throws
Whatever `JSON3.read` raises on malformed JSON, and a `KeyError` when the
`"user_statements"` or `"message"` keys are absent.
"""
function get_misinfo_claim(response; kwargs...)
    content = response.response.choices[1].message.content
    # Raw line breaks are replaced with a space instead of being deleted. Between JSON
    # tokens a space is still insignificant whitespace, so well-formed output parses to
    # the same value; if a line break sits inside a message, the words on either side
    # are no longer fused together.
    json_string = replace(content, "\n" => " ")
    parsed = JSON3.read(json_string)
    return String[statement["message"] for statement in parsed["user_statements"]]
end

"""
    expansive_combined_library(path::String="data/Combined Misinformation Library.csv")

Read the claims table stored at `path` and add an `ExpandedClaim` column holding, for
each row, the first user statement generated from that row's `Claims` entry.

One request is issued per row through `create_expansive_claim`; progress is displayed
while the rows are processed.

# Arguments
- `path`: location of the CSV file to load. It must provide a `Claims` column.

# Returns
The loaded `DataFrame` with the additional `ExpandedClaim` column.

# Example
```julia
expansive_claims_library = expansive_combined_library()
query_categories = ["climate change", "jewish people", "black people",
        "immigration", "LGBTQ", "sexual and reproductive health"]
replace_dict = Dict("Climate Change" => "climate change",
                    "Anti-semitic" => "jewish people",
                    "Black" => "black people",
                    "Immigration" => "immigration",
                    "LGBTQ" => "LGBTQ",
                    "Reproductive health" => "sexual and reproductive health")
## Use replace dict to generate category where .Model equal the dict key
expansive_claims_library[!, :category] = [replace_dict[x] for x in expansive_claims_library.Model]
expansive_claims_library[!, :text] = expansive_claims_library.ExpandedClaim
CSV.write("data/expansive_claims_library.csv", expansive_claims_library)
```
"""
function expansive_combined_library(path::String="data/Combined Misinformation Library.csv")
    library = CSV.read(path, DataFrame)
    # Every row starts out as an empty string and is overwritten in the loop below.
    library[!, :ExpandedClaim] .= ""
    claims = library.Claims
    @showprogress for row in eachindex(claims)
        statements = get_misinfo_claim(create_expansive_claim(claims[row]))
        # Only the first generated user statement is kept.
        library[row, :ExpandedClaim] = statements[1]
    end
    return library
end

"""
    fill_expansive_claims_library!(cl::DataFrame)

Generate an expanded claim for every row of `cl` whose `ExpandedClaim` entry is
`missing`, writing the first generated user statement back into that row.

Rows that already hold a value are left untouched, so the function can be re-run to
complete a partially filled table.

# Arguments
- `cl`: table with a `Claims` column (input text) and an `ExpandedClaim` column
  (filled in place).

# Returns
The same `cl`, after mutation.

# Example
```julia
include("scripts/expansive_claims_with_LLM.jl")
cl = CSV.read("data/expansive_claims_library.csv", DataFrame)
fill_expansive_claims_library!(cl)
CSV.write("data/expansive_claims_library_expanded.csv", cl)
```
"""
function fill_expansive_claims_library!(cl::DataFrame)
    # Widen the column before writing: a column read back as all-missing has element
    # type Missing, and one read as short inline strings cannot hold a long String.
    # Either would make the assignment in the loop fail.
    cl[!, :ExpandedClaim] = convert(Vector{Union{Missing,String}}, cl.ExpandedClaim)
    # Get all those with missing expanded claims
    missing_claims = findall(ismissing, cl.ExpandedClaim)
    @showprogress for i in missing_claims
        claim = cl.Claims[i]
        response = create_expansive_claim(claim)
        user_statements = get_misinfo_claim(response)
        cl[i, :ExpandedClaim] = user_statements[1]
    end
    return cl
end