import OstreaCultura as OC

using DataFrames, XLSX, CSV

# Export the "Climate" sheet of the misinformation-library workbook to CSV, then
# re-read that CSV through the reader exposed as `OC.DataLoader.pd`
# (presumably pandas — confirm against the OstreaCultura package).
df = DataFrame(
    XLSX.readtable("data/Misinformation Library with counterclaims.xlsx", "Climate")
)

# Single binding for the CSV path so the write and the re-read cannot drift apart.
climate_csv_path = "data/Climate Misinformation Library with counterclaims.csv"
CSV.write(climate_csv_path, df)
claims = OC.DataLoader.pd.read_csv(climate_csv_path)

# Target passed to `OC.query_claims` further down in this script.
indexname = "ostreacultura-v1"
namespace = "cards-data"

# First claim/counterclaim pair. Not referenced again in the visible part of this
# script; presumably kept for interactive spot checks.
claim = claims.Claims[1]
counterclaim = claims.Counterclaims[1]

# NOTE(review): `threshold` is not referenced in the visible part of this script.
threshold = 0.8

top_k = 100 # top_k for the initial query

# Single-query example, kept for interactive use:
#OC.query_claims(claims.Claims[1], claims.Counterclaims[1], indexname, namespace)

# Query every claim/counterclaim pair and tag each returned row with the claim
# that produced it.
#
# Results are collected in a vector and concatenated once after the loop.
# Re-assigning a global (`classified = vcat(classified, result)`) inside a
# top-level `for` is treated as a new local when the file is run
# non-interactively, which fails with an UndefVarError; mutating a container with
# `push!` involves no assignment and avoids the problem. It also avoids re-copying
# the accumulated frame on every iteration.
per_claim_results = AbstractDataFrame[]

for i in 1:size(claims)[1]
    hits = OC.query_claims(
        string(claims.Claims[i]),
        string(claims.Counterclaims[i]),
        indexname,
        namespace;
        top_k=top_k,  # use the script-level setting instead of a repeated literal
        include_values=false,
    )

    if nrow(hits) == 0
        println("No results found for claim: ", claims.Claims[i])
        continue
    end

    # Record which claim these rows were retrieved for.
    hits.assigned_claim .= claims.Claims[i]
    push!(per_claim_results, hits)
end

# `reduce(vcat, ...)` needs at least one element, so fall back to an empty frame
# when no claim returned any rows.
classified = isempty(per_claim_results) ? DataFrame() : reduce(vcat, per_claim_results)

# Write the classified data to a csv file.
# `CSV` is already loaded at the top of the file; the repeated `using` is harmless
# and lets this cell be run on its own.
using CSV

CSV.write("data/cards_top100_results.csv", classified)

##