File size: 1,199 Bytes
143b0d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import OstreaCultura as OC
using DataFrames, XLSX, CSV

# Export the "Climate" sheet of the workbook to CSV, then reload that CSV through
# OC.DataLoader.pd (presumably a pandas handle -- confirm in OstreaCultura).
df = XLSX.readtable(
    "data/Misinformation Library with counterclaims.xlsx", "Climate"
) |> DataFrame
CSV.write("data/Climate Misinformation Library with counterclaims.csv", df)
claims = OC.DataLoader.pd.read_csv(
    "data/Climate Misinformation Library with counterclaims.csv"
)

# Target index and namespace handed to OC.query_claims.
indexname, namespace = "ostreacultura-v1", "cards-data"

# First claim/counterclaim pair, kept at hand for one-off interactive queries such as
#   OC.query_claims(claims.Claims[1], claims.Counterclaims[1], indexname, namespace)
claim = claims.Claims[1]
counterclaim = claims.Counterclaims[1]

# NOTE(review): `threshold` is not referenced anywhere in the visible part of this script.
threshold = 0.8
top_k = 100 # top_k for the initial query

# Query the index once per claim/counterclaim pair and tag every returned row with the
# claim that produced it (column `assigned_claim`).
#
# Results are pushed into a vector and concatenated once after the loop, for two reasons:
#   * Rebinding a global (`classified = vcat(classified, result)`) inside a top-level
#     `for` loop is ambiguous under Julia's soft-scope rules: when this file is run
#     non-interactively the name becomes a new local and the read raises UndefVarError.
#     Mutating a container with `push!` involves no assignment, so it behaves the same
#     in the REPL, in notebooks and in scripts.
#   * A single concatenation avoids re-copying all accumulated rows on every iteration.
#
# `OC.query_claims` presumably returns a DataFrame (it is used with `nrow` and column
# broadcasting below) -- confirm against OstreaCultura.
matches = AbstractDataFrame[]
for i in 1:size(claims)[1]
    claim_text = claims.Claims[i]
    result = OC.query_claims(
        string(claim_text),
        string(claims.Counterclaims[i]),
        indexname,
        namespace;
        top_k=top_k,  # use the script-level setting instead of a second hard-coded 100
        include_values=false,
    )
    if nrow(result) == 0
        println("No results found for claim: ", claim_text)
        continue
    end
    result.assigned_claim .= claim_text
    push!(matches, result)
end

# With no matches at all, fall back to an empty table so the export step still has a
# `classified` value.
classified = isempty(matches) ? DataFrame() : reduce(vcat, matches)

# Persist every tagged match to disk as CSV.
using CSV
classified |> CSV.write("data/cards_top100_results.csv")
##