|
|
|
|
|
|
|
|
"""
    distances_and_classification(narrative_matrix, target_matrix)

Compute the cosine distance between every column of `target_matrix` and every column
of `narrative_matrix`, then pick the closest narrative column for each target column.

# Arguments
- `narrative_matrix`: matrix whose columns are the reference vectors.
- `target_matrix`: matrix whose columns are the vectors to classify.

# Returns
A tuple `(dists, positions)` of two vectors with one entry per column of
`target_matrix`:
- `dists`: the smallest cosine distance from that target to any narrative column.
- `positions`: `CartesianIndex` into the pairwise distance matrix; its second component
  is the column of `narrative_matrix` that attains the minimum.
"""
function distances_and_classification(narrative_matrix, target_matrix)
    # With `dims=2`, entry (i, j) compares target column i with narrative column j.
    distances = pairwise(CosineDist(), target_matrix, narrative_matrix, dims=2)

    # `findmin` returns the row-wise minima and their positions in one pass, so the
    # distance matrix does not have to be scanned twice with `argmin`.
    min_dists, min_positions = findmin(distances, dims=2)

    return vec(min_dists), vec(min_positions)
end
|
|
"""
    assignments!(narrative_matrix, target_matrix, narrative_embeddings,
                 target_embeddings; kwargs...)

Record, for every row of `target_embeddings`, the distance to its closest narrative
column and the matching entry of `narrative_embeddings`.

# Arguments
- `narrative_matrix`, `target_matrix`: passed unchanged to
  `distances_and_classification`.
- `narrative_embeddings`: table indexed as `narrative_embeddings[row, label]` to look
  up the closest entry.
- `target_embeddings`: table that receives the two result columns.

# Keywords
- `claim_counter_claim`: label used both as the column read from
  `narrative_embeddings` and as part of the new column names. Defaults to `"claim"`.

# Returns
`nothing`. Two columns are written to `target_embeddings`: `"<label>Dist"` and
`"Closest<label>"`.
"""
function assignments!(
    narrative_matrix, target_matrix, narrative_embeddings, target_embeddings; kwargs...
)
    label = get(kwargs, :claim_counter_claim, "claim")

    nearest_dists, nearest_positions = distances_and_classification(
        narrative_matrix, target_matrix
    )

    target_embeddings[:, string(label, "Dist")] = nearest_dists

    # The second component of each position is the index of the closest narrative entry.
    closest_entries = map(nearest_positions[:, 1]) do position
        narrative_embeddings[position[2], label]
    end
    target_embeddings[:, string("Closest", label)] = closest_entries

    return nothing
end
|
|
"""
    get_distances!(narrative::Narrative, target_embeddings::DataFrame)

Compute the distance from every row of `target_embeddings` to its closest claim and to
its closest counterclaim in `narrative`, and store the results in `target_embeddings`.

Through `assignments!` this writes the columns `"claimDist"`, `"Closestclaim"`,
`"counterclaimDist"` and `"Closestcounterclaim"`.

# Arguments
- `narrative`: every element of `narrative.claims` must provide `claimembedding` and
  `counterclaimembedding`.
- `target_embeddings`: must contain an `"Embeddings"` column holding one embedding per
  row.

# Returns
`nothing`; `target_embeddings` is modified in place.

# Throws
- `ArgumentError` if `narrative.claims` is empty or `target_embeddings` has no rows.

# Example
```julia
include("src/Narrative.jl")
include("src/NarrativeClassification.jl")
include("src/ExampleNarrative.jl")
climate_narrative = create_example_narrative();
generate_claim_embeddings_from_narrative!(climate_narrative)
candidate_data = candidate_embeddings_from_narrative(climate_narrative)
get_distances!(climate_narrative, candidate_data)
```
"""
function get_distances!(narrative::Narrative, target_embeddings::DataFrame)
    # Fail early with a clear message instead of an obscure error from the
    # concatenation or distance computation below.
    isempty(narrative.claims) &&
        throw(ArgumentError("narrative has no claims to compare against"))
    nrow(target_embeddings) == 0 &&
        throw(ArgumentError("target_embeddings has no rows"))

    narrative_embeddings = narrative_to_dataframe(narrative)

    # `reduce(hcat, vectors)` builds each matrix in one pass. Splatting (`hcat(v...)`)
    # passes one argument per embedding, which scales poorly for large candidate tables.
    narrative_matrix = reduce(hcat, [claim.claimembedding for claim in narrative.claims])
    counternarrative_matrix = reduce(
        hcat, [claim.counterclaimembedding for claim in narrative.claims]
    )
    # The comprehension narrows the container's element type so the efficient
    # `reduce(hcat, ...)` method applies; `!` reads the column without copying it.
    target_matrix = reduce(
        hcat, [embedding for embedding in target_embeddings[!, "Embeddings"]]
    )

    assignments!(
        narrative_matrix,
        target_matrix,
        narrative_embeddings,
        target_embeddings;
        claim_counter_claim="claim",
    )
    assignments!(
        counternarrative_matrix,
        target_matrix,
        narrative_embeddings,
        target_embeddings;
        claim_counter_claim="counterclaim",
    )

    return nothing
end
|
|
|
"""
    apply_gate_logic!(target_embeddings; threshold=0.2, kwargs...)

Write a binary label to the `"OCLabel"` column of `target_embeddings`.

A row is labelled `1` only when both conditions hold, otherwise `0`:
- its `"claimDist"` is strictly smaller than its `"counterclaimDist"`, and
- its `"claimDist"` is strictly smaller than `threshold`.

# Keywords
- `threshold`: upper bound (exclusive) on `"claimDist"` for a positive label.
  Defaults to `0.2`.
- Any other keyword is accepted and ignored, so existing callers keep working.

# Returns
`nothing`; `target_embeddings` is modified in place.
"""
function apply_gate_logic!(target_embeddings; threshold=0.2, kwargs...)
    # `!` reads the columns without copying; they are not modified here.
    claim_dist = target_embeddings[!, "claimDist"]
    counterclaim_dist = target_embeddings[!, "counterclaimDist"]

    # One fused pass builds the row mask, replacing two `findall` scans and a
    # set-based `intersect` of the resulting index lists.
    is_positive = (claim_dist .< counterclaim_dist) .& (claim_dist .< threshold)

    target_embeddings[:, "OCLabel"] .= 0
    target_embeddings[is_positive, "OCLabel"] .= 1

    return nothing
end
|
|
"""
    return_top_labels(target_embeddings; top_labels=10, kwargs...)

Return the positively labelled rows (`"OCLabel"` equal to `1`) of `target_embeddings`
that have the smallest `"claimDist"`, sorted by ascending `"claimDist"`.

# Keywords
- `top_labels`: maximum number of rows to return. Defaults to `10`; fewer rows are
  returned when there are fewer positive candidates.
- Any other keyword is accepted and ignored, so existing callers keep working.

# Returns
A table with at most `top_labels` rows. Sorting is applied to the filtered result, not
to `target_embeddings` itself.

# Example
```julia
include("src/Narrative.jl")
include("src/NarrativeClassification.jl")
include("src/ExampleNarrative.jl")
climate_narrative = create_example_narrative();
generate_claim_embeddings_from_narrative!(climate_narrative)
candidate_data = candidate_embeddings_from_narrative(climate_narrative)
get_distances!(climate_narrative, candidate_data)
apply_gate_logic!(candidate_data; threshold=0.2)
return_top_labels(candidate_data)
```
"""
function return_top_labels(target_embeddings; top_labels=10, kwargs...)
    # Reuse the shared filter so both functions agree on what a positive row is.
    positives = return_positive_candidates(target_embeddings)

    sort!(positives, :claimDist)

    # Clamp the upper bound so asking for more rows than exist is not an error.
    return positives[1:min(top_labels, nrow(positives)), :]
end
|
|
|
"""
    return_positive_candidates(target_embeddings)

Return the rows of `target_embeddings` whose `"OCLabel"` value equals `1`, with all
columns kept.
"""
function return_positive_candidates(target_embeddings)
    labels = target_embeddings[:, "OCLabel"]
    positive_rows = findall(==(1), labels)
    return target_embeddings[positive_rows, :]
end
|
|
"""
    deploy_narrative_model!(narrative::Narrative; kwargs...)

Run the complete classification pipeline for `narrative` and return the labelled
candidate data.

The steps, in order, are:
1. `generate_claim_embeddings_from_narrative!(narrative)`
2. `candidate_embeddings_from_narrative(narrative; db=db)`
3. `get_distances!(narrative, candidates)`
4. `apply_gate_logic!(candidates; threshold=threshold)`

# Keywords
- `threshold`: forwarded to `apply_gate_logic!`. Defaults to `0.2`.
- `db`: forwarded to `candidate_embeddings_from_narrative`. Defaults to
  `"data/random_300k.csv"`.

# Returns
The candidate data produced in step 2, after steps 3 and 4 have been applied to it.

# Example
```julia
include("src/Narrative.jl")
include("src/NarrativeClassification.jl")
include("src/ExampleNarrative.jl")
climate_narrative = create_example_narrative();
deploy_narrative_model!(climate_narrative; threshold=0.2)
```
"""
function deploy_narrative_model!(narrative::Narrative; kwargs...)
    gate_threshold = haskey(kwargs, :threshold) ? kwargs[:threshold] : 0.2
    candidate_source = haskey(kwargs, :db) ? kwargs[:db] : "data/random_300k.csv"

    generate_claim_embeddings_from_narrative!(narrative)
    candidates = candidate_embeddings_from_narrative(narrative; db=candidate_source)

    get_distances!(narrative, candidates)
    apply_gate_logic!(candidates; threshold=gate_threshold)

    return candidates
end