## Database retrieval based on keywords
## need to ] add TidierDB@0.3.1

# Concatenate a collection of equal-length vectors into a matrix, one vector per column.
# `reduce(hcat, ...)` avoids splatting a potentially huge collection into varargs; the
# identity comprehension narrows a loosely typed container (e.g. `Vector{Any}`) so the
# efficient specialised `reduce(hcat, ...)` method is selected.
_hcat_columns(vectors) = reduce(hcat, [v for v in vectors])

"""
    distances_and_classification(narrative_matrix, target_matrix)

Compute the cosine distance between every column of `target_matrix` and every column of
`narrative_matrix`, and find the closest narrative column for each target column.

`pairwise` and `CosineDist` are not defined in this file (presumably Distances.jl).

# Returns
A tuple `(dists, assignment)` with one entry per target column:
- `dists`: the smallest distance from that target to any narrative column.
- `assignment`: a `CartesianIndex` into the distance matrix; its second component is the
  index of the closest narrative column.
"""
function distances_and_classification(narrative_matrix, target_matrix)
    # Rows correspond to target columns, columns to narrative columns.
    distances = pairwise(CosineDist(), target_matrix, narrative_matrix, dims=2)
    # A single pass yields both the row-wise minima and where they occur.
    min_dists, min_idxs = findmin(distances, dims=2)
    return vec(min_dists), vec(min_idxs)
end

"""
    assignments!(narrative_matrix, target_matrix, narrative_embeddings, target_embeddings;
                 claim_counter_claim="claim")

Assign the closest claim (or counterclaim) to each row of `target_embeddings`.

Two columns are written to `target_embeddings`, named from `claim_counter_claim`:
- `"<claim_counter_claim>Dist"`: distance to the closest narrative column.
- `"Closest<claim_counter_claim>"`: the value of column `claim_counter_claim` in
  `narrative_embeddings` at the row of the closest narrative column.

# Keywords
- `claim_counter_claim`: name of the `narrative_embeddings` column to read, also used to
  build the output column names. Defaults to `"claim"`.

Other keyword arguments are accepted and ignored. Returns `nothing`.
"""
function assignments!(
    narrative_matrix,
    target_matrix,
    narrative_embeddings,
    target_embeddings;
    claim_counter_claim="claim",
    kwargs...,
)
    dists, narrative_assignment = distances_and_classification(
        narrative_matrix, target_matrix
    )
    target_embeddings[:, "$(claim_counter_claim)Dist"] = dists
    # idx[2] is the column of the distance matrix, i.e. the index of the narrative entry.
    target_embeddings[:, "Closest$(claim_counter_claim)"] = [
        narrative_embeddings[idx[2], claim_counter_claim] for idx in narrative_assignment
    ]
    return nothing
end

"""
    get_distances!(narrative::Narrative, target_embeddings::DataFrame)

Compute the distance from every row of `target_embeddings` to the closest claim and the
closest counterclaim of `narrative`.

The embeddings are read from the `"Embeddings"` column of `target_embeddings` and from the
`claimembedding` / `counterclaimembedding` fields of `narrative.claims`. The columns
`"claimDist"`, `"Closestclaim"`, `"counterclaimDist"` and `"Closestcounterclaim"` are
written to `target_embeddings`. Returns `nothing`.

# Example
```julia
include("src/Narrative.jl")
include("src/NarrativeClassification.jl")
include("src/ExampleNarrative.jl")
climate_narrative = create_example_narrative();
generate_claim_embeddings_from_narrative!(climate_narrative)
candidate_data = candidate_embeddings_from_narrative(climate_narrative)
get_distances!(climate_narrative, candidate_data)
```
"""
function get_distances!(narrative::Narrative, target_embeddings::DataFrame)
    narrative_embeddings = narrative_to_dataframe(narrative)

    # One embedding per column.
    narrative_matrix = _hcat_columns(claim.claimembedding for claim in narrative.claims)
    counternarrative_matrix = _hcat_columns(
        claim.counterclaimembedding for claim in narrative.claims
    )
    target_matrix = _hcat_columns(target_embeddings[:, "Embeddings"])

    # Assign the closest claim to the target data
    assignments!(
        narrative_matrix,
        target_matrix,
        narrative_embeddings,
        target_embeddings;
        claim_counter_claim="claim",
    )
    # Assign the closest counterclaim to the target data
    assignments!(
        counternarrative_matrix,
        target_matrix,
        narrative_embeddings,
        target_embeddings;
        claim_counter_claim="counterclaim",
    )
    return nothing
end

"""
    apply_gate_logic!(target_embeddings; threshold=0.2)

Write the binary column `"OCLabel"` to `target_embeddings`.

A row is labelled `1` when its `"claimDist"` is strictly smaller than both its
`"counterclaimDist"` and `threshold`; every other row is labelled `0`.

# Keywords
- `threshold`: upper bound (exclusive) on `"claimDist"` for a positive label.
  Defaults to `0.2`.

Other keyword arguments are accepted and ignored. Returns `nothing`.
"""
function apply_gate_logic!(target_embeddings; threshold=0.2, kwargs...)
    claim_dist = target_embeddings[!, "claimDist"]
    counterclaim_dist = target_embeddings[!, "counterclaimDist"]
    # Closer to the claim than to the counterclaim AND within the threshold.
    accepted = findall((claim_dist .< counterclaim_dist) .& (claim_dist .< threshold))
    target_embeddings[:, "OCLabel"] .= 0
    target_embeddings[accepted, "OCLabel"] .= 1
    return nothing
end

"""
    return_top_labels(target_embeddings; top_labels=10)

Return the positively labelled rows (`"OCLabel" == 1`) of `target_embeddings` with the
smallest `"claimDist"`, sorted by ascending `"claimDist"`.

At most `top_labels` rows are returned. `target_embeddings` itself is not reordered: the
sort is applied to a copy of the selected rows.

# Keywords
- `top_labels`: maximum number of rows to return. Defaults to `10`.

Other keyword arguments are accepted and ignored.

# Example
```julia
include("src/Narrative.jl")
include("src/NarrativeClassification.jl")
include("src/ExampleNarrative.jl")
climate_narrative = create_example_narrative();
generate_claim_embeddings_from_narrative!(climate_narrative)
candidate_data = candidate_embeddings_from_narrative(climate_narrative)
get_distances!(climate_narrative, candidate_data)
apply_gate_logic!(candidate_data; threshold=0.2)
return_top_labels(candidate_data)
```
"""
function return_top_labels(target_embeddings; top_labels=10, kwargs...)
    # Row-indexed copy, so the in-place sort below leaves the input untouched.
    positives = return_positive_candidates(target_embeddings)
    sort!(positives, :claimDist)
    return positives[1:min(top_labels, nrow(positives)), :]
end

"""
    return_positive_candidates(target_embeddings)

Return a copy of the rows of `target_embeddings` whose `"OCLabel"` equals `1`.
"""
function return_positive_candidates(target_embeddings)
    return target_embeddings[findall(target_embeddings[:, "OCLabel"] .== 1), :]
end

"""
    deploy_narrative_model!(narrative::Narrative; threshold=0.2,
                            db="data/random_300k.csv")

Run the full classification pipeline for `narrative` and return the labelled candidates.

Steps: generate the claim embeddings of `narrative` (in place), fetch candidate embeddings
from `db`, compute claim/counterclaim distances, and apply the gate logic.

# Keywords
- `threshold`: forwarded to `apply_gate_logic!`. Defaults to `0.2`.
- `db`: forwarded to `candidate_embeddings_from_narrative`.
  Defaults to `"data/random_300k.csv"`.

Other keyword arguments are accepted and ignored.

# Returns
The candidate data with the distance, closest-claim and `"OCLabel"` columns added.

# Example
```julia
include("src/Narrative.jl")
include("src/NarrativeClassification.jl")
include("src/ExampleNarrative.jl")
climate_narrative = create_example_narrative();
deploy_narrative_model!(climate_narrative; threshold=0.2)
```
"""
function deploy_narrative_model!(
    narrative::Narrative; threshold=0.2, db="data/random_300k.csv", kwargs...
)
    generate_claim_embeddings_from_narrative!(narrative)
    candidate_data = candidate_embeddings_from_narrative(narrative; db=db)
    get_distances!(narrative, candidate_data)
    apply_gate_logic!(candidate_data; threshold=threshold)
    return candidate_data
end