# Add compression and decompression functions for fact check data; update dependencies
# and remove obsolete files (commit fd342b4)
#using CodecZlib
#using CSV
#using DataFrames
"""
    compress_csv(input_path::String, output_path::String=input_path * ".gz") -> String

Gzip-compress the file at `input_path` into `output_path`, print a size summary, and
return `output_path`.

# Arguments
- `input_path`: path of the file to compress.
- `output_path`: destination path; defaults to `input_path` with `".gz"` appended. An
  existing file at this path is overwritten.

# Throws
- `ArgumentError` if `input_path` and `output_path` resolve to the same path.
- Whatever `open` throws when either path cannot be opened.
"""
function compress_csv(input_path::String, output_path::String=input_path * ".gz")
    # The output is opened with "w", which truncates it; pointing it at the input would
    # wipe the data before it is read.
    if abspath(input_path) == abspath(output_path)
        throw(ArgumentError("output_path must differ from input_path: \"$input_path\""))
    end

    println("Compressing $input_path to $output_path...")

    open(input_path, "r") do input_io
        open(output_path, "w") do output_io
            stream = GzipCompressorStream(output_io)
            try
                # Stream-to-stream copy: avoids holding the whole file in memory.
                write(stream, input_io)
            finally
                # Always close the compressor so pending compressed data is flushed
                # and the codec is released, even if the copy fails.
                close(stream)
            end
        end
    end

    # Calculate compression ratio
    original_size = filesize(input_path)
    compressed_size = filesize(output_path)
    # An empty input would divide by zero and report "-Inf%"; call it 0% instead.
    ratio = original_size == 0 ? 0.0 : (1 - compressed_size / original_size) * 100

    original_mb = round(original_size / 1024^2, digits=2)
    compressed_mb = round(compressed_size / 1024^2, digits=2)
    println(
        "Compression complete: $original_mb MB → $compressed_mb MB ",
        "($(round(ratio, digits=1))% reduction)",
    )

    return output_path
end
"""
    decompress_csv(input_path::String, output_path::String) -> String

Decompress the gzip file at `input_path` into `output_path` and return `output_path`.

# Arguments
- `input_path`: path of the gzip-compressed file.
- `output_path`: destination path for the decompressed data. An existing file at this
  path is overwritten.

# Throws
- `ArgumentError` if `input_path` and `output_path` resolve to the same path.
- Whatever `open` throws when either path cannot be opened.
"""
function decompress_csv(input_path::String, output_path::String)
    # The output is opened with "w", which truncates it; pointing it at the input would
    # wipe the compressed data before it is read.
    if abspath(input_path) == abspath(output_path)
        throw(ArgumentError("output_path must differ from input_path: \"$input_path\""))
    end

    println("Decompressing $input_path to $output_path...")

    open(input_path, "r") do input_io
        open(output_path, "w") do output_io
            stream = GzipDecompressorStream(input_io)
            try
                # Stream-to-stream copy: avoids holding the whole decompressed payload
                # in memory.
                write(output_io, stream)
            finally
                # Always close the decompressor so the codec is released, even if the
                # copy fails part-way (e.g. on corrupt input).
                close(stream)
            end
        end
    end

    println("Decompression complete!")
    return output_path
end