# The files from the ntuplizer are produced in parts, so we need to copy them to the appropriate folders in order to have one dataset per signal hypothesis.
import argparse
import os


parser = argparse.ArgumentParser(description='Copy the files in appropriate folders - no matter how many parts are in the files') # /work/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_GluonFix_Full/
parser.add_argument("--input", type=str, default="/work/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_GluonFix_Full")
parser.add_argument("--output", type=str, default="/pnfs/psi.ch/cms/trivcat/store/user/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_GluonFix_FullF")
parser.add_argument("--overwrite", action="store_true") # If True, it will overwrite the files, otherwise, it will skip files that have been already copied
# --input /pnfs/psi.ch/cms/trivcat/store/user/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter --output /pnfs/psi.ch/cms/trivcat/store/user/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_Folders
# --input /pnfs/psi.ch/cms/trivcat/store/user/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_C --output /pnfs/psi.ch/cms/trivcat/store/user/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_C_Folders
# --input /work/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_C --output /work/gkrzmanc/jetclustering/data/Feb26_2025_E1000_N500_noPartonFilter_C_F


args = parser.parse_args()

if not os.path.exists(args.output):
    os.makedirs(args.output)

for file in os.listdir(args.input):
    # if file is less than 0.5MB, ignore - it's probably still in processing
    if os.path.getsize(os.path.join(args.input, file)) < 0.5e6:
        continue
    # filename is like this: PFNano_s-channel_mMed-1100_mDark-20_rinv-0.7_alpha-peak_13TeV-pythia8_n-1000_part_1.root
    # make a dir PFNano_s-channel_mMed-1100_mDark-20_rinv-0.7_alpha-peak_13TeV-pythia8_n-1000 in output
    # and put part_X.root in it
    filename = file.split("_part_")[0]
    if not os.path.exists(os.path.join(args.output, filename)):
        os.makedirs(os.path.join(args.output, filename))
    # copy it
    print(f"Copying {file} to {os.path.join(args.output, filename)}")
    if args.overwrite or not os.path.exists( os.path.join(args.output, filename, "part_"+file.split("_part_")[1])):
        os.system(f"cp {os.path.join(args.input, file)} {os.path.join(args.output, filename)}")
        # rename it
        os.rename(os.path.join(args.output, filename, file), os.path.join(args.output, filename, "part_"+file.split("_part_")[1]))

print("Done")