Mercurial > repos > dereeper > ragoo
comparison RaGOO/ragoo_utilities/get_ragoo_stats.py @ 13:b9a3aeb162ab draft default tip
Uploaded
| author | dereeper |
|---|---|
| date | Mon, 26 Jul 2021 18:22:37 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 12:68a9ec9ce51e | 13:b9a3aeb162ab |
|---|---|
"""Print summary statistics about contig scaffolding with RaGOO.

Usage: get_ragoo_stats.py <contigs.fasta.fai> <groupings.fofn>

The contig index is read as tab-separated text whose first column is the
contig name and whose second column is its length.  Every contig whose name
appears in the first column of one of the grouping files is counted as
"localized"; every other contig of the index is counted as "unlocalized".
"""
import argparse


def _read_contig_lengths(index_path):
    """Return a dict mapping each contig name in the index to its length."""
    lengths = {}
    with open(index_path) as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = line.split('\t')
            lengths[fields[0]] = int(fields[1])
    return lengths


def _read_path_list(fofn_path):
    """Return the non-empty lines of a file-of-file-names as a list of paths."""
    paths = []
    with open(fofn_path) as handle:
        for line in handle:
            path = line.rstrip()
            if path:  # a blank line would otherwise become open('')
                paths.append(path)
    return paths


def _tally_localized(grouping_paths, contig_lengths):
    """Count the contigs (and their bp) listed in the grouping files.

    Returns a ``(num_contigs, num_bp, remaining)`` tuple where ``remaining``
    is the set of contig names that no grouping file mentioned.

    Raises ValueError if a grouping file names a contig that is absent from
    the index, or if the same contig is listed more than once.
    """
    # A set gives O(1) membership tests and removals; the list-based
    # bookkeeping it replaces was quadratic in the number of contigs.
    remaining = set(contig_lengths)
    num_contigs = 0
    num_bp = 0
    for path in grouping_paths:
        with open(path) as handle:
            for line in handle:
                header = line.split('\t')[0].rstrip()
                if not header:
                    continue  # skip blank lines
                if header not in contig_lengths:
                    raise ValueError(
                        'contig %r from %s is not present in the contig index'
                        % (header, path)
                    )
                if header not in remaining:
                    raise ValueError(
                        'contig %r is listed more than once in the grouping files'
                        % header
                    )
                remaining.remove(header)
                num_contigs += 1
                num_bp += contig_lengths[header]
    return num_contigs, num_bp, remaining


def _percent(part, total):
    """Return ``part`` as a percentage of ``total`` (0.0 when total is 0)."""
    if not total:
        return 0.0  # empty index: avoid ZeroDivisionError
    return (part / total) * 100


def main(argv=None):
    """Parse the command line (``sys.argv`` when argv is None) and print the stats."""
    parser = argparse.ArgumentParser(description='Summary stats about contig scaffolding with RaGOO.')
    # NOTE: each help fragment ends with a space so the implicitly
    # concatenated literals do not run words together.
    parser.add_argument(
        "index", metavar="<contigs.fasta.fai>", type=str,
        help="Samtools fasta index file for input contigs. "
             "If chimera breaking mode was used, this must be "
             "the index file of the chimera broken contigs, "
             "which can be found in ragoo_output/chimera_break. "
             "The correct file to use is the file with the "
             ".intra.chimera.broken.fa suffix.",
    )
    parser.add_argument(
        "groupings", metavar="<groupings.fofn>", type=str,
        help="file of file names for all *_groupings.txt produced by RaGOO. "
             "Single column with full path to each grouping file.",
    )
    args = parser.parse_args(argv)

    all_ctg_len = _read_contig_lengths(args.index)
    grouping_files = _read_path_list(args.groupings)

    num_ctg_localized, num_bp_localized, remaining_ctg = _tally_localized(
        grouping_files, all_ctg_len
    )
    num_ctg_unlocalized = len(remaining_ctg)
    num_bp_unlocalized = sum(all_ctg_len[ctg] for ctg in remaining_ctg)

    print('%r contigs were localized by RaGOO' % (num_ctg_localized,))
    print('%r bp were localized by RaGOO' % (num_bp_localized,))
    print('%r contigs were unlocalized by RaGOO' % (num_ctg_unlocalized,))
    print('%r bp were unlocalized by RaGOO' % (num_bp_unlocalized,))

    pct_ctg = _percent(num_ctg_localized, num_ctg_localized + num_ctg_unlocalized)
    pct_bp = _percent(num_bp_localized, num_bp_localized + num_bp_unlocalized)
    print('%r %% of contigs were localized by RaGOO' % (pct_ctg,))
    print('%r %% of bp were localized by RaGOO' % (pct_bp,))


if __name__ == '__main__':
    main()
| 52 | |
| 53 | |
| 54 | |
| 55 |
