Mercurial > repos > fubar > egapx_runner
comparison nf/subworkflows/ncbi/default/annot_builder/main.nf @ 0:d9c5c5b87fec draft
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
| author | fubar |
|---|---|
| date | Sat, 03 Aug 2024 11:16:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d9c5c5b87fec |
|---|---|
| 1 #!/usr/bin/env nextflow | |
| 2 nextflow.enable.dsl=2 | |
| 3 | |
| 4 include { merge_params } from '../../utilities' | |
| 5 | |
| 6 // annot_builder_main collection_dir ${output}/COLLECTION accept_dir ${output}/ACCEPT conflict_dir ${output}/CONFLICT report_dir ${output}/REPORT test_dir ${output}/TEST i | |
| 7 // reftrack_attrs_manifest ${input.reftrack_attrs} loss_pct_ccds 0.0 loss_pct_refseq_ref_primary 1.0 loss_pct_refseq_alt_ref_loci 100.0 loss_pct_refseq_patches 100.0 loss_pct_refseq_other 1.0 | |
| 8 // annot_builder_input name gnomon desc Gnomon aliases Gnomon|Chainer|PartAbInitio|FullAbInitio|Chainer_GapFilled|PartAbInitio_GapFilled | |
| 9 // is_primary 1 input_manifest ${input.gnomon_models} model_maker gnomon2model use_secondary_support 1 keep_top_N_models 50 | |
| 10 // drop_alt_brs_overlap 1 merge_variants 1 enable_AR0050_AR0048 1 max_pct_ab_initio 50 | |
| 11 // annot_builder -accept-output both -asn-cache ${GP_cache_dir} -conffile ${conffile} -gc-assembly-manifest ${input.gencoll_asn} -logfile ${logfile} | |
| 12 | |
| 13 // not implemented yet; examples for future use | |
| 14 //load_annot_builder_tracking_data -stats-xml ${output}/REPORT/stats.xml -taskrun ${taskrun.id} | |
| 15 //annot_builder_input name bestrs desc BestRefSeq is_primary 1 input_manifest ${input.best_rs_seqalign} model_maker splign2model user_filter lxr_data.is_refseq=1 | |
| 16 //annot_builder_input name ng desc Curated Genomic is_primary 1 input_manifest ${input.best_ng_seqalign} model_maker ng2model score_filter rank=1 exclude_subtypes CloneRef,misc_difference,STS,tRNA,variation,VariationRef exclude_types Biosrc,Pub | |
| 17 //annot_builder_input name imgt desc IMGT is_primary 1 input_manifest ${input.imgt} model_maker imgt2model use_secondary_support 0 | |
| 18 //annot_builder_input name rfam desc cmsearch aliases Rfam is_primary 1 input_manifest ${input.rfam} model_maker gnomon2model | |
| 19 //annot_builder_input name trna desc tRNAscan-SE is_primary 1 input_manifest ${input.trna_annot} model_maker passthru | |
| 20 //annot_builder_input name blessed desc SelectedGeneRepresentative is_primary 0 input_manifest ${input.best_rs_seqalign} model_maker splign2model score_filter rank=1 user_filter lxr_data.is_refseq=0 | |
| 21 | |
| 22 workflow annot_builder { /* Sub-workflow: writes the annot_builder config ([Main] plus one DataProvider section for the Gnomon input), then runs annot_builder on it. */ | |
| 23 take: | |
| 24 gencoll_asn /* handed to annot_builder through -gc-assembly */ | |
| 25 gnomon_file /* becomes the single entry of the provider manifest and is also staged for the run step */ | |
| 26 genome_asn /* staged under genome/ and indexed by lds2_indexer in the run step */ | |
| 27 parameters // Map : extra parameter and parameter update | |
| 28 main: | |
| 29 /* The literal string 'outdir' is the directory name written into the config and created by the run step. NOTE(review): the 'parameters' input is not referenced below; the global params object is forwarded instead, and none of the three processes reads it - confirm whether that is intended. */ | |
| 30 def m = annot_builder_main('outdir', params).collect() /* m: the file holding the [Main] section */ | |
| 31 def i = annot_builder_input('outdir', m, '01', gnomon_file, params) /* i[0]: config with the DataProvider01 section appended; i[1]: manifest naming gnomon_file */ | |
| 32 // FIXME: intended params 4-5 to be lists of all input files and all input manifests, but it complained with only one entry | |
| 33 def (all, accept, accept_ftable, annot) = annot_builder_run('outdir', i[0], gencoll_asn, i[1], gnomon_file, genome_asn, params) /* the four outputs are bound by position, in the order the process declares them */ | |
| 34 | |
| 35 emit: | |
| 36 outputs = all /* every entry directly under the output directory */ | |
| 37 accept_asn = accept /* ACCEPT/accept.asn, declared optional by the process */ | |
| 38 accept_ftable_annot = accept_ftable /* concatenated feature-table annotation, declared optional */ | |
| 39 annot_files = annot /* files in ACCEPT ending in .annot, declared optional */ | |
| 40 } | |
| 41 | |
| 42 | |
| 43 // [Main] | |
| 44 // accept_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/ACCEPT" | |
| 45 // collection_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/COLLECTION" | |
| 46 // conflict_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/CONFLICT" | |
| 47 // loss_pct_ccds = "0.0" | |
| 48 // loss_pct_refseq_alt_ref_loci = "100.0" | |
| 49 // loss_pct_refseq_other = "1.0" | |
| 50 // loss_pct_refseq_patches = "100.0" | |
| 51 // loss_pct_refseq_ref_primary = "1.0" | |
| 52 // reftrack_attrs_manifest = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/inp/reftrack_attrs.mft" | |
| 53 // report_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/REPORT" | |
| 54 // test_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/TEST" | |
| 55 | |
| 56 process annot_builder_main { /* Writes annot_builder_main.ini: a [Main] section with the five output directory settings and fixed loss_pct_ values. */ | |
| 57 input: | |
| 58 val outdir /* prefix used for the accept, collection, conflict, report and test directory settings */ | |
| 59 val params /* not referenced anywhere in this process */ | |
| 60 output: | |
| 61 path "annot_builder_main.ini" | |
| 62 script: | |
| 63 """ | |
| 64 #!/usr/bin/env python3 | |
| 65 with open('annot_builder_main.ini', 'w') as outf: | |
| 66 print('[Main]', file=outf) | |
| 67 print('accept_dir = "$outdir/ACCEPT"', file=outf) | |
| 68 print('collection_dir = "$outdir/COLLECTION"', file=outf) | |
| 69 print('conflict_dir = "$outdir/CONFLICT"', file=outf) | |
| 70 print('loss_pct_ccds = "0.0"', file=outf) | |
| 71 print('loss_pct_refseq_alt_ref_loci = "100.0"', file=outf) | |
| 72 print('loss_pct_refseq_other = "1.0"', file=outf) | |
| 73 print('loss_pct_refseq_patches = "100.0"', file=outf) | |
| 74 print('loss_pct_refseq_ref_primary = "1.0"', file=outf) | |
| 75 print('report_dir = "$outdir/REPORT"', file=outf) | |
| 76 print('test_dir = "$outdir/TEST"', file=outf) | |
| 77 """ | |
| 78 stub: /* stub run: the ini file ends up containing only the word main */ | |
| 79 """ | |
| 80 touch annot_builder_main.ini | |
| 81 echo 'main' > annot_builder_main.ini | |
| 82 """ | |
| 83 } | |
| 84 | |
| 85 | |
| 86 // [DataProvider06] | |
| 87 // aliases = "Gnomon|Chainer|PartAbInitio|FullAbInitio|Chainer_GapFilled|PartAbInitio_GapFilled" | |
| 88 // desc = "Gnomon" | |
| 89 // drop_alt_brs_overlap = "1" | |
| 90 // enable_AR0050_AR0048 = "1" | |
| 91 // input_manifest = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/inp/gnomon_models.mft" | |
| 92 // is_primary = "1" | |
| 93 // keep_top_N_models = "50" | |
| 94 // max_pct_ab_initio = "50" | |
| 95 // merge_variants = "1" | |
| 96 // model_maker = "gnomon2model" | |
| 97 // name = "gnomon" | |
| 98 // use_secondary_support = "1" | |
| 99 | |
| 100 process annot_builder_input { /* Copies the prior config into annot_builder_input.ini, appends a DataProvider section with fixed Gnomon settings, and writes a one-line manifest naming input_file. */ | |
| 101 input: | |
| 102 val outdir /* not referenced anywhere in this process */ | |
| 103 path prior_file /* config produced so far; its content is written first */ | |
| 104 val provider_number /* suffix of the DataProvider section name and of the manifest file name */ | |
| 105 path input_file /* its staged name is the only line written to the manifest */ | |
| 106 val params /* not referenced anywhere in this process */ | |
| 107 output: | |
| 108 path("annot_builder_input.ini") /* output 0: prior config plus the new provider section */ | |
| 109 path("input_manifest_${provider_number}.mft") /* output 1: the manifest referenced by input_manifest in that section */ | |
| 110 script: | |
| 111 """ | |
| 112 #!/usr/bin/env python3 | |
| 113 with open('annot_builder_input.ini', 'w') as outf: | |
| 114 | |
| 115 with open('${prior_file}', 'r') as f: | |
| 116 print(f.read(), file=outf) | |
| 117 | |
| 118 print('[DataProvider${provider_number}]', file=outf) | |
| 119 | |
| 120 im = 'input_manifest_${provider_number}.mft' | |
| 121 inpf = '${input_file}' | |
| 122 with open(im, 'w') as mft: | |
| 123 print(inpf, file=mft) | |
| 124 print(f'input_manifest="{im}"', file=outf) | |
| 125 | |
| 126 print('aliases = "Gnomon|Chainer|PartAbInitio|FullAbInitio|Chainer_GapFilled|PartAbInitio_GapFilled"', file=outf) | |
| 127 print('desc = "Gnomon"', file=outf) | |
| 128 print('name = "gnomon"', file=outf) | |
| 129 print('model_maker = "gnomon2model"', file=outf) | |
| 130 | |
| 131 print('drop_alt_brs_overlap = "1"', file=outf) | |
| 132 print('enable_AR0050_AR0048 = "1"', file=outf) | |
| 133 print('is_primary = "1"', file=outf) | |
| 134 print('keep_top_N_models = "50"', file=outf) | |
| 135 print('max_pct_ab_initio = "50"', file=outf) | |
| 136 print('merge_variants = "1"', file=outf) | |
| 137 print('use_secondary_support = "1"', file=outf) | |
| 138 """ | |
| 139 stub: /* stub run: empty manifest; the ini is the prior file plus one marker line */ | |
| 140 """ | |
| 141 touch annot_builder_input.ini | |
| 142 touch input_manifest_${provider_number}.mft | |
| 143 cp ${prior_file} annot_builder_input.ini | |
| 144 echo 'input ${provider_number}' >> annot_builder_input.ini | |
| 145 """ | |
| 146 } | |
| 147 | |
| 148 | |
| 149 // ## annot_builder -accept-output both -asn-cache ${GP_cache_dir} -conffile ${conffile} -gc-assembly-manifest ${input.gencoll_asn} -logfile ${logfile} | |
| 150 process annot_builder_run { /* Creates the output directory layout, indexes genome/ with lds2_indexer, runs annot_builder on the supplied config, then concatenates the .ftable.annot files from ACCEPT into accept.ftable_annot. */ | |
| 151 input: | |
| 152 val outdir /* directory created inside the task; presumably has to equal the directory prefix written into conffile - the workflow passes the same literal to both */ | |
| 153 path conffile, stageAs: 'annot_builder_final.ini' | |
| 154 path gencoll_asn /* given to annot_builder through -gc-assembly */ | |
| 155 path input_manifests /* staged only, never named on the command line; presumably located through the input_manifest entry in conffile - confirm */ | |
| 156 path input_files /* staged only, never named on the command line */ | |
| 157 path genome_asn, stageAs: 'genome/*' | |
| 158 val params /* not referenced anywhere in this process */ | |
| 159 output: | |
| 160 path "${outdir}/*", emit: "all" | |
| 161 path "${outdir}/ACCEPT/accept.asn", emit: "accept", optional: true | |
| 162 path "${outdir}/ACCEPT/accept.ftable_annot", emit: "accept_ftable_annot", optional: true | |
| 163 path "${outdir}/ACCEPT/*.annot", optional: true | |
| 164 script: /* NOTE(review): the final cat has no guard for the case where no .ftable.annot file exists, although the ACCEPT outputs above are declared optional - confirm which behaviour is intended */ | |
| 165 """ | |
| 166 mkdir -p $outdir/ACCEPT | |
| 167 mkdir -p $outdir/COLLECTION | |
| 168 mkdir -p $outdir/CONFLICT | |
| 169 mkdir -p $outdir/REPORT | |
| 170 mkdir -p $outdir/TEST | |
| 171 | |
| 172 lds2_indexer -source genome/ -db LDS2 | |
| 173 # EXCEPTION_STACK_TRACE_LEVEL=Warning DEBUG_STACK_TRACE_LEVEL=Warning DIAG_POST_LEVEL=Trace | |
| 174 annot_builder -accept-output both -nogenbank -lds2 LDS2 -conffile $conffile -gc-assembly $gencoll_asn -logfile ${outdir}/annot_builder.log | |
| 175 cat ${outdir}/ACCEPT/*.ftable.annot > ${outdir}/ACCEPT/accept.ftable_annot | |
| 176 """ | |
| 177 stub: /* stub run: builds the same directory layout and writes small placeholder files; no tool is invoked */ | |
| 178 """ | |
| 179 mkdir -p $outdir/ACCEPT | |
| 180 mkdir -p $outdir/COLLECTION | |
| 181 mkdir -p $outdir/CONFLICT | |
| 182 mkdir -p $outdir/REPORT | |
| 183 mkdir -p $outdir/TEST | |
| 184 | |
| 185 echo "1" > ${outdir}/annot_builder.log | |
| 186 echo "2" > ${outdir}/accept.asn | |
| 187 echo "3" > ${outdir}/accept.ftable.annot | |
| 188 | |
| 189 | |
| 190 echo "4" > ${outdir}/ACCEPT/accept.asn | |
| 191 echo "5" > ${outdir}/ACCEPT/accept.ftable_annot | |
| 192 echo "S1" > ${outdir}/ACCEPT/S1.annot | |
| 193 echo "S2" > ${outdir}/ACCEPT/S2.annot | |
| 194 | |
| 195 """ | |
| 196 } | |
