comparison nf/subworkflows/ncbi/default/annot_builder/main.nf @ 0:d9c5c5b87fec draft

planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author fubar
date Sat, 03 Aug 2024 11:16:53 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d9c5c5b87fec
1 #!/usr/bin/env nextflow
2 nextflow.enable.dsl=2
3
4 include { merge_params } from '../../utilities'
5
6 // annot_builder_main collection_dir ${output}/COLLECTION accept_dir ${output}/ACCEPT conflict_dir ${output}/CONFLICT report_dir ${output}/REPORT test_dir ${output}/TEST i
7 // reftrack_attrs_manifest ${input.reftrack_attrs} loss_pct_ccds 0.0 loss_pct_refseq_ref_primary 1.0 loss_pct_refseq_alt_ref_loci 100.0 loss_pct_refseq_patches 100.0 loss_pct_refseq_other 1.0
8 // annot_builder_input name gnomon desc Gnomon aliases Gnomon|Chainer|PartAbInitio|FullAbInitio|Chainer_GapFilled|PartAbInitio_GapFilled
9 // is_primary 1 input_manifest ${input.gnomon_models} model_maker gnomon2model use_secondary_support 1 keep_top_N_models 50
10 // drop_alt_brs_overlap 1 merge_variants 1 enable_AR0050_AR0048 1 max_pct_ab_initio 50
11 // annot_builder -accept-output both -asn-cache ${GP_cache_dir} -conffile ${conffile} -gc-assembly-manifest ${input.gencoll_asn} -logfile ${logfile}
12
13 // not implemented, future examples
14 //load_annot_builder_tracking_data -stats-xml ${output}/REPORT/stats.xml -taskrun ${taskrun.id}
15 //annot_builder_input name bestrs desc BestRefSeq is_primary 1 input_manifest ${input.best_rs_seqalign} model_maker splign2model user_filter lxr_data.is_refseq=1
16 //annot_builder_input name ng desc Curated Genomic is_primary 1 input_manifest ${input.best_ng_seqalign} model_maker ng2model score_filter rank=1 exclude_subtypes CloneRef,misc_difference,STS,tRNA,variation,VariationRef exclude_types Biosrc,Pub
17 //annot_builder_input name imgt desc IMGT is_primary 1 input_manifest ${input.imgt} model_maker imgt2model use_secondary_support 0
18 //annot_builder_input name rfam desc cmsearch aliases Rfam is_primary 1 input_manifest ${input.rfam} model_maker gnomon2model
19 //annot_builder_input name trna desc tRNAscan-SE is_primary 1 input_manifest ${input.trna_annot} model_maker passthru
20 //annot_builder_input name blessed desc SelectedGeneRepresentative is_primary 0 input_manifest ${input.best_rs_seqalign} model_maker splign2model score_filter rank=1 user_filter lxr_data.is_refseq=0
21
// Sub-workflow that assembles an annot_builder configuration and runs the tool.
//
// Inputs (take):
//   gencoll_asn  - passed to annot_builder_run and used there as `-gc-assembly`
//   gnomon_file  - the single data-provider input; listed in the provider manifest
//   genome_asn   - staged under `genome/` by annot_builder_run for LDS2 indexing
//   parameters   - Map : extra parameter and parameter update
//
// Outputs (emit): outputs, accept_asn, accept_ftable_annot, annot_files.
//
// NOTE(review): `parameters` is accepted but never read here; every process call
// below is handed the global `params` instead. Confirm whether that is intended.
workflow annot_builder {
    take:
        gencoll_asn
        gnomon_file
        genome_asn
        parameters     // Map : extra parameter and parameter update
    main:

        // Step 1: write the [Main] section of the configuration file.
        def main_ini = annot_builder_main('outdir', params).collect()

        // Step 2: append the data-provider section ('01') and write its manifest.
        // Index 0 of the result is the combined .ini, index 1 is the manifest file.
        def provider_out = annot_builder_input('outdir', main_ini, '01', gnomon_file, params)

        // Step 3: run annot_builder with the final configuration.
        // FIXME: intended params 4-5 to be lists of all input files and all input manifests, but it complained with only one entry
        def (all_outputs, accept_asn_ch, accept_ftable_ch, annot_ch) = annot_builder_run('outdir', provider_out[0], gencoll_asn, provider_out[1], gnomon_file, genome_asn, params)

    emit:
        outputs = all_outputs
        accept_asn = accept_asn_ch
        accept_ftable_annot = accept_ftable_ch
        annot_files = annot_ch
}
41
42
43 // [Main]
44 // accept_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/ACCEPT"
45 // collection_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/COLLECTION"
46 // conflict_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/CONFLICT"
47 // loss_pct_ccds = "0.0"
48 // loss_pct_refseq_alt_ref_loci = "100.0"
49 // loss_pct_refseq_other = "1.0"
50 // loss_pct_refseq_patches = "100.0"
51 // loss_pct_refseq_ref_primary = "1.0"
52 // reftrack_attrs_manifest = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/inp/reftrack_attrs.mft"
53 // report_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/REPORT"
54 // test_dir = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/out/TEST"
55
// Writes `annot_builder_main.ini`, containing the [Main] section of the
// annot_builder configuration.
//
// Inputs:
//   outdir - directory name interpolated into the accept/collection/conflict/
//            report/test directory settings
//   params - declared but not referenced by the script or the stub
// Output:
//   annot_builder_main.ini
//
// The loss_pct_* values are fixed literals in the script below.
process annot_builder_main {
    input:
        val outdir
        val params
    output:
        path 'annot_builder_main.ini'
    script:
    """
    #!/usr/bin/env python3
    with open('annot_builder_main.ini', 'w') as outf:
        print('[Main]', file=outf)
        print('accept_dir = "$outdir/ACCEPT"', file=outf)
        print('collection_dir = "$outdir/COLLECTION"', file=outf)
        print('conflict_dir = "$outdir/CONFLICT"', file=outf)
        print('loss_pct_ccds = "0.0"', file=outf)
        print('loss_pct_refseq_alt_ref_loci = "100.0"', file=outf)
        print('loss_pct_refseq_other = "1.0"', file=outf)
        print('loss_pct_refseq_patches = "100.0"', file=outf)
        print('loss_pct_refseq_ref_primary = "1.0"', file=outf)
        print('report_dir = "$outdir/REPORT"', file=outf)
        print('test_dir = "$outdir/TEST"', file=outf)
    """
    // Stub run: produce a placeholder file with the expected name.
    stub:
    """
    touch annot_builder_main.ini
    echo 'main' > annot_builder_main.ini
    """
}
84
85
86 // [DataProvider06]
87 // aliases = "Gnomon|Chainer|PartAbInitio|FullAbInitio|Chainer_GapFilled|PartAbInitio_GapFilled"
88 // desc = "Gnomon"
89 // drop_alt_brs_overlap = "1"
90 // enable_AR0050_AR0048 = "1"
91 // input_manifest = "/netmnt/vast01/gpi/prod/GPIPE_PROD/data00/Lagenorhynchus_albirostris/1.1.470794/6389647/annot_builder.46361542/inp/gnomon_models.mft"
92 // is_primary = "1"
93 // keep_top_N_models = "50"
94 // max_pct_ab_initio = "50"
95 // merge_variants = "1"
96 // model_maker = "gnomon2model"
97 // name = "gnomon"
98 // use_secondary_support = "1"
99
// Extends an existing configuration file with one [DataProvider<NN>] section
// and writes the manifest file that section points to.
//
// Inputs:
//   outdir          - declared but not referenced by the script or the stub
//   prior_file      - configuration written so far; copied to the top of the output
//   provider_number - suffix used in the section name and the manifest file name
//   input_file      - file whose staged name is written as the manifest's only line
//   params          - declared but not referenced by the script or the stub
// Outputs:
//   annot_builder_input.ini             - prior_file contents plus the new section
//   input_manifest_<provider_number>.mft
//
// Every provider setting other than input_manifest is a fixed Gnomon literal.
process annot_builder_input {
    input:
        val outdir
        path prior_file
        val provider_number
        path input_file
        val params
    output:
        path 'annot_builder_input.ini'
        path "input_manifest_${provider_number}.mft"
    script:
    """
    #!/usr/bin/env python3
    with open('annot_builder_input.ini', 'w') as outf:

        with open('${prior_file}', 'r') as f:
            print(f.read(), file=outf)

        print('[DataProvider${provider_number}]', file=outf)

        im = 'input_manifest_${provider_number}.mft'
        inpf = '${input_file}'
        with open(im, 'w') as mft:
            print(inpf, file=mft)
        print(f'input_manifest="{im}"', file=outf)

        print('aliases = "Gnomon|Chainer|PartAbInitio|FullAbInitio|Chainer_GapFilled|PartAbInitio_GapFilled"', file=outf)
        print('desc = "Gnomon"', file=outf)
        print('name = "gnomon"', file=outf)
        print('model_maker = "gnomon2model"', file=outf)

        print('drop_alt_brs_overlap = "1"', file=outf)
        print('enable_AR0050_AR0048 = "1"', file=outf)
        print('is_primary = "1"', file=outf)
        print('keep_top_N_models = "50"', file=outf)
        print('max_pct_ab_initio = "50"', file=outf)
        print('merge_variants = "1"', file=outf)
        print('use_secondary_support = "1"', file=outf)
    """
    // Stub run: copy the prior configuration, append a marker line, and leave
    // an empty manifest.
    stub:
    """
    touch annot_builder_input.ini
    touch input_manifest_${provider_number}.mft
    cp ${prior_file} annot_builder_input.ini
    echo 'input ${provider_number}' >> annot_builder_input.ini
    """
}
147
148
149 // ## annot_builder -accept-output both -asn-cache ${GP_cache_dir} -conffile ${conffile} -gc-assembly-manifest ${input.gencoll_asn} -logfile ${logfile}
// Runs annot_builder against a prepared configuration file.
//
// Inputs:
//   outdir          - directory (relative to the task dir) that receives all results
//   conffile        - final configuration, staged as `annot_builder_final.ini`
//   gencoll_asn     - passed to annot_builder as `-gc-assembly`
//   input_manifests - staged into the task dir; not named on the command line
//                     (presumably located through the paths written in conffile)
//   input_files     - staged into the task dir; not named on the command line
//                     (presumably located through the manifest contents)
//   genome_asn      - staged under `genome/`, which is indexed into the LDS2 database
//   params          - declared but not referenced by the script or the stub
//
// Outputs (in positional order):
//   all                 - everything directly under ${outdir}
//   accept              - ${outdir}/ACCEPT/accept.asn (optional)
//   accept_ftable_annot - concatenation of ACCEPT/*.ftable.annot (optional)
//   annot               - ${outdir}/ACCEPT/*.annot (optional); this glob also
//                         matches the *.ftable.annot files
//
// The fourth output previously had no emit name. It is now named `annot` so it
// can be read as `annot_builder_run.out.annot`, like its siblings. Positional
// access is unaffected.
process annot_builder_run {
    input:
        val outdir
        path conffile, stageAs: 'annot_builder_final.ini'
        path gencoll_asn
        path input_manifests
        path input_files
        path genome_asn, stageAs: 'genome/*'
        val params
    output:
        path "${outdir}/*", emit: "all"
        path "${outdir}/ACCEPT/accept.asn", emit: "accept", optional: true
        path "${outdir}/ACCEPT/accept.ftable_annot", emit: "accept_ftable_annot", optional: true
        path "${outdir}/ACCEPT/*.annot", emit: "annot", optional: true
    // Create the output directory tree, index the staged genome files, run
    // annot_builder, then merge the per-file feature tables into one file.
    script:
    """
    mkdir -p $outdir/ACCEPT
    mkdir -p $outdir/COLLECTION
    mkdir -p $outdir/CONFLICT
    mkdir -p $outdir/REPORT
    mkdir -p $outdir/TEST

    lds2_indexer -source genome/ -db LDS2
    # EXCEPTION_STACK_TRACE_LEVEL=Warning DEBUG_STACK_TRACE_LEVEL=Warning DIAG_POST_LEVEL=Trace
    annot_builder -accept-output both -nogenbank -lds2 LDS2 -conffile $conffile -gc-assembly $gencoll_asn -logfile ${outdir}/annot_builder.log
    cat ${outdir}/ACCEPT/*.ftable.annot > ${outdir}/ACCEPT/accept.ftable_annot
    """
    // Stub run: create the directory tree and small placeholder files for
    // every declared output.
    stub:
    """
    mkdir -p $outdir/ACCEPT
    mkdir -p $outdir/COLLECTION
    mkdir -p $outdir/CONFLICT
    mkdir -p $outdir/REPORT
    mkdir -p $outdir/TEST

    echo "1" > ${outdir}/annot_builder.log
    echo "2" > ${outdir}/accept.asn
    echo "3" > ${outdir}/accept.ftable.annot


    echo "4" > ${outdir}/ACCEPT/accept.asn
    echo "5" > ${outdir}/ACCEPT/accept.ftable_annot
    echo "S1" > ${outdir}/ACCEPT/S1.annot
    echo "S2" > ${outdir}/ACCEPT/S2.annot

    """
}