comparison nf/subworkflows/ncbi/utilities.nf @ 0:d9c5c5b87fec draft

planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4
author fubar
date Sat, 03 Aug 2024 11:16:53 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d9c5c5b87fec
1 #!/usr/bin/env nextflow
2 nextflow.enable.dsl=2
3
4
// Groovy counterpart of Python's shlex.split: break a command-line style
// string into words.
//   s: the text to split
// Returns: the words in order of appearance.
// Rules implemented here:
//   - unquoted whitespace separates words;
//   - single or double quotes group text, and the quote characters themselves
//     are dropped; a quoted empty string standing on its own yields "";
//   - outside quotes a backslash makes the next character literal;
//   - inside quotes a backslash is an ordinary character.
def List<String> shellSplit(CharSequence s) {
    List<String> words = []
    StringBuilder word = new StringBuilder()
    boolean afterBackslash = false
    boolean insideQuotes = false
    char openQuote = ' '
    // Index of the most recent closing quote; this is what lets '' or ""
    // be recognised as an (empty) word even though nothing was collected.
    int closedAt = Integer.MIN_VALUE

    // Store the word collected so far when it has text, or when a quote was
    // closed on the character immediately before position `pos`.
    def emitWordBefore = { int pos ->
        if (word.length() > 0 || closedAt == pos - 1) {
            words.add(word.toString())
            word.setLength(0)
        }
    }

    s.eachWithIndex { ch, idx ->
        if (afterBackslash) {
            // Character following an unquoted backslash is taken verbatim.
            word.append(ch)
            afterBackslash = false
        } else if (insideQuotes) {
            if (ch == openQuote) {
                insideQuotes = false
                closedAt = idx
            } else {
                word.append(ch)
            }
        } else if (ch == '\\') {
            afterBackslash = true
        } else if (ch == '\'' || ch == '"') {
            insideQuotes = true
            openQuote = ch
        } else if (ch.isAllWhitespace()) {
            emitWordBefore(idx)
        } else {
            word.append(ch)
        }
    }
    // Flush whatever is pending at end of input.
    emitWordBefore(s.length())

    return words
}
42
43
// Turn a flat list of command-line words into an option -> value map.
//   list: words such as ['-a', '1', '-flag', '-b', 'x y']
// Returns: a map keyed by option name (leading '-' included). An option gets
// the following word as its value when that word is non-empty and either does
// not start with '-' or contains a space; otherwise its value is "".
// If a word that should be an option does not start with '-', an error line is
// printed and the entries gathered up to that point are returned.
def Map<String, String> to_map(List<String> list )
{
    def map = [:]
    def s = list.size()
    def i = 0
    while (i < s) {
        def elem = list.get(i)
        i += 1

        // Guard: every word reached here must look like an option name.
        if (elem.size() == 0 || elem[0] != '-') {
            println("Error: parameter string not well formed, map ${map}, elem ${elem}, i ${i}, s ${s}")
            return map
        }

        // Default to a bare flag; consume the next word only if it qualifies
        // as a value.
        def value = ""
        if (i < s) {
            def candidate = list.get(i)
            if (candidate.size() > 0 && (candidate[0] != '-' || candidate.contains(' '))) {
                value = candidate
                i += 1
            }
        }
        map[elem] = value
    }
    return map
}
75
76
// Shell-quote a single word, in the manner of Python's shlex.quote.
//   s: the word to quote
// Returns: s unchanged when it is non-empty and made up only of word
// characters and @ % + = : , . / - ; otherwise s wrapped in single quotes.
// A single quote inside s is written as '"'"' (close the quoted run, emit a
// double-quoted single quote, reopen), so the result is always well-formed
// and splits back to the original word.
def quote(String s)
{
    if (s.size() > 0 && !(s =~ /[^\w@%+=:,.\/-]/)) {
        return s
    }
    return "'" + s.replace("'", "'\"'\"'") + "'"
}
84
85
// Overlay one section of user-supplied parameters on top of a default
// parameter string.
// Parameters:
//      default_params: the default parameters, as a single command-line style
//                      string (null is treated as empty)
//      parameters: map from section name to a parameter string (may be null)
//      section_name: the key in `parameters` whose string overrides defaults
// Return: the merged parameters as one space-separated string; options keep
//         the order of the defaults, followed by any options that appear only
//         in the section. Each option and non-empty value is shell-quoted.
def merge_params(default_params, parameters, section_name)
{
    // Plain lookup with a fallback. Groovy's two-argument Map.get(key, default)
    // is deliberately avoided because it stores the default in the map when
    // the key is missing, which would modify the caller's map.
    def section = parameters?.get(section_name) ?: ""
    def update_map = to_map(shellSplit(section))
    def default_params_map = to_map(shellSplit(default_params ?: ""))
    // Section entries win over defaults with the same option name.
    default_params_map.putAll(update_map)
    def l = []
    default_params_map.each { parameter, value ->
        l << quote(parameter)
        // Bare flags are stored with an empty value and emit no value word.
        if (value.size() > 0) {
            l << quote(value)
        }
    }

    return l.join(" ")
}
108
109
110
// Rewrite the identifiers of a FASTA file (see the notes inside the script).
//   input:  fasta_in  - the FASTA file to process
//   output: fasta_out - the rewritten file, emitted under the name 'fasta_out'
// The script writes a small Python program from base64 text, then pipes the
// input through it. The stub must create the same file name the output
// declaration expects ("fasta_out").
process clean_fasta_ids {
    input:
        path fasta_in
    output:
        path "fasta_out", emit: 'fasta_out'
    script:
    """
    ## turns Fasta inputs formatted with multi-part IDs into
    ## single-part IDs, like
    ## >gi|1234|ref|NW_1234.1 Some Defline For This Org
    ## >gi|1234 Some Defline For This Org
    ## LDS chokes on the multi-part IDs.
    # the base64 nonsense is because I couldnt get it to not complain about the regex as syntax errors in some way.
    # its just this:
    # import re,sys;
    # for l in sys.stdin:
    # <I had to delete the regex here because even in a comment nextflow lost it>
    echo 'aW1wb3J0IHJlLHN5czsKZm9yIGwgaW4gc3lzLnN0ZGluOgogICAgcHJpbnQocmUuc3ViKHIiXig+' > reol.b64
    echo 'Z2lcfFxkKylcfD8oW2Etel0rXHxbQS1aX10rW1xkXC5dK1x8KSguKikiLCAiXGc8MT5cZzwzPiIs' >> reol.b64
    echo 'IGwuc3RyaXAoKSkpCg==' >> reol.b64
    base64 -d ./reol.b64 > ./reol.py
    cat ${fasta_in} | python reol.py > ./fasta_out
    """
    stub:
    """
    touch ./fasta_out
    """
}
139
140
141
// Run the `multireader` command on a FASTA file and emit its ASN.1 text output.
//   input:  fasta_file - path of the file passed to `-input`
//           parameters - extra command-line arguments, inserted verbatim
//   output: output/fasta_file.asnt, emitted under the name 'multireader_file'
process multireader {
    input:
        path fasta_file
        val parameters
    output:
        path ('output/fasta_file.asnt') , emit: 'multireader_file'
    // $fasta_file and $parameters are interpolated into the script text before
    // the shell sees it, so the -n test checks whether the interpolated file
    // name is empty. When it is, an empty output file is created instead of
    // calling multireader.
    script:
    """
    mkdir -p output
    if [ -n "$fasta_file" ]; then
    multireader $parameters -out-format asn_text -input $fasta_file -output output/fasta_file.asnt
    else
    touch output/fasta_file.asnt
    fi
    """
    // Stub: create the declared output as an empty file without running the tool.
    stub:
    """
    mkdir -p output
    touch output/fasta_file.asnt
    """
}