Mercurial > repos > peterjc > blast_top_hit_species
comparison blast_top_hit_species.ga @ 0:68d65aeb3567 draft
Uploaded v0.0.1
author | peterjc |
---|---|
date | Mon, 30 Mar 2015 11:25:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:68d65aeb3567 |
---|---|
1 { | |
2 "a_galaxy_workflow": "true", | |
3 "annotation": "", | |
4 "format-version": "0.1", | |
5 "name": "Species of top BLAST hits", | |
6 "steps": { | |
7 "0": { | |
8 "annotation": "", | |
9 "id": 0, | |
10 "input_connections": {}, | |
11 "inputs": [ | |
12 { | |
13 "description": "", | |
14 "name": "Transcriptome FASTA file" | |
15 } | |
16 ], | |
17 "label": null, | |
18 "name": "Input dataset", | |
19 "outputs": [], | |
20 "position": { | |
21 "left": 242, | |
22 "top": 119 | |
23 }, | |
24 "tool_errors": null, | |
25 "tool_id": null, | |
26 "tool_state": "{\"name\": \"Transcriptome FASTA file\"}", | |
27 "tool_version": null, | |
28 "type": "data_input", | |
29 "user_outputs": [], | |
30 "uuid": "e445b44b-02a7-4fd1-8944-cd680f967062" | |
31 }, | |
32 "1": { | |
33 "annotation": "This workflow is deliberately a simple/crude assessment, and there is no need to run BLASTX on all the sequences - a sample of 1000 should be enough.", | |
34 "id": 1, | |
35 "input_connections": { | |
36 "input_file": { | |
37 "id": 0, | |
38 "output_name": "output" | |
39 } | |
40 }, | |
41 "inputs": [], | |
42 "label": null, | |
43 "name": "Sub-sample sequences files", | |
44 "outputs": [ | |
45 { | |
46 "name": "output_file", | |
47 "type": "input" | |
48 } | |
49 ], | |
50 "position": { | |
51 "left": 435, | |
52 "top": 119 | |
53 }, | |
54 "post_job_actions": { | |
55 "RenameDatasetActionoutput_file": { | |
56 "action_arguments": { | |
57 "newname": "1000 sequences from #{input_file}" | |
58 }, | |
59 "action_type": "RenameDatasetAction", | |
60 "output_name": "output_file" | |
61 } | |
62 }, | |
63 "tool_errors": null, | |
64 "tool_id": "toolshed.g2.bx.psu.edu/repos/peterjc/sample_seqs/sample_seqs/0.2.1", | |
65 "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"__rerun_remap_job_id__\": null, \"sampling\": \"{\\\"count\\\": \\\"1000\\\", \\\"type\\\": \\\"desired_count\\\", \\\"__current_case__\\\": 2}\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"interleaved\": \"\\\"False\\\"\"}", | |
66 "tool_version": "0.2.1", | |
67 "type": "tool", | |
68 "user_outputs": [], | |
69 "uuid": "87ce69ef-5fb0-41b0-9575-d3b96544f8be" | |
70 }, | |
71 "2": { | |
72 "annotation": "We only want one line per query, so limit this to the best scoring target sequence. Assumes current NCBI nr database is available locally as \"nr\".", | |
73 "id": 2, | |
74 "input_connections": { | |
75 "query": { | |
76 "id": 1, | |
77 "output_name": "output_file" | |
78 } | |
79 }, | |
80 "inputs": [], | |
81 "label": null, | |
82 "name": "NCBI BLAST+ blastx", | |
83 "outputs": [ | |
84 { | |
85 "name": "output1", | |
86 "type": "tabular" | |
87 } | |
88 ], | |
89 "position": { | |
90 "left": 489, | |
91 "top": 263 | |
92 }, | |
93 "post_job_actions": { | |
94 "RenameDatasetActionoutput1": { | |
95 "action_arguments": { | |
96 "newname": "Top BLAST match" | |
97 }, | |
98 "action_type": "RenameDatasetAction", | |
99 "output_name": "output1" | |
100 } | |
101 }, | |
102 "tool_errors": null, | |
103 "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastx_wrapper/0.1.01", | |
104 "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"adv_optional_id_files_opts\\\": {\\\"adv_optional_id_files_opts_selector\\\": \\\"none\\\", \\\"__current_case__\\\": 0}, \\\"matrix\\\": \\\"BLOSUM62\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"True\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"db\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": \\\"\\\", \\\"__current_case__\\\": 0, \\\"database\\\": \\\"nr\\\"}\", \"query_gencode\": \"\\\"1\\\"\", \"query\": \"null\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"pident\\\", \\\"length\\\", \\\"mismatch\\\", \\\"gapopen\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\", \\\"evalue\\\", \\\"bitscore\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": [\\\"staxids\\\", \\\"sscinames\\\", \\\"scomnames\\\", \\\"sblastnames\\\", \\\"sskingdoms\\\"], \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": null}\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", | |
105 "tool_version": "0.1.01", | |
106 "type": "tool", | |
107 "user_outputs": [], | |
108 "uuid": "1559a0b0-0b66-40f9-b777-2f062fcda4cc" | |
109 }, | |
110 "3": { | |
111 "annotation": "A tabular file of all 1000 sequences is used in the \"join\" step to count the sequences giving no BLAST hit.", | |
112 "id": 3, | |
113 "input_connections": { | |
114 "input": { | |
115 "id": 1, | |
116 "output_name": "output_file" | |
117 } | |
118 }, | |
119 "inputs": [], | |
120 "label": null, | |
121 "name": "FASTA-to-Tabular", | |
122 "outputs": [ | |
123 { | |
124 "name": "output", | |
125 "type": "tabular" | |
126 } | |
127 ], | |
128 "position": { | |
129 "left": 696, | |
130 "top": 139 | |
131 }, | |
132 "post_job_actions": { | |
133 "HideDatasetActionoutput": { | |
134 "action_arguments": {}, | |
135 "action_type": "HideDatasetAction", | |
136 "output_name": "output" | |
137 }, | |
138 "RenameDatasetActionoutput": { | |
139 "action_arguments": { | |
140 "newname": "1000 sequences as tabular" | |
141 }, | |
142 "action_type": "RenameDatasetAction", | |
143 "output_name": "output" | |
144 } | |
145 }, | |
146 "tool_errors": null, | |
147 "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.0", | |
148 "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"2\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}", | |
149 "tool_version": "1.1.0", | |
150 "type": "tool", | |
151 "user_outputs": [], | |
152 "uuid": "31f11208-b2bd-4d9d-9745-dc1a6ed7ccf9" | |
153 }, | |
154 "4": { | |
155 "annotation": "Some BLAST matches will give multiple HSPs, and thus multiple lines in the tabular output. We only want one line per query.", | |
156 "id": 4, | |
157 "input_connections": { | |
158 "input": { | |
159 "id": 2, | |
160 "output_name": "output1" | |
161 } | |
162 }, | |
163 "inputs": [], | |
164 "label": null, | |
165 "name": "Unique", | |
166 "outputs": [ | |
167 { | |
168 "name": "outfile", | |
169 "type": "input" | |
170 } | |
171 ], | |
172 "position": { | |
173 "left": 665, | |
174 "top": 376 | |
175 }, | |
176 "post_job_actions": { | |
177 "HideDatasetActionoutfile": { | |
178 "action_arguments": {}, | |
179 "action_type": "HideDatasetAction", | |
180 "output_name": "outfile" | |
181 }, | |
182 "RenameDatasetActionoutfile": { | |
183 "action_arguments": { | |
184 "newname": "One HSP per BLAST hit" | |
185 }, | |
186 "action_type": "RenameDatasetAction", | |
187 "output_name": "outfile" | |
188 } | |
189 }, | |
190 "tool_errors": null, | |
191 "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/unique/bg_uniq/0.3", | |
192 "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"adv_opts\": \"{\\\"column_end\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"column_start\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}, \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"is_numeric\": \"\\\"False\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", | |
193 "tool_version": "0.3", | |
194 "type": "tool", | |
195 "user_outputs": [], | |
196 "uuid": "acf948e3-71dc-4f35-8357-3998bd0abdd8" | |
197 }, | |
198 "5": { | |
199 "annotation": "We don't need all the columns in this join, but the key is to assign \"None\" to the sequences with no BLAST hits.", | |
200 "id": 5, | |
201 "input_connections": { | |
202 "input1": { | |
203 "id": 3, | |
204 "output_name": "output" | |
205 }, | |
206 "input2": { | |
207 "id": 4, | |
208 "output_name": "outfile" | |
209 } | |
210 }, | |
211 "inputs": [], | |
212 "label": null, | |
213 "name": "Join two Datasets", | |
214 "outputs": [ | |
215 { | |
216 "name": "out_file1", | |
217 "type": "input" | |
218 } | |
219 ], | |
220 "position": { | |
221 "left": 827, | |
222 "top": 263 | |
223 }, | |
224 "post_job_actions": { | |
225 "HideDatasetActionout_file1": { | |
226 "action_arguments": {}, | |
227 "action_type": "HideDatasetAction", | |
228 "output_name": "out_file1" | |
229 }, | |
230 "RenameDatasetActionout_file1": { | |
231 "action_arguments": { | |
232 "newname": "Top BLAST hits or None" | |
233 }, | |
234 "action_type": "RenameDatasetAction", | |
235 "output_name": "out_file1" | |
236 } | |
237 }, | |
238 "tool_errors": null, | |
239 "tool_id": "join1", | |
240 "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"fill_empty\\\", \\\"do_fill_empty_columns\\\": {\\\"column_fill_type\\\": \\\"single_fill_value\\\", \\\"fill_value\\\": \\\"None\\\", \\\"__current_case__\\\": 0}, \\\"fill_columns_by\\\": \\\"fill_unjoined_only\\\", \\\"__current_case__\\\": 1}\", \"unmatched\": \"\\\"-u\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", | |
241 "tool_version": "2.0.2", | |
242 "type": "tool", | |
243 "user_outputs": [], | |
244 "uuid": "4c280b0e-b4a6-4ae4-8a81-d6e93932ef71" | |
245 }, | |
246 "6": { | |
247 "annotation": "Here we make a tally table of the BLAST species name column", | |
248 "id": 6, | |
249 "input_connections": { | |
250 "input": { | |
251 "id": 5, | |
252 "output_name": "out_file1" | |
253 } | |
254 }, | |
255 "inputs": [], | |
256 "label": null, | |
257 "name": "Count", | |
258 "outputs": [ | |
259 { | |
260 "name": "out_file1", | |
261 "type": "tabular" | |
262 } | |
263 ], | |
264 "position": { | |
265 "left": 952, | |
266 "top": 398 | |
267 }, | |
268 "post_job_actions": { | |
269 "HideDatasetActionout_file1": { | |
270 "action_arguments": {}, | |
271 "action_type": "HideDatasetAction", | |
272 "output_name": "out_file1" | |
273 }, | |
274 "RenameDatasetActionout_file1": { | |
275 "action_arguments": { | |
276 "newname": "Top BLAST hit species counts (unsorted)" | |
277 }, | |
278 "action_type": "RenameDatasetAction", | |
279 "output_name": "out_file1" | |
280 } | |
281 }, | |
282 "tool_errors": null, | |
283 "tool_id": "Count1", | |
284 "tool_state": "{\"__page__\": 0, \"column\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"19\\\"]}\", \"__rerun_remap_job_id__\": null, \"delim\": \"\\\"T\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", | |
285 "tool_version": "1.0.0", | |
286 "type": "tool", | |
287 "user_outputs": [], | |
288 "uuid": "d3322137-1911-426d-87a7-c82b5fc16825" | |
289 }, | |
290 "7": { | |
291 "annotation": "Sorting the counts makes the results easier to interpret directly.", | |
292 "id": 7, | |
293 "input_connections": { | |
294 "input": { | |
295 "id": 6, | |
296 "output_name": "out_file1" | |
297 } | |
298 }, | |
299 "inputs": [], | |
300 "label": null, | |
301 "name": "Sort", | |
302 "outputs": [ | |
303 { | |
304 "name": "out_file1", | |
305 "type": "input" | |
306 } | |
307 ], | |
308 "position": { | |
309 "left": 1056, | |
310 "top": 506 | |
311 }, | |
312 "post_job_actions": { | |
313 "RenameDatasetActionout_file1": { | |
314 "action_arguments": { | |
315 "newname": "Top BLAST hit species counts" | |
316 }, | |
317 "action_type": "RenameDatasetAction", | |
318 "output_name": "out_file1" | |
319 } | |
320 }, | |
321 "tool_errors": null, | |
322 "tool_id": "sort1", | |
323 "tool_state": "{\"__page__\": 0, \"style\": \"\\\"num\\\"\", \"column\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"column_set\": \"[]\", \"input\": \"null\", \"chromInfo\": \"\\\"/mnt/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"order\": \"\\\"DESC\\\"\"}", | |
324 "tool_version": "1.0.3", | |
325 "type": "tool", | |
326 "user_outputs": [], | |
327 "uuid": "c81cc61d-52a3-44ee-b646-b23e0e004c38" | |
328 } | |
329 }, | |
330 "uuid": "9fe8754a-3a87-4f6a-89a2-141b02b4793e" | |
331 } |