comparison tools/fasta_filter_by_id/fasta_filter_by_id.xml @ 3:812383b5d3b8 draft default tip

v0.0.5 - galaxy_sequence_utils dependency and other cleanups inc using MIT license
author peterjc
date Fri, 03 Feb 2017 05:32:34 -0500
parents
children
comparison
equal deleted inserted replaced
2:5b552b3005f2 3:812383b5d3b8
<tool id="fasta_filter_by_id" name="Filter FASTA by ID" version="0.0.5" hidden="true">
    <!--
        Galaxy wrapper around fasta_filter_by_id.py.
        Takes a FASTA file plus a tabular file of identifiers and writes up to
        two FASTA outputs: records whose ID is listed, and records whose ID is
        not. The tool is hidden and marked as deprecated in its help text.
    -->
    <description>from a tabular file</description>
    <requirements>
        <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement>
    </requirements>
    <!-- The script is called through $__tool_directory__ rather than the
         deprecated interpreter="python" attribute. -->
    <version_command>python '$__tool_directory__/fasta_filter_by_id.py' --version</version_command>
    <!--
        Positional arguments: tabular file, column list, FASTA file, then the
        positive and negative output paths. A literal "-" is passed in place of
        whichever output the user did not request. Every substituted value is
        single-quoted so that paths containing spaces or shell metacharacters
        reach the script as one argument each.
    -->
    <command><![CDATA[
python '$__tool_directory__/fasta_filter_by_id.py' '$input_tabular' '$columns' '$input_fasta'
#if $output_choice_cond.output_choice=="both"
'$output_pos' '$output_neg'
#elif $output_choice_cond.output_choice=="pos"
'$output_pos' -
#elif $output_choice_cond.output_choice=="neg"
- '$output_neg'
#end if
    ]]></command>
    <inputs>
        <param name="input_fasta" type="data" format="fasta" label="FASTA file to filter on the identifiers"/>
        <param name="input_tabular" type="data" format="tabular" label="Tabular file containing FASTA identifiers"/>
        <!-- Column choices are populated from the selected tabular dataset. -->
        <param name="columns" type="data_column" data_ref="input_tabular" multiple="true" numerical="false" label="Column(s) containing FASTA identifiers" help="Multi-select list - hold the appropriate key while clicking to select multiple columns">
            <validator type="no_options" message="Pick at least one column"/>
        </param>
        <conditional name="output_choice_cond">
            <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?">
                <option value="both">Both positive matches (ID on list) and negative matches (ID not on list), as two FASTA files</option>
                <option value="pos">Just positive matches (ID on list), as a single FASTA file</option>
                <option value="neg">Just negative matches (ID not on list), as a single FASTA file</option>
            </param>
            <!-- Each option value appears to need a matching (here empty) "when"
                 entry even though no extra parameters depend on the choice;
                 compare indels/indel_sam2interval.xml. -->
            <when value="both" />
            <when value="pos" />
            <when value="neg" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Each output is created unless the user asked for only the other one. -->
        <data name="output_pos" format="fasta" label="With matched ID">
            <filter>output_choice_cond["output_choice"] != "neg"</filter>
        </data>
        <data name="output_neg" format="fasta" label="Without matched ID">
            <filter>output_choice_cond["output_choice"] != "pos"</filter>
        </data>
    </outputs>
    <tests>
        <!-- Both outputs requested. -->
        <test>
            <param name="input_fasta" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="input_tabular" value="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
            <param name="columns" value="1" />
            <param name="output_choice" value="both" />
            <output name="output_pos" file="four_human_proteins_filter_a.fasta" ftype="fasta" />
            <output name="output_neg" file="four_human_proteins_filter_b.fasta" ftype="fasta" />
        </test>
        <!-- Positive matches only. -->
        <test>
            <param name="input_fasta" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="input_tabular" value="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
            <param name="columns" value="1" />
            <param name="output_choice" value="pos" />
            <output name="output_pos" file="four_human_proteins_filter_a.fasta" ftype="fasta" />
        </test>
        <!-- Negative matches only. -->
        <test>
            <param name="input_fasta" value="four_human_proteins.fasta" ftype="fasta" />
            <param name="input_tabular" value="blastp_four_human_vs_rhodopsin.tabular" ftype="tabular" />
            <param name="columns" value="1" />
            <param name="output_choice" value="neg" />
            <output name="output_neg" file="four_human_proteins_filter_b.fasta" ftype="fasta" />
        </test>
    </tests>
    <help>

**Deprecated**

This tool is now obsolete, and should not be used in future. It has been
replaced by a more general version covering FASTA, FASTQ and SFF in one
single tool.

**What it does**

By default it divides a FASTA file in two, those sequences with or without an
ID present in the tabular file column(s) specified. You can opt to have a
single output file of just the matching records, or just the non-matching ones.

Note that the order of sequences in the original FASTA file is preserved.
Also, if any sequences share an identifier, duplicates are not removed.

**Example Usage**

Given a FASTA file of proteins you might run a signal peptide search (e.g.
via the SignalP wrapper for Galaxy), then filter these tabular results to
select just those with a signal peptide. You could then use this tool to get
a FASTA file of only the proteins with predicted signal peptides.

    </help>
    <citations>
        <citation type="doi">10.7717/peerj.167</citation>
    </citations>
</tool>