Mercurial > repos > pravs > remove_fasta_subsequences
diff removeFastaSubSequence.xml @ 0:9ec27561593e draft
planemo upload
author | pravs |
---|---|
date | Wed, 02 Aug 2017 18:09:53 -0400 |
parents | |
children | d49328dfeceb |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/removeFastaSubSequence.xml Wed Aug 02 18:09:53 2017 -0400 @@ -0,0 +1,88 @@ + +<tool id="removeFastaSubSequence" name="Remove Fasta Substring Sequence" version="1.0.0"> + <description>Removes sequences that are subsequence in a reference Fasta File.</description> + <requirements> + <requirement type="package" version="1.70">biopython</requirement> + </requirements> + <command interpreter="python"><![CDATA[removeFastaSubSequence.py $ref_fastafile $query_fastafile $output]]></command> + <inputs> + <param name="ref_fastafile" type="data" format="fasta"> + <label>Input Reference Fasta File</label> + </param> + <param name="query_fastafile" type="data" format="fasta"> + <label>Input Query Fasta File</label> + </param> + </inputs> + + <outputs> + <data format="fasta" name="output" label="uniqSeq_${query_fastafile.name.rsplit('.',1)[0]}.fasta" /> + </outputs> + + <tests> + <test> + <param name="ref_fastafile" value="test_ref.fasta" /> + <param name="query_fastafile" value="test_query.fasta" /> + <output name="output" file="uniqSeq_test_query.fasta"> + <assert_contents> + <has_text text="ENSMUST00000193003" /> + </assert_contents> + </output> + </test> + </tests> + + + <help> +This program removes the sequences from the query fasta file that are present as subsequence in a reference fasta file. + +EXAMPLE: + +---- + +Ref sequences: + +>reference_seq_1 + +TSLDKDHLELCCTLSLPFSWACSWVLVLRLSINGQLPRSRLWAAHCLWGVP + +>reference_seq_2 + +RGLCISGLEKEVQVQSRQAEGPVHLWLRKGSTSAE + +---- + +Query Sequences: + +>query_seq_1 + +TKTILNYAVLSPCLSPGHVLGC + + +>query_seq_2 + +LDKDHLELCCTLSLPFSWACSWVLVL + + +>query_seq_3 + +LWGVPRGLCISG + +---- + +Output Sequences: + +>query_seq_1 + +TKTILNYAVLSPCLSPGHVLGC + + +>query_seq_3 + +LWGVPRGLCISG + +---- + +Output Sequence file will have only query_seq_1 and query_seq_3. query_seq_2 is removed because query_seq_2's sequence "LDKDHLELCCTLSLPFSWACSWVLVL" is +present as substring in reference_seq_1's sequence "TSLDKDHLELCCTLSLPFSWACSWVLVLRLSINGQLPRSRLWAAHCLWGVP". + + </help> +</tool>