Mercurial > repos > rnateam > splitfasta
changeset 4:ae4d5733272f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/splitfasta commit 03f3cc2000e6ce876a3cb44c55c3fe878a2e7ce3-dirty
author | rnateam |
---|---|
date | Fri, 16 Oct 2015 16:13:34 -0400 |
parents | 7439ffbd8943 |
children | 733ca84b21ee |
files | splitFasta.py splitFasta.xml splitfasta.xml test-data/ID1_result1.fasta test-data/ID2_result1.fasta test-data/ID3_result1.fasta test-data/test.fasta tool_dependencies.xml |
diffstat | 8 files changed, 79 insertions(+), 55 deletions(-) |
line wrap: on
line diff
--- a/splitFasta.py Wed Jul 08 06:23:46 2015 -0400 +++ b/splitFasta.py Fri Oct 16 16:13:34 2015 -0400 @@ -1,14 +1,13 @@ -import sys, os +#!/usr/bin/env python +import os +import sys +from Bio import SeqIO + if __name__ == "__main__": - #assuming perfect input, read every two lines inpath = sys.argv[1] - file_contents = open(inpath, 'r').readlines() - os.makedirs('splits') - inname = os.path.basename(inpath) - for i in range(0, len(file_contents), 2): - headline = file_contents[i] - outname = headline[1:headline.index(' ')]+'.fa' - outfile = open(os.path.join('splits',outname), 'w') - outfile.write(file_contents[i]) - outfile.write(file_contents[i+1]) - outfile.close() + os.mkdir('splits') + with open(inpath, 'r') as handle: + for record in SeqIO.parse(handle, 'fasta'): + header = os.path.join('splits', record.id + '.fasta') + with open(header, 'w') as handle2: + SeqIO.write([record], handle2, 'fasta')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splitFasta.xml Fri Oct 16 16:13:34 2015 -0400 @@ -0,0 +1,44 @@ +<tool id="rbc_splitfasta" name="Split Fasta" version="0.2.0"> + <description>files into a collection</description> + <requirements> + <requirement type="package" version="1.65">biopython</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command interpreter="python"> + <![CDATA[ + splitFasta.py $inputFile + ]]></command> + <inputs> + <param name="inputFile" type="data" format="fasta" label="Fasta file to split"/> + </inputs> + <outputs> + <collection name="splitted_fasta" type="list" label="Sequence collection in FASTA format"> + <discover_datasets pattern="(?P<designation>.*)" directory="splits" ext="fasta" visible="false"/> + </collection> + </outputs> + <tests> + <test> + <param name="inputFile" value="test.fasta" /> + <output_collection name="splitted_fasta"> + <element name="ID1.fasta" file="ID1_result1.fasta" ftype="fasta" /> + <element name="ID2.fasta" file="ID2_result1.fasta" ftype="fasta" /> + <element name="ID3.fasta" file="ID3_result1.fasta" ftype="fasta" /> + </output_collection> + </test> + </tests> + <help><![CDATA[ + Takes an input file and writes each consecutive two lines to a separate file, in a dataset collection. + ]]></help> + <citations> + <citation type="bibtex"> + @ARTICLE{bgruening_galaxytools, + Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche}, + keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna}, + title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}}, + url = {https://github.com/bgruening/galaxytools} + } + </citation> + </citations> +</tool>
--- a/splitfasta.xml Wed Jul 08 06:23:46 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -<tool id="rbc_splitfasta" name="SplitFasta" version="0.1.0"> - <requirements> - </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - - <command interpreter="python"> - <![CDATA[ - splitFasta.py $inputFile - ]]></command> - <inputs> - <param name="inputFile" type="data" format="fasta" label="Fasta file to split"/> - <param name="outputFormat" type="select" label="Output Format"> - <option value="separate">Separate History Items</option> - <option value="collection">Create dataset collection</option> - </param> - </inputs> - <outputs> - <collection type="list"> - <filter>outputFormat == 'collection'</filter> - <discover_datasets pattern="(?P<designation>.*)" directory="splits" ext="fasta" visible="false"/> - </collection> - <data name="output"> - <filter>outputFormat == 'separate'</filter> - <discover_datasets pattern="(?P<designation>.*)" directory="splits" ext="fasta" visible="true"/> - </data> - </outputs> - <help><![CDATA[ - Takes an input file and writes each consecutive two lines to a separate file, in a dataset collection. - ]]></help> - <citations> - <citation type="bibtex"> - @ARTICLE{bgruening_galaxytools, - Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche}, - keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna}, - title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}}, - url = {https://github.com/bgruening/galaxytools} - } - </citation> - - </citations> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ID1_result1.fasta Fri Oct 16 16:13:34 2015 -0400 @@ -0,0 +1,2 @@ +>ID1 desc +GATACA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ID2_result1.fasta Fri Oct 16 16:13:34 2015 -0400 @@ -0,0 +1,2 @@ +>ID2 desc +GATACAGATACAGATACAGATACAGATACA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ID3_result1.fasta Fri Oct 16 16:13:34 2015 -0400 @@ -0,0 +1,3 @@ +>ID3 desc +GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACA +GATACA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.fasta Fri Oct 16 16:13:34 2015 -0400 @@ -0,0 +1,11 @@ +>ID1 desc +GATACA + + +>ID2 desc +GATACAGATACA +GATACAGA +TACAGATACA +>ID3 desc +GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGA +TACAGATACA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Oct 16 16:13:34 2015 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="biopython" version="1.65"> + <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>