Mercurial > repos > rnateam > splitfasta

--- a/splitFasta.py	Wed Jul 08 06:23:46 2015 -0400
+++ b/splitFasta.py	Fri Oct 16 16:13:34 2015 -0400
@@ -1,14 +1,13 @@
-import sys, os
+#!/usr/bin/env python
+import os
+import sys
+from Bio import SeqIO
+
 if __name__ == "__main__":
-    #assuming perfect input, read every two lines
     inpath = sys.argv[1]
-    file_contents = open(inpath, 'r').readlines()
-    os.makedirs('splits')
-    inname = os.path.basename(inpath)
-    for i in range(0, len(file_contents), 2):
-        headline = file_contents[i]
-        outname = headline[1:headline.index(' ')]+'.fa'
-        outfile = open(os.path.join('splits',outname), 'w')
-        outfile.write(file_contents[i])
-        outfile.write(file_contents[i+1])
-        outfile.close()
+    os.mkdir('splits')
+    with open(inpath, 'r') as handle:
+        for record in SeqIO.parse(handle, 'fasta'):
+            header = os.path.join('splits', record.id + '.fasta')
+            with open(header, 'w') as handle2:
+                SeqIO.write([record], handle2, 'fasta')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/splitFasta.xml	Fri Oct 16 16:13:34 2015 -0400
@@ -0,0 +1,44 @@
+<tool id="rbc_splitfasta" name="Split Fasta" version="0.2.0">
+    <description>files into a collection</description>
+    <requirements>
+        <requirement type="package" version="1.65">biopython</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command interpreter="python">
+    <![CDATA[
+        splitFasta.py $inputFile
+    ]]></command>
+    <inputs>
+        <param name="inputFile" type="data" format="fasta" label="Fasta file to split"/>
+    </inputs>
+    <outputs>
+        <collection name="splitted_fasta" type="list" label="Sequence collection in FASTA format">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="splits" ext="fasta" visible="false"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputFile" value="test.fasta" />
+            <output_collection name="splitted_fasta">
+                <element name="ID1.fasta" file="ID1_result1.fasta" ftype="fasta" />
+                <element name="ID2.fasta" file="ID2_result1.fasta" ftype="fasta" />
+                <element name="ID3.fasta" file="ID3_result1.fasta" ftype="fasta" />
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+        Takes a FASTA file and writes each sequence record (header plus its full, possibly multi-line, sequence) to a separate file named after the record ID, in a dataset collection.
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @ARTICLE{bgruening_galaxytools,
+                Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche},
+                keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna},
+                title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}},
+                url = {https://github.com/bgruening/galaxytools}
+            }
+        </citation>
+    </citations>
+</tool>
--- a/splitfasta.xml	Wed Jul 08 06:23:46 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-<tool id="rbc_splitfasta" name="SplitFasta" version="0.1.0">
-    <requirements>
-    </requirements>
-    <stdio>
-        <exit_code range="1:" />
-    </stdio>
-
-    <command interpreter="python">
-    <![CDATA[
-        splitFasta.py $inputFile
-    ]]></command>
-    <inputs>
-        <param name="inputFile" type="data" format="fasta" label="Fasta file to split"/>
-        <param name="outputFormat" type="select" label="Output Format">
-            <option value="separate">Separate History Items</option>
-            <option value="collection">Create dataset collection</option>
-        </param>
-    </inputs>
-    <outputs>
-        <collection type="list">
-            <filter>outputFormat == 'collection'</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="splits" ext="fasta" visible="false"/>
-        </collection>
-        <data name="output">
-            <filter>outputFormat == 'separate'</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="splits" ext="fasta" visible="true"/>
-        </data>
-    </outputs>
-    <help><![CDATA[
-        Takes an input file and writes each consecutive two lines to a separate file, in a dataset collection.
-    ]]></help>
-    <citations>
-            <citation type="bibtex">
-            @ARTICLE{bgruening_galaxytools,
-                Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche},
-                keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna},
-                title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}},
-                url = {https://github.com/bgruening/galaxytools}
-            }
-        </citation>
-
-    </citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID1_result1.fasta	Fri Oct 16 16:13:34 2015 -0400
@@ -0,0 +1,2 @@
+>ID1 desc
+GATACA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID2_result1.fasta	Fri Oct 16 16:13:34 2015 -0400
@@ -0,0 +1,2 @@
+>ID2 desc
+GATACAGATACAGATACAGATACAGATACA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID3_result1.fasta	Fri Oct 16 16:13:34 2015 -0400
@@ -0,0 +1,3 @@
+>ID3 desc
+GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACA
+GATACA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fasta	Fri Oct 16 16:13:34 2015 -0400
@@ -0,0 +1,11 @@
+>ID1 desc
+GATACA
+
+
+>ID2 desc
+GATACAGATACA
+GATACAGA
+TACAGATACA
+>ID3 desc
+GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGA
+TACAGATACA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Fri Oct 16 16:13:34 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="biopython" version="1.65">
+        <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>