changeset 0:ac738de70427 draft default tip

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
author genouest
date Mon, 10 Sep 2018 10:16:42 -0400
parents
children
files get_pairs.py get_pairs.xml test-data/r1.fastq test-data/r1_paired.fastq test-data/r1_unpaired.fastq test-data/r2.fastq test-data/r2_paired.fastq test-data/r2_unpaired.fastq
diffstat 8 files changed, 219 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_pairs.py	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,112 @@
+#!/opt/python/bin/python
+# -*- coding: utf-8 -*-
+# ----------------------------------------------------------
+# --
+# -- author : Pierre Pericard
+# -- created : 2012-11-09
+# -- modified: 2013-05-23
+# --
+# ----------------------------------------------------------
+# --
+# -- description : Get separately paired reads and singletons
+# --               from two fastq files (left and right)
+# --
+# -- get_pairs.py file1.fastq file2.fastq
+# --
+# -- Writes left.paired.fastq, left.unpaired.fastq,
+# -- right.paired.fastq and right.unpaired.fastq in the
+# -- current working directory.
+# --
+# ----------------------------------------------------------
+
+import argparse
+import sys
+
+
+def _read_name(header):
+    """Return the read name shared by both mates of a pair.
+
+    The leading '@' is dropped, anything after the first whitespace is
+    ignored, and everything from the first '/' on (e.g. '/1', '/2') is cut.
+    """
+    return header.split()[0][1:].split('/')[0]
+
+
+def _collect_names(path):
+    """Return the set of read names found in the fastq file at *path*.
+
+    Blank lines are skipped; every fourth non-blank line, starting with
+    the first, is taken to be a record header.
+    """
+    names = set()
+    c = 0
+    with open(path, 'r') as fh:
+        for line in fh:
+            line = line.strip()
+            if not line:
+                continue
+            c += 1
+            if c % 4 == 1:
+                names.add(_read_name(line))
+                # Refresh the progress counter every 10000 records.
+                if c % 40000 == 1:
+                    sys.stdout.write("\r%.2f M reads read" % (c / 4000000.0))
+    sys.stdout.write("\r%.2f M reads read\n" % (c / 4000000.0))
+    return names
+
+
+def _split_reads(path, basefilename, notcommon):
+    """Copy each record of *path* to the paired or unpaired output.
+
+    A record goes to '<basefilename>.paired.fastq' unless its read name
+    is in *notcommon*, in which case it goes to
+    '<basefilename>.unpaired.fastq'.
+    """
+    # One `with` for all three handles so the outputs are flushed and
+    # closed even if reading or writing fails part-way through.
+    with open(path, 'r') as fh, \
+            open(basefilename + '.paired.fastq', 'w') as pfh, \
+            open(basefilename + '.unpaired.fastq', 'w') as ufh:
+        c = 0
+        paired = False
+        for line in fh:
+            line = line.strip()
+            if not line:
+                continue
+            c += 1
+            if c % 4 == 1:
+                # The header decides where all four lines of the record go.
+                paired = _read_name(line) not in notcommon
+                if c % 40000 == 1:
+                    sys.stdout.write("\r%.2f M reads writen" % (c / 4000000.0))
+            out = pfh if paired else ufh
+            out.write("%s\n" % line)
+        sys.stdout.write("\r%.2f M reads writen\n" % (c / 4000000.0))
+
+
+def main():
+    """Parse the command line and split both fastq files."""
+    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
+    parser.add_argument('leftreads', metavar='leftreads', type=argparse.FileType('r'), help='left reads fastq')
+    parser.add_argument('rightreads', metavar='rightreads', type=argparse.FileType('r'), help='right reads fastq')
+
+    args = parser.parse_args()
+
+    # FileType is only used to validate that both inputs are readable; the
+    # files are reopened by name below, so release these handles right away.
+    leftreads = args.leftreads.name
+    rightreads = args.rightreads.name
+    args.leftreads.close()
+    args.rightreads.close()
+
+    # Names seen in exactly one of the two files belong to singletons.
+    notcommon = _collect_names(leftreads) ^ _collect_names(rightreads)
+
+    # The output prefix follows the argument position, so both output
+    # pairs are produced even when the same path is given twice.
+    _split_reads(leftreads, "left", notcommon)
+    _split_reads(rightreads, "right", notcommon)
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_pairs.xml	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,84 @@
+<tool id="get_pairs" name="Separate paired and unpaired reads" version="0.3" >
+    <!-- author : lecorguille@sb-roscoff.fr -->
+    <!-- date : 20-11-12 -->
+    <!-- adapted by abretaud -->
+
+    <description>from two fastq files</description>
+
+    <requirements>
+        <requirement type="package" version="3.6">python</requirement>
+    </requirements>
+
+    <!-- get_pairs.py takes the forward and reverse fastq paths, in that order -->
+    <command><![CDATA[
+        #if $library.type == 'paired':
+            python '$__tool_directory__/get_pairs.py' '$library.input_left' '$library.input_right'
+        #else if $library.type == 'paired_collection'
+            python '$__tool_directory__/get_pairs.py' '$library.input.forward' '$library.input.reverse'
+        #end if
+    ]]></command>
+
+    <inputs>
+        <conditional name="library">
+            <param name="type" type="select" label="Input type">
+                <option value="paired" selected="true">Independent datasets</option>
+                <option value="paired_collection">Paired-end collection</option>
+            </param>
+
+            <when value="paired">
+                <param name="input_left" type="data" format="fastqsanger" label="Forward reads" />
+                <param name="input_right" type="data" format="fastqsanger" label="Reverse reads" />
+            </when>
+
+            <when value="paired_collection">
+                <param name="input" format="fastqsanger" type="data_collection" collection_type="paired" label="Paired collection" />
+            </when>
+
+        </conditional>
+    </inputs>
+
+    <!-- get_pairs.py writes these four fixed file names into the job working directory -->
+    <outputs>
+        <data format="fastqsanger" name="left_paired" from_work_dir="left.paired.fastq" label="${tool.name} on ${on_string}: paired forward" />
+        <data format="fastqsanger" name="right_paired" from_work_dir="right.paired.fastq" label="${tool.name} on ${on_string}: paired reverse" />
+        <data format="fastqsanger" name="left_unpaired" from_work_dir="left.unpaired.fastq" label="${tool.name} on ${on_string}: unpaired forward" />
+        <data format="fastqsanger" name="right_unpaired" from_work_dir="right.unpaired.fastq" label="${tool.name} on ${on_string}: unpaired reverse" />
+    </outputs>
+
+    <tests>
+        <!-- two independent datasets -->
+        <test>
+            <conditional name="library">
+                <param name="type" value="paired" />
+                <param name="input_left" value="r1.fastq" />
+                <param name="input_right" value="r2.fastq" />
+            </conditional>
+            <output name="left_paired" file="r1_paired.fastq" />
+            <output name="right_paired" file="r2_paired.fastq" />
+            <output name="left_unpaired" file="r1_unpaired.fastq" />
+            <output name="right_unpaired" file="r2_unpaired.fastq" />
+        </test>
+        <!-- same reads as a paired collection; ftype matches the declared fastqsanger input format -->
+        <test>
+            <conditional name="library">
+                <param name="type" value="paired_collection" />
+                <param name="input">
+                    <collection type="paired">
+                        <element name="forward" ftype="fastqsanger" value="r1.fastq" />
+                        <element name="reverse" ftype="fastqsanger" value="r2.fastq" />
+                    </collection>
+                </param>
+            </conditional>
+            <output name="left_paired" file="r1_paired.fastq" />
+            <output name="right_paired" file="r2_paired.fastq" />
+            <output name="left_unpaired" file="r1_unpaired.fastq" />
+            <output name="right_unpaired" file="r2_unpaired.fastq" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        A tool to separate paired and unpaired reads from fastq datasets.
+    ]]></help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r1.fastq	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,16 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/1
+GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT
++HWI-ST745_0097:7:1101:1001:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/1
+AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1004:1000#0/1
+AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT
++HWI-ST745_0097:7:1101:1004:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/1
+AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG
++HWI-ST745_0097:7:1101:1005:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r1_paired.fastq	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,12 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/1
+GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT
++HWI-ST745_0097:7:1101:1001:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/1
+AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/1
+AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG
++HWI-ST745_0097:7:1101:1005:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r1_unpaired.fastq	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,4 @@
+@HWI-ST745_0097:7:1101:1004:1000#0/1
+AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT
++HWI-ST745_0097:7:1101:1004:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r2.fastq	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,16 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/2
+GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC
++HWI-ST745_0097:7:1101:1001:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1002:1000#0/2
+TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA
++HWI-ST745_0097:7:1101:1002:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/2
+ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/2
+CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC
++HWI-ST745_0097:7:1101:1005:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r2_paired.fastq	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,12 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/2
+GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC
++HWI-ST745_0097:7:1101:1001:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/2
+ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/2
+CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC
++HWI-ST745_0097:7:1101:1005:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r2_unpaired.fastq	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,4 @@
+@HWI-ST745_0097:7:1101:1002:1000#0/2
+TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA
++HWI-ST745_0097:7:1101:1002:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII