diff get_pairs.py @ 0:ac738de70427 draft default tip

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
author genouest
date Mon, 10 Sep 2018 10:16:42 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_pairs.py	Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,123 @@
+#!/opt/python/bin/python
+# -*- coding: utf-8 -*-
+# ----------------------------------------------------------
+# --
+# -- author : Pierre Pericard
+# -- created : 2012-11-09
+# -- modified: 2013-05-23
+# --
+# ----------------------------------------------------------
+# --
+# -- description : Get separately paired reads and singletons
+# --               from two fastq files (left and right)
+# --
+# -- get_pairs.py file1.fastq file2.fastq
+# --
+# -- output : left.paired.fastq, left.unpaired.fastq,
+# --          right.paired.fastq and right.unpaired.fastq,
+# --          written to the current working directory
+# --
+# ----------------------------------------------------------
+
+import argparse
+import sys
+
+
+def read_name(header):
+    """Return the read name used to match the two mates of a pair.
+
+    The leading character ('@' in a fastq header) is dropped, only the first
+    whitespace-separated token is kept, and everything from the first '/'
+    on (e.g. a '/1' or '/2' mate suffix) is cut off.
+    """
+    return header.split()[0][1:].split('/')[0]
+
+
+def report_progress(line_count, verb, final=False):
+    """Print how many reads have been handled so far, in millions.
+
+    Four lines are counted as one read. Each report starts with a carriage
+    return so that it overwrites the previous one; the final report also
+    ends the line.
+    """
+    end = "\n" if final else ""
+    sys.stdout.write("\r%.2f M reads %s%s" % (line_count / 4000000.0, verb, end))
+    # A report without a newline may sit in the buffer; push it out now.
+    sys.stdout.flush()
+
+
+def fastq_lines(path, verb):
+    """Yield (line_count, line) for each non-blank line of a fastq file.
+
+    Lines are stripped of surrounding whitespace; blank lines are skipped
+    and not counted. Assuming 4-line records, a header line is therefore
+    one with line_count % 4 == 1. Progress is reported every 10000 reads
+    and once more when the file is exhausted.
+    """
+    line_count = 0
+    with open(path, 'r') as fh:
+        for raw in fh:
+            line = raw.strip()
+            if not line:
+                continue
+            line_count += 1
+            if line_count % 40000 == 1:
+                report_progress(line_count, verb)
+            yield line_count, line
+    report_progress(line_count, verb, final=True)
+
+
+def collect_names(path):
+    """Return the set of read names found in the fastq file at path."""
+    return set(read_name(line)
+               for line_count, line in fastq_lines(path, "read")
+               if line_count % 4 == 1)
+
+
+def split_reads(path, prefix, unpaired_names):
+    """Split the reads of path into a paired and an unpaired fastq file.
+
+    Records whose read name is in unpaired_names go to
+    <prefix>.unpaired.fastq, all others to <prefix>.paired.fastq. The input
+    order is preserved in both outputs.
+    """
+    with open(prefix + '.paired.fastq', 'w') as paired_fh:
+        with open(prefix + '.unpaired.fastq', 'w') as unpaired_fh:
+            out = unpaired_fh
+            for line_count, line in fastq_lines(path, "writen"):
+                if line_count % 4 == 1:
+                    # Header line: pick the destination of the whole record.
+                    if read_name(line) in unpaired_names:
+                        out = unpaired_fh
+                    else:
+                        out = paired_fh
+                out.write("%s\n" % line)
+
+
+def main():
+    """Parse the command line and split both fastq files."""
+    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
+    parser.add_argument('leftreads', metavar='leftreads', type=argparse.FileType('r'), help='left reads fastq')
+    parser.add_argument('rightreads', metavar='rightreads', type=argparse.FileType('r'), help='right reads fastq')
+    args = parser.parse_args()
+
+    # argparse opened both files, which checks that they are readable. Only
+    # the paths are used below, so release the handles instead of leaking them.
+    inputs = []
+    for prefix, handle in (("left", args.leftreads), ("right", args.rightreads)):
+        inputs.append((prefix, handle.name))
+        handle.close()
+
+    left_names, right_names = [collect_names(path) for _, path in inputs]
+    # Names seen in exactly one of the two files have no mate.
+    unpaired_names = left_names ^ right_names
+    del left_names, right_names
+
+    # The output prefix follows the argument position rather than the file
+    # name, so giving the same path twice still writes left.* and right.*.
+    for prefix, path in inputs:
+        split_reads(path, prefix, unpaired_names)
+
+
+if __name__ == '__main__':
+    main()