# HG changeset patch
# User genouest
# Date 1536589002 14400
# Node ID ac738de70427e46b0d467ee1f18f3712e7fce44a
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c

diff -r 000000000000 -r ac738de70427 get_pairs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_pairs.py Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,112 @@
+#!/opt/python/bin/python
+# -*- coding: utf-8 -*-
+# ----------------------------------------------------------
+# --
+# -- author : Pierre Pericard
+# -- created : 2012-11-09
+# -- modified: 2013-05-23
+# --
+# ----------------------------------------------------------
+# --
+# -- description : Get separately paired reads and singletons
+# -- from two fastq files (left and right)
+# --
+# -- get_pairs.py file1.fastq file2.fastq
+# --
+# ----------------------------------------------------------
+
+import argparse
+import sys
+
+
+def readable_path(path):
+    """Argparse type: return path if it can be opened for reading."""
+    try:
+        with open(path, 'r'):
+            pass
+    except IOError as err:
+        raise argparse.ArgumentTypeError("can't open '%s': %s" % (path, err))
+    return path
+
+
+def read_name(header):
+    """Return the read name found on a fastq header line.
+
+    Keeps the first whitespace-separated token, drops its first character
+    (the '@') and everything from the first '/' on (e.g. a /1 or /2 suffix).
+    """
+    return header.split()[0][1:].split('/')[0]
+
+
+def fastq_lines(path):
+    """Yield (count, line) for every non-blank line of path.
+
+    Lines are stripped; count starts at 1 and ignores blank lines, so
+    count % 4 == 1 marks a header line in a 4-line-per-record fastq file.
+    """
+    with open(path, 'r') as fh:
+        count = 0
+        for line in fh:
+            line = line.strip()
+            if line:
+                count += 1
+                yield count, line
+
+
+def collect_names(path):
+    """Return the set of read names in path, reporting progress on stdout."""
+    # A set is built directly: only membership matters afterwards.
+    names = set()
+    count = 0
+    for count, line in fastq_lines(path):
+        if count % 4 == 1:
+            names.add(read_name(line))
+            if count % 40000 == 1:
+                sys.stdout.write("\r%.2f M reads read" % (count / 4000000.0))
+    sys.stdout.write("\r%.2f M reads read\n" % (count / 4000000.0))
+    return names
+
+
+def split_reads(path, basefilename, notcommon):
+    """Split the records of path into paired and unpaired output files.
+
+    Records whose read name is in notcommon are written to
+    basefilename + '.unpaired.fastq', all others to
+    basefilename + '.paired.fastq'.
+    """
+    # Both outputs are context-managed so they are closed even on error.
+    with open(basefilename + '.paired.fastq', 'w') as pfh, \
+            open(basefilename + '.unpaired.fastq', 'w') as ufh:
+        count = 0
+        paired = False
+        for count, line in fastq_lines(path):
+            if count % 4 == 1:
+                paired = read_name(line) not in notcommon
+                if count % 40000 == 1:
+                    sys.stdout.write("\r%.2f M reads writen" % (count / 4000000.0))
+            if paired:
+                pfh.write("%s\n" % line)
+            else:
+                ufh.write("%s\n" % line)
+    sys.stdout.write("\r%.2f M reads writen\n" % (count / 4000000.0))
+
+
+def main():
+    """Parse the command line and split both fastq files."""
+    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
+    # Plain paths instead of argparse.FileType: the files are opened where
+    # they are read, so the argument parser leaves no handle open.
+    parser.add_argument('leftreads', metavar='leftreads', type=readable_path, help='left reads fastq')
+    parser.add_argument('rightreads', metavar='rightreads', type=readable_path, help='right reads fastq')
+    args = parser.parse_args()
+
+    # Names present in only one of the two files are singletons.
+    notcommon = collect_names(args.leftreads) ^ collect_names(args.rightreads)
+
+    # Output names follow the argument position, not the path, so passing
+    # the same file twice still produces both left.* and right.* files.
+    split_reads(args.leftreads, 'left', notcommon)
+    split_reads(args.rightreads, 'right', notcommon)
+
+
+if __name__ == '__main__':
+    main()
diff -r 000000000000 -r ac738de70427 get_pairs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_pairs.xml Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,78 @@
+
+
+
+
+
+ from two fastq files
+
+
+ python
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r ac738de70427 test-data/r1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r1.fastq Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,16 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/1
+GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT
++HWI-ST745_0097:7:1101:1001:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/1
+AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1004:1000#0/1
+AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT
++HWI-ST745_0097:7:1101:1004:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/1
+AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG
++HWI-ST745_0097:7:1101:1005:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff -r 000000000000 -r ac738de70427 test-data/r1_paired.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r1_paired.fastq Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,12 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/1
+GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT
++HWI-ST745_0097:7:1101:1001:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/1
+AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/1
+AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG
++HWI-ST745_0097:7:1101:1005:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff -r 000000000000 -r ac738de70427 test-data/r1_unpaired.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r1_unpaired.fastq Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,4 @@
+@HWI-ST745_0097:7:1101:1004:1000#0/1
+AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT
++HWI-ST745_0097:7:1101:1004:1000#0/1
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff -r 000000000000 -r ac738de70427 test-data/r2.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r2.fastq Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,16 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/2
+GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC
++HWI-ST745_0097:7:1101:1001:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1002:1000#0/2
+TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA
++HWI-ST745_0097:7:1101:1002:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/2
+ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/2
+CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC
++HWI-ST745_0097:7:1101:1005:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff -r 000000000000 -r ac738de70427 test-data/r2_paired.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r2_paired.fastq Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,12 @@
+@HWI-ST745_0097:7:1101:1001:1000#0/2
+GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC
++HWI-ST745_0097:7:1101:1001:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1003:1000#0/2
+ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG
++HWI-ST745_0097:7:1101:1003:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@HWI-ST745_0097:7:1101:1005:1000#0/2
+CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC
++HWI-ST745_0097:7:1101:1005:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff -r 000000000000 -r ac738de70427 test-data/r2_unpaired.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/r2_unpaired.fastq Mon Sep 10 10:16:42 2018 -0400
@@ -0,0 +1,4 @@
+@HWI-ST745_0097:7:1101:1002:1000#0/2
+TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA
++HWI-ST745_0097:7:1101:1002:1000#0/2
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII