Mercurial > repos > genouest > get_pairs
comparison get_pairs.py @ 0:ac738de70427 draft default tip
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
| author | genouest |
|---|---|
| date | Mon, 10 Sep 2018 10:16:42 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ac738de70427 |
|---|---|
| 1 #!/opt/python/bin/python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 # ---------------------------------------------------------- | |
| 4 # -- | |
| 5 # -- author : Pierre Pericard | |
| 6 # -- created : 2012-11-09 | |
| 7 # -- modified: 2013-05-23 | |
| 8 # -- | |
| 9 # ---------------------------------------------------------- | |
| 10 # -- | |
| 11 # -- description : Get separately paired reads and singletons | |
| 12 # -- from two fastq files (left and right) | |
| 13 # -- | |
| 14 # -- get_pairs.py file1.fastq file2.fastq | |
| 15 # -- | |
| 16 # ---------------------------------------------------------- | |
| 17 | |
| 18 import argparse | |
| 19 import sys | |
| 20 | |
| 21 | |
def _readable_path(path):
    """Argparse ``type=`` callable: return *path* if it can be opened for reading.

    The file is opened and closed immediately, so no handle stays open
    for the lifetime of the process.

    Raises:
        argparse.ArgumentTypeError: if the file cannot be opened; argparse
            turns this into a usage error.
    """
    try:
        with open(path, 'r'):
            pass
    except (IOError, OSError) as err:
        raise argparse.ArgumentTypeError("can't open '%s': %s" % (path, err))
    return path


def _numbered_lines(fh):
    """Yield ``(count, line)`` for every non-blank line of *fh*.

    Lines are stripped of surrounding whitespace and *count* starts at 1;
    blank lines are skipped and do not advance the counter.
    """
    count = 0
    for raw in fh:
        line = raw.strip()
        if line:
            count += 1
            yield count, line


def _read_name(header):
    """Return the read name shared by both mates of a pair.

    Takes the first whitespace-delimited token of *header*, drops its first
    character and keeps what precedes the first '/'.
    """
    return header.split()[0][1:].split('/')[0]


def _collect_names(path):
    """Return the set of read names found in the fastq file at *path*.

    Every fourth non-blank line, starting with the first, is treated as a
    header line. Progress is reported on stdout.
    """
    names = set()
    count = 0
    with open(path, 'r') as fh:
        for count, line in _numbered_lines(fh):
            if count % 4 == 1:
                names.add(_read_name(line))
            if count % 40000 == 1:
                sys.stdout.write("\r%.2f M reads read" % (count / 4000000.0))
    sys.stdout.write("\r%.2f M reads read\n" % (count / 4000000.0))
    return names


def _split_reads(path, basefilename, notcommon):
    """Split the fastq file at *path* into paired and unpaired outputs.

    Records whose read name is absent from *notcommon* are written to
    ``<basefilename>.paired.fastq``; the others go to
    ``<basefilename>.unpaired.fastq``. Both files are created in the current
    working directory and are closed even if an error interrupts the copy.
    """
    count = 0
    paired = False
    with open(basefilename + '.paired.fastq', 'w') as pfh, \
            open(basefilename + '.unpaired.fastq', 'w') as ufh, \
            open(path, 'r') as fh:
        for count, line in _numbered_lines(fh):
            if count % 4 == 1:
                # The header decides where all four lines of the record go.
                paired = _read_name(line) not in notcommon
            if count % 40000 == 1:
                sys.stdout.write("\r%.2f M reads writen" % (count / 4000000.0))
            if paired:
                pfh.write("%s\n" % line)
            else:
                ufh.write("%s\n" % line)
    sys.stdout.write("\r%.2f M reads writen\n" % (count / 4000000.0))


def main(argv=None):
    """Separate paired reads from singletons in two fastq files.

    Args:
        argv: command-line arguments (left fastq path, right fastq path);
            defaults to ``sys.argv[1:]`` when None.

    Writes left.paired.fastq, left.unpaired.fastq, right.paired.fastq and
    right.unpaired.fastq in the current working directory.
    """
    # Arguments
    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
    parser.add_argument('leftreads', metavar='leftreads', type=_readable_path, help='left reads fastq')
    parser.add_argument('rightreads', metavar='rightreads', type=_readable_path, help='right reads fastq')

    args = parser.parse_args(argv)

    # A read is a singleton when its name occurs in exactly one input.
    notcommon = _collect_names(args.leftreads) ^ _collect_names(args.rightreads)

    # The output prefix is tied to the argument position rather than to the
    # path, so passing the same file twice still produces both output sets.
    for basefilename, path in (("left", args.leftreads), ("right", args.rightreads)):
        _split_reads(path, basefilename, notcommon)


if __name__ == '__main__':
    main()
