Mercurial > repos > genouest > get_pairs
view get_pairs.py @ 0:ac738de70427 draft default tip
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
author | genouest |
---|---|
date | Mon, 10 Sep 2018 10:16:42 -0400 |
parents | |
children |
line wrap: on
line source
#!/opt/python/bin/python # -*- coding: utf-8 -*- # ---------------------------------------------------------- # -- # -- author : Pierre Pericard # -- created : 2012-11-09 # -- modified: 2013-05-23 # -- # ---------------------------------------------------------- # -- # -- description : Get separately paired reads and singletons # -- from two fastq files (left and right) # -- # -- get_pairs.py file1.fastq file2.fastq # -- # ---------------------------------------------------------- import argparse import sys if __name__ == '__main__': # Arguments parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)') parser.add_argument('leftreads', metavar='leftreads', type=argparse.FileType('r'), help='left reads fastq') parser.add_argument('rightreads', metavar='rightreads', type=argparse.FileType('r'), help='right reads fastq') args = parser.parse_args() leftreads = args.leftreads.name rightreads = args.rightreads.name (n1, n2) = (list(), list()) for f, n in ((leftreads, n1), (rightreads, n2)): with open(f, 'r') as fh: c = 0 for line in fh: line = line.strip() if line: c += 1 if c % 4 == 1: n.append(line.split()[0][1:].split('/')[0]) if c % 40000 == 1: sys.stdout.write("\r%.2f M reads read" % (c / 4000000.0)) sys.stdout.write("\r%.2f M reads read\n" % (c / 4000000.0)) notcommon = set(n1) ^ set(n2) for f in (leftreads, rightreads): if f == leftreads: basefilename = "left" else: basefilename = "right" pfh = open(basefilename + '.paired.fastq', 'w') ufh = open(basefilename + '.unpaired.fastq', 'w') with open(f, 'r') as fh: c = 0 paired = False for line in fh: line = line.strip() if line: c += 1 if c % 4 == 1: paired = line.split()[0][1:].split('/')[0] not in notcommon if c % 40000 == 1: sys.stdout.write("\r%.2f M reads writen" % (c / 4000000.0)) if paired: pfh.write("%s\n" % line) else: ufh.write("%s\n" % line) sys.stdout.write("\r%.2f M reads writen\n" % (c / 4000000.0)) pfh.close() ufh.close()