Mercurial > repos > genouest > get_pairs
annotate get_pairs.py @ 0:ac738de70427 draft default tip
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
author | genouest |
---|---|
date | Mon, 10 Sep 2018 10:16:42 -0400 |
parents | |
children |
rev | line source |
---|---|
0
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
1 #!/opt/python/bin/python |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
3 # ---------------------------------------------------------- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
4 # -- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
5 # -- author : Pierre Pericard |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
6 # -- created : 2012-11-09 |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
7 # -- modified: 2013-05-23 |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
8 # -- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
9 # ---------------------------------------------------------- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
10 # -- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
11 # -- description : Get separately paired reads and singletons |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
12 # -- from two fastq files (left and right) |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
13 # -- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
14 # -- get_pairs.py file1.fastq file2.fastq |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
15 # -- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
16 # ---------------------------------------------------------- |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
17 |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
18 import argparse |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
19 import sys |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
20 |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
21 |
ac738de70427
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff
changeset
|
def read_name(header):
    """Return the read name carried by a FASTQ header line.

    The name is the first whitespace-delimited token of ``header`` with its
    first character (the leading '@') removed and anything from the first
    '/' onwards dropped, so '@r1/1 desc' and '@r1/2' both give 'r1'.
    """
    return header.split()[0][1:].split('/')[0]


def collect_read_names(path):
    """Return the set of read names found in the FASTQ file at ``path``.

    Blank lines are ignored; every first line of each group of four
    non-blank lines is treated as a header. Progress is reported on stdout.
    """
    names = set()
    count = 0
    with open(path, 'r') as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            count += 1
            if count % 4 == 1:
                # Collect straight into a set: only membership matters later,
                # so there is no need to keep an intermediate list.
                names.add(read_name(line))
            if count % 40000 == 1:
                sys.stdout.write("\r%.2f M reads read" % (count / 4000000.0))
    sys.stdout.write("\r%.2f M reads read\n" % (count / 4000000.0))
    return names


def split_reads(path, basefilename, notcommon):
    """Split the FASTQ file at ``path`` into paired and unpaired records.

    Records whose read name is in ``notcommon`` are written to
    ``basefilename + '.unpaired.fastq'``, all others to
    ``basefilename + '.paired.fastq'``. Blank lines are dropped and every
    kept line is written stripped, followed by a single newline.
    """
    count = 0
    paired = False
    # All three files are managed by the same 'with' so the outputs are
    # flushed and closed even if reading the input fails part-way.
    with open(basefilename + '.paired.fastq', 'w') as pfh, \
            open(basefilename + '.unpaired.fastq', 'w') as ufh, \
            open(path, 'r') as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            count += 1
            if count % 4 == 1:
                # The header decides where the next four lines go.
                paired = read_name(line) not in notcommon
            if count % 40000 == 1:
                sys.stdout.write("\r%.2f M reads writen" % (count / 4000000.0))
            if paired:
                pfh.write("%s\n" % line)
            else:
                ufh.write("%s\n" % line)
        sys.stdout.write("\r%.2f M reads writen\n" % (count / 4000000.0))


def main(argv=None):
    """Command-line entry point.

    ``argv`` is the list of arguments to parse; ``None`` (the default) makes
    argparse read ``sys.argv[1:]``. Writes left/right ``.paired.fastq`` and
    ``.unpaired.fastq`` files in the current working directory.
    """
    # Arguments
    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
    parser.add_argument('leftreads', metavar='leftreads', type=argparse.FileType('r'), help='left reads fastq')
    parser.add_argument('rightreads', metavar='rightreads', type=argparse.FileType('r'), help='right reads fastq')

    args = parser.parse_args(argv)

    leftreads = args.leftreads.name
    rightreads = args.rightreads.name

    # FileType is only used to validate that both inputs are readable; the
    # files are re-opened by name below, so release these handles now.
    for handle in (args.leftreads, args.rightreads):
        if handle is not sys.stdin:
            handle.close()

    # Names present in exactly one of the two files are the singletons.
    notcommon = collect_read_names(leftreads) ^ collect_read_names(rightreads)

    # Pair each input with its output prefix explicitly rather than comparing
    # paths, so that the "right" outputs are produced even when both
    # arguments name the same file.
    for path, basefilename in ((leftreads, "left"), (rightreads, "right")):
        split_reads(path, basefilename, notcommon)


if __name__ == '__main__':
    main()