Mercurial > repos > genouest > get_pairs
changeset 0:ac738de70427 draft default tip
planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
author | genouest |
---|---|
date | Mon, 10 Sep 2018 10:16:42 -0400 |
parents | |
children | |
files | get_pairs.py get_pairs.xml test-data/r1.fastq test-data/r1_paired.fastq test-data/r1_unpaired.fastq test-data/r2.fastq test-data/r2_paired.fastq test-data/r2_unpaired.fastq |
diffstat | 8 files changed, 219 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ----------------------------------------------------------
# --
# -- file    : get_pairs.py
# --           (added in changeset 0:ac738de70427,
# --            Mon Sep 10 10:16:42 2018 -0400)
# -- author  : Pierre Pericard
# -- created : 2012-11-09
# -- modified: 2013-05-23
# --
# ----------------------------------------------------------
# --
# -- description : Get separately paired reads and singletons
# --               from two fastq files (left and right)
# --
# --               get_pairs.py file1.fastq file2.fastq
# --
# -- outputs (written to the current working directory):
# --               left.paired.fastq   left.unpaired.fastq
# --               right.paired.fastq  right.unpaired.fastq
# --
# ----------------------------------------------------------

import argparse
import sys

# A progress message is refreshed every PROGRESS_EVERY FASTQ lines
# (4 lines per read, so every 10,000 reads).
PROGRESS_EVERY = 40000
# Number of FASTQ lines in one million reads; used to scale the counter.
LINES_PER_MILLION_READS = 4000000.0


def read_name(header):
    """Return the pairing key of a FASTQ header line.

    The key is the first whitespace-delimited token of the header, without
    its leading '@' and without anything from the first '/' onwards, so that
    '@NAME/1 comment' and '@NAME/2' both map to 'NAME'.
    """
    return header.split()[0][1:].split('/')[0]


def _readable_file(path):
    """argparse ``type=`` callable: check that *path* can be opened.

    Replaces ``argparse.FileType('r')``, which left both input files open
    for the whole run. The file is opened and closed immediately, and the
    path itself is returned.
    """
    try:
        with open(path, 'r'):
            pass
    except (IOError, OSError) as err:
        raise argparse.ArgumentTypeError("can't open '%s': %s" % (path, err))
    return path


def _numbered_lines(handle):
    """Yield ``(count, line)`` for every FASTQ line of *handle*.

    *line* is stripped of surrounding whitespace and *count* is 1-based, so
    ``count % 4 == 1`` identifies a header line.

    Blank lines are skipped only between records (when the previous record
    is complete). A blank line inside a record is an empty sequence or
    quality string of a zero-length read and must be counted; otherwise
    every following record would be misaligned.
    """
    count = 0
    for raw in handle:
        line = raw.strip()
        if not line and count % 4 == 0:
            continue
        count += 1
        yield count, line


def _progress(count, verb, final=False):
    """Write the carriage-return progress line to stdout."""
    ending = "\n" if final else ""
    sys.stdout.write("\r%.2f M reads %s%s"
                     % (count / LINES_PER_MILLION_READS, verb, ending))


def collect_names(path):
    """Return the set of read names (see ``read_name``) found in *path*."""
    names = set()
    count = 0
    with open(path, 'r') as handle:
        for count, line in _numbered_lines(handle):
            if count % 4 == 1:
                names.add(read_name(line))
            if count % PROGRESS_EVERY == 1:
                _progress(count, "read")
    _progress(count, "read", final=True)
    return names


def split_reads(path, basefilename, notcommon):
    """Split the reads of *path* into paired and unpaired output files.

    A read goes to ``<basefilename>.paired.fastq`` when its name is not in
    *notcommon* (the names present in only one of the two inputs), and to
    ``<basefilename>.unpaired.fastq`` otherwise. Both output files are
    always created, even when they end up empty.
    """
    count = 0
    paired = False
    with open(path, 'r') as source, \
            open(basefilename + '.paired.fastq', 'w') as paired_out, \
            open(basefilename + '.unpaired.fastq', 'w') as unpaired_out:
        for count, line in _numbered_lines(source):
            if count % 4 == 1:
                # The header decides the destination of the whole record.
                paired = read_name(line) not in notcommon
            if count % PROGRESS_EVERY == 1:
                _progress(count, "writen")
            target = paired_out if paired else unpaired_out
            target.write("%s\n" % line)
    _progress(count, "writen", final=True)


def main(argv=None):
    """Command-line entry point.

    *argv* is the argument list without the program name; ``None`` means
    ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
    parser.add_argument('leftreads', metavar='leftreads', type=_readable_file, help='left reads fastq')
    parser.add_argument('rightreads', metavar='rightreads', type=_readable_file, help='right reads fastq')
    args = parser.parse_args(argv)

    left_names = collect_names(args.leftreads)
    right_names = collect_names(args.rightreads)

    # Names seen in exactly one of the two files are the singletons.
    notcommon = left_names ^ right_names

    # The output basename is tied to the argument position, not to the path
    # value, so passing the same path twice still produces both the "left"
    # and the "right" outputs.
    for path, basefilename in ((args.leftreads, "left"),
                               (args.rightreads, "right")):
        split_reads(path, basefilename, notcommon)


if __name__ == '__main__':
    main()
<tool id="get_pairs" name="Separate paired and unpaired reads" version="0.3">
    <!-- file : get_pairs.xml (added in changeset 0:ac738de70427, Mon Sep 10 10:16:42 2018 -0400) -->
    <!-- author : lecorguille@sb-roscoff.fr -->
    <!-- date : 20-11-12 -->
    <!-- adapted by abretaud -->

    <description>from two fastq files</description>

    <requirements>
        <requirement type="package" version="3.6">python</requirement>
    </requirements>

    <!-- get_pairs.py takes the forward and reverse fastq paths as its two
         positional arguments and writes left.paired.fastq,
         left.unpaired.fastq, right.paired.fastq and right.unpaired.fastq
         into the working directory; the outputs below pick them up with
         from_work_dir. -->
    <command><![CDATA[
        #if $library.type == 'paired'
            python '$__tool_directory__/get_pairs.py' '$library.input_left' '$library.input_right'
        #else if $library.type == 'paired_collection'
            python '$__tool_directory__/get_pairs.py' '$library.input.forward' '$library.input.reverse'
        #end if
    ]]></command>

    <inputs>
        <conditional name="library">
            <param name="type" type="select" label="Input type">
                <option value="paired" selected="true">Independent datasets</option>
                <option value="paired_collection">Paired-end collection</option>
            </param>

            <when value="paired">
                <param name="input_left" type="data" format="fastqsanger" label="Forward reads" />
                <param name="input_right" type="data" format="fastqsanger" label="Reverse reads" />
            </when>

            <when value="paired_collection">
                <param name="input" format="fastqsanger" type="data_collection" collection_type="paired" label="Paired collection" />
            </when>

        </conditional>
    </inputs>

    <outputs>
        <data format="fastqsanger" name="left_paired" from_work_dir="left.paired.fastq" label="${tool.name} on ${on_string}: paired forward" />
        <data format="fastqsanger" name="right_paired" from_work_dir="right.paired.fastq" label="${tool.name} on ${on_string}: paired reverse" />
        <data format="fastqsanger" name="left_unpaired" from_work_dir="left.unpaired.fastq" label="${tool.name} on ${on_string}: unpaired forward" />
        <data format="fastqsanger" name="right_unpaired" from_work_dir="right.unpaired.fastq" label="${tool.name} on ${on_string}: unpaired reverse" />
    </outputs>

    <tests>
        <!-- Two independent datasets; the selector is set explicitly rather
             than relying on the default option. -->
        <test>
            <conditional name="library">
                <param name="type" value="paired" />
                <param name="input_left" value="r1.fastq" />
                <param name="input_right" value="r2.fastq" />
            </conditional>
            <output name="left_paired" file="r1_paired.fastq" />
            <output name="right_paired" file="r2_paired.fastq" />
            <output name="left_unpaired" file="r1_unpaired.fastq" />
            <output name="right_unpaired" file="r2_unpaired.fastq" />
        </test>
        <!-- Same data supplied as a paired collection; the element ftype
             matches the fastqsanger format the input parameter declares. -->
        <test>
            <conditional name="library">
                <param name="type" value="paired_collection" />
                <param name="input">
                    <collection type="paired">
                        <element name="forward" ftype="fastqsanger" value="r1.fastq" />
                        <element name="reverse" ftype="fastqsanger" value="r2.fastq" />
                    </collection>
                </param>
            </conditional>
            <output name="left_paired" file="r1_paired.fastq" />
            <output name="right_paired" file="r2_paired.fastq" />
            <output name="left_unpaired" file="r1_unpaired.fastq" />
            <output name="right_unpaired" file="r2_unpaired.fastq" />
        </test>
    </tests>

    <help><![CDATA[
**What it does**

A tool to separate paired and unpaired reads from fastq datasets.

Reads from the forward and reverse datasets are matched by name. The name of
a read is the first whitespace-delimited word of its header line, without the
leading ``@`` and without anything from the first ``/`` onwards, so
``@READ/1`` in the forward dataset matches ``@READ/2`` in the reverse
dataset.

**Outputs**

- *paired forward* / *paired reverse*: reads whose name is present in both
  datasets, in their original order.
- *unpaired forward* / *unpaired reverse*: reads whose name is present in
  only one of the two datasets.
    ]]></help>
    <citations>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/r1.fastq Mon Sep 10 10:16:42 2018 -0400 @@ -0,0 +1,16 @@ +@HWI-ST745_0097:7:1101:1001:1000#0/1 +GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT ++HWI-ST745_0097:7:1101:1001:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1003:1000#0/1 +AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG ++HWI-ST745_0097:7:1101:1003:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1004:1000#0/1 +AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT ++HWI-ST745_0097:7:1101:1004:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1005:1000#0/1 +AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG ++HWI-ST745_0097:7:1101:1005:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/r1_paired.fastq Mon Sep 10 10:16:42 2018 -0400 @@ -0,0 +1,12 @@ +@HWI-ST745_0097:7:1101:1001:1000#0/1 +GAAAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTT ++HWI-ST745_0097:7:1101:1001:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1003:1000#0/1 +AAAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAG ++HWI-ST745_0097:7:1101:1003:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1005:1000#0/1 +AACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGTG ++HWI-ST745_0097:7:1101:1005:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/r1_unpaired.fastq Mon Sep 10 10:16:42 2018 -0400 @@ -0,0 +1,4 @@ +@HWI-ST745_0097:7:1101:1004:1000#0/1 +AAACGCGCTTGCAGATGGGTCGCAGGTTACTTCCTTGCGCACAGGATATAGTTATACCAGCGTTATTGTCGTTAGT ++HWI-ST745_0097:7:1101:1004:1000#0/1 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/r2.fastq Mon Sep 10 10:16:42 2018 -0400 @@ -0,0 +1,16 @@ +@HWI-ST745_0097:7:1101:1001:1000#0/2 +GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC ++HWI-ST745_0097:7:1101:1001:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1002:1000#0/2 +TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA ++HWI-ST745_0097:7:1101:1002:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1003:1000#0/2 +ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG ++HWI-ST745_0097:7:1101:1003:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1005:1000#0/2 +CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC ++HWI-ST745_0097:7:1101:1005:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/r2_paired.fastq Mon Sep 10 10:16:42 2018 -0400 @@ -0,0 +1,12 @@ +@HWI-ST745_0097:7:1101:1001:1000#0/2 +GCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAATTCTCCGCTGATTGTC ++HWI-ST745_0097:7:1101:1001:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1003:1000#0/2 +ATGTCTGGAAATATAGGGGCAAATCCAGGGTTCTTGTCCCATGATGTTCAAAACTGTCAATATCCTGCATTGTTAG ++HWI-ST745_0097:7:1101:1003:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@HWI-ST745_0097:7:1101:1005:1000#0/2 +CCTGCATTGTTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCC ++HWI-ST745_0097:7:1101:1005:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/r2_unpaired.fastq Mon Sep 10 10:16:42 2018 -0400 @@ -0,0 +1,4 @@ +@HWI-ST745_0097:7:1101:1002:1000#0/2 +TTAGCGTAGCGCCACTGCCAATTTCCAGCAAAGCTTTATCCTGTACCCGAATAACCCCGGCAAACCCCAGAGGGAA ++HWI-ST745_0097:7:1101:1002:1000#0/2 +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII