comparison bin/deinterleave_fastq.sh @ 0:fc22ec8e924e draft

planemo upload commit 6b0a9d0f0ef4bdb0c2e2c54070b510ff28125f7a
author cstrittmatter
date Tue, 21 Apr 2020 12:45:34 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:fc22ec8e924e
#!/bin/bash
# Usage: deinterleave_fastq.sh < interleaved.fastq f.fastq r.fastq [compress]
#
# Deinterleaves a FASTQ file of paired reads (read from stdin) into two FASTQ
# files specified on the command line. Optionally GZip compresses the output
# FASTQ files using pigz if the 3rd command line argument is the word "compress"
#
# Arguments:
#   $1  output path for the first read of every pair
#   $2  output path for the second read of every pair
#   $3  optional; the literal word "compress" gzips both outputs with pigz.
#       Any other value (or no value) leaves the outputs uncompressed.
#
# Input layout expected by the pipeline: every read occupies exactly 4 lines,
# and the two reads of a pair are adjacent, i.e. 8 input lines per pair.
#
# Exit status:
#   0    success
#   2    usage error (an output path is missing or empty)
#   127  compression requested but pigz is not on PATH
#   else status of the right-most failing stage of the main pipeline
#
# Can deinterleave 100 million paired reads (200 million total
# reads; a 43Gbyte file), in memory (/dev/shm), in 4m15s (255s)
#
# Latest code: https://gist.github.com/3521724
# Also see my interleaving script: https://gist.github.com/4544979
#
# Inspired by Torsten Seemann's blog post:
# http://thegenomefactory.blogspot.com.au/2012/05/cool-use-of-unix-paste-with-ngs.html

# Report a failure of any stage of the main pipeline, not only the last one.
set -o pipefail

# Set up some defaults
GZIP_OUTPUT=0
readonly PIGZ_COMPRESSION_THREADS=10

# Both output paths are mandatory; fail early with a readable message instead
# of an "ambiguous redirect" error from the shell.
if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s < interleaved.fastq f.fastq r.fastq [compress]\n' \
    "${0##*/}" >&2
  exit 2
fi

# If the third argument is the word "compress" then we'll compress the output using pigz
if [[ "${3:-}" == "compress" ]]; then
  GZIP_OUTPUT=1
  # Check before any output file is created/truncated.
  if ! command -v pigz > /dev/null; then
    printf '%s: pigz is required for "compress" but was not found\n' \
      "${0##*/}" >&2
    exit 127
  fi
fi
readonly GZIP_OUTPUT

#######################################
# Writes one mate of every pair to a file.
# Reads tab-joined records (8 fields per pair) on stdin, keeps the requested
# fields and turns each field back into its own line.
# Globals:   GZIP_OUTPUT (read), PIGZ_COMPRESSION_THREADS (read)
# Arguments: $1 - field range passed to `cut -f` (e.g. 1-4)
#            $2 - destination file
# Returns:   status of the pipeline that produced the file
#######################################
write_reads() {
  local fields=$1
  local dest=$2

  if (( GZIP_OUTPUT )); then
    cut -f "${fields}" \
      | tr "\t" "\n" \
      | pigz --best --processes "${PIGZ_COMPRESSION_THREADS}" > "${dest}"
  else
    cut -f "${fields}" | tr "\t" "\n" > "${dest}"
  fi
}

# paste joins every 8 input lines into one tab-separated record; tee feeds the
# same records to both writers: fields 1-4 are the first read, 5-8 the second.
# NOTE: the status of the writer inside >( ... ) is not part of the pipeline
# status, so only failures of paste, tee and the second writer are reported.
paste - - - - - - - - \
  | tee >(write_reads 1-4 "$1") \
  | write_reads 5-8 "$2"