Mercurial > repos > estrain > seqsero_v1
diff SeqSero/libs/split_interleaved_fastq.pl @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl -w
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use File::Basename;

# Date: 14-05-2010
# This program takes a fastq file containing paired-end reads in interleaved
# format as input and returns two separate files containing read 1 and read 2
# in the correct order.
# Author: Ram Vinay Pandey

# Command-line state, filled in by GetOptions below.
my $input  = "";
my $output = "";
my $help   = 0;
my $test   = 0;

my $usage="perl $0 --input interleaved_fastq_file.fastq --output output.fastq\n";

GetOptions(
    "input=s"  => \$input,
    "output=s" => \$output,
    "test"     => \$test,
    "help"     => \$help,
) or pod2usage(-msg=>"Wrong options",-verbose=>1);

pod2usage(-verbose=>2) if $help;

# --test is still accepted for backward compatibility, but this file has never
# shipped a Test package, so fail with a clear message instead of an
# "Undefined subroutine" error.
die "No self-tests are bundled with this script; --test is not available\n"
    if $test;

pod2usage(-msg=>"\n\tProvide an input file!!\n\n\t\t$usage\n\n",-verbose=>1) unless -e $input;
pod2usage(-msg=>"\n\tProvide an output file!!\n\n\t\t$usage\n\n",-verbose=>1) unless $output;

# Only the base name of --output (up to its first dot) is used; the two result
# files are created in the current working directory.
my ($name) = File::Basename::fileparse( $output, '\..*' );
my $output1 = $name . "-read1.fastq";
my $output2 = $name . "-read2.fastq";

# Open the input first so an unreadable input does not leave empty outputs.
open my $ifh, '<', $input
    or die "Could not open file $input for reading: $!\n";
open my $ofh1, '>', $output1
    or die "Could not open output file $output1: $!\n";
open my $ofh2, '>', $output2
    or die "Could not open output file $output2: $!\n";

my $skipped = 0;    # non-blank lines that were not part of a recognised record

LINE:
while ( my $line = <$ifh> ) {

    # Strip LF or CRLF so Windows-style input produces clean output.
    $line =~ s/\r?\n\z//;

    # discard blank line
    next LINE if $line =~ /^\s*$/;

    # A header starts with '@'; its last character (1 or 2) names the mate.
    my $ofh;
    if ( $line =~ /^\s*\@.*1$/ ) {
        $ofh = $ofh1;
    }
    elsif ( $line =~ /^\s*\@.*2$/ ) {
        $ofh = $ofh2;
    }
    else {
        $skipped++;
        next LINE;
    }

    print {$ofh} "$line\n" or die "Could not write record header: $!\n";

    # The remaining three lines of the record (sequence, '+', quality) are
    # copied verbatim, whatever they contain.
    for ( 1 .. 3 ) {
        my $record_line = <$ifh>;
        if ( !defined $record_line ) {
            warn "Input $input ends in the middle of a FASTQ record\n";
            last LINE;
        }
        $record_line =~ s/\r?\n\z//;
        print {$ofh} "$record_line\n"
            or die "Could not write record line: $!\n";
    }
}

warn "Skipped $skipped line(s) that did not belong to a read 1 or read 2 record\n"
    if $skipped;

close $ifh;

# Buffered write errors (e.g. disk full) only surface at close time.
close $ofh1 or die "Could not close output file $output1: $!\n";
close $ofh2 or die "Could not close output file $output2: $!\n";

__END__

=head1 NAME

split-interleaved-fastq.pl - This program takes a fastq file containing paired-end reads in interleaved format as input and returns two separate files containing read 1 and read 2 in the correct order.

=head1 SYNOPSIS

 perl split-interleaved-fastq.pl --input interleaved_fastq_file.fastq --output output.fastq

=head1 OPTIONS

=over 4

=item B<--input>

The input file which contains read1 and read2 in a single file in FASTQ format. Mandatory parameter

=item B<--output>

The output file. Mandatory parameter. Only its base name (up to the first dot) is used: the reads are written to F<NAME-read1.fastq> and F<NAME-read2.fastq> in the current working directory.

=item B<--test>

Accepted for compatibility; no self-tests are bundled, so the script exits with an error.

=item B<--help>

Display help for this script

=back

=head1 Details

=head2 Input

The paired-end reads in interleaved format; the input file looks like the following:

 @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
 +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV
 @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
 +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

A record is assigned to read 1 or read 2 by the last character of its header line (1 or 2).

=head2 Output

The output looks like the following for reads 1:

 @fc_HWUSI-EAS613R:1:1:4:682#CATA/1
 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG
 +fc_HWUSI-EAS613R:1:1:4:682#CATA/1
 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV

The output looks like the following for reads 2:

 @fc_HWUSI-EAS613R:1:1:4:682#CATA/2
 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA
 +fc_HWUSI-EAS613R:1:1:4:682#CATA/2
 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB

=head1 AUTHORS

Ram vinay pandey

=cut