Mercurial > repos > estrain > seqsero_v1
view SeqSero/libs/split_interleaved_fastq.pl @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl-w use strict; use warnings; use Getopt::Long; use Pod::Usage; use File::Basename; # Date: 14-05-2010 # This program takes a fastq file containing paired-end reads in interleaved format as input and returns two separate files containing read 1 and read 2 in the correct order. # Author: Ram Vinay Pandey # Define the variables my $input=""; my $output=""; my $help=0; my $test=0; my $verbose=1; my $usage="perl $0 --input interleaved_fastq_file.fastq --output output.fastq\n"; GetOptions( "input=s" =>\$input, "output=s" =>\$output, "test" =>\$test, "help" =>\$help ) or pod2usage(-msg=>"Wrong options",-verbose=>1); pod2usage(-verbose=>2) if $help; Test::runTests() if $test; pod2usage(-msg=>"\n\tProvide an input file!!\n\n\t\t$usage\n\n",-verbose=>1) unless -e $input; pod2usage(-msg=>"\n\tProvide an output file!!\n\n\t\t$usage\n\n",-verbose=>1) unless $output; my ( $name, $path, $extension ) = File::Basename::fileparse ( $output, '\..*' ); my $output1 = $name."-read1.fastq"; my $output2 = $name."-read2.fastq"; open my $ofh1, ">$output1" or die "Could not open output file"; open my $ofh2, ">$output2" or die "Could not open output file"; open (IN, "<$input") or die ("Could not open file $input for reading\n"); while (<IN>) { chomp; s/\r/\n/g; # discard blank line if (m/^\s*$/g) { next; } else { # Reading all lines for read 1 if (m/^\s*\@.*1$/) { print $ofh1 "$_\n"; my $ct=0; while(my $l = <IN>) { $ct++; chomp $l; s/\r/\n/g; print $ofh1 "$l\n"; last if($ct ==3); } } # Reading all lines for read 2 if (m/^\s*\@.*2$/) { print $ofh2 "$_\n"; my $ct=0; while(my $l = <IN>) { $ct++; chomp $l; s/\r/\n/g; print $ofh2 "$l\n"; last if($ct ==3); } } } } close IN; close $ofh1; close $ofh2; =head1 NAME split-interleaved-fastq.pl - TThis program takes a fastq file containing paired-end reads in interleaved format as input and returns two separate files containing read 1 and read 2 in the correct order. =head1 SYNOPSIS perl split-interleaved-fastq.pl --input interleaved_fastq_file.fastq --output output.fastq =head1 OPTIONS =over 4 =item B<--input> The input file which contains read1 and read2 in a single file in FASTQ format. Mandatory parameter =item B<--output> The output file. Mandatory parameter =item B<--help> Display help for this script =back =head1 Details =head2 Input The paired-end reads in interleaved format; input file looks like following: @fc_HWUSI-EAS613R:1:1:4:682#CATA/1 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG +fc_HWUSI-EAS613R:1:1:4:682#CATA/1 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV @fc_HWUSI-EAS613R:1:1:4:682#CATA/2 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA +fc_HWUSI-EAS613R:1:1:4:682#CATA/2 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB =head2 Output The output looks like as following for reads 1: @fc_HWUSI-EAS613R:1:1:4:682#CATA/1 TTGTANGATTTCGTCCAGACTTATCTGGAGCATCCGGACGGTCGGGTGAAGCTCAATCCTCAGCTGGTGTTG +fc_HWUSI-EAS613R:1:1:4:682#CATA/1 baaa\DVbbbbaaaaa`[`abaaaab`b`]aab]_aaa``Z^`a[SN^QR^`]___aXK[a\T\[UTWWMZV The output looks like as following for reads 2: @fc_HWUSI-EAS613R:1:1:4:682#CATA/2 TCGACAGCTGCTGCTCCGTATTGAGGTACGGATCGTTCACGATCATATACGCCCTCTCTTTCAAAAACCTCA +fc_HWUSI-EAS613R:1:1:4:682#CATA/2 bbbY`[`a\S_Y][XPaUDZ__LLL]TZPWXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB =head1 AUTHORS Ram vinay pandey =cut