Mercurial > repos > xilinxu > xilinxu
view fastx_toolkit-0.0.6/scripts/fasta_clipping_histogram.pl @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl # FASTX-toolkit - FASTA/FASTQ preprocessing tools. # Copyright (C) 2009 A. Gordon (gordon@cshl.edu) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as # published by the Free Software Foundation, either version 3 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. use strict; use warnings; use GD::Graph::bars; use Data::Dumper; use PerlIO::gzip; if (scalar @ARGV==0) { print<<END; Create a Linker Clipping Information Histogram usage: $0 INPUT_FILE.FA OUTPUT_FILE.PNG INPUT_FILE.FA = input file (in FASTA format, can be GZIPped) OUTPUT_FILE.PNG = histogram image END exit 0; } # # Read parameters # open(IN, "<:gzip(autopop)", "$ARGV[0]") or die "Cannot open input file $ARGV[0]\n"; open(OUT, ">$ARGV[1]") or die "Cannot create output file $ARGV[1]\n"; binmode OUT; my %histogram ; while (my $name = <IN>) { my $sequence = <IN> ; chomp $sequence; my $sequence_length = length($sequence); my $count; if ( index($name, "-")==-1 ) { #Assume this file is not collapsed, just count each seqeunce as 1 $count = 1 ; } else { #Assume file is collapsed (that is - sequence-ID has two numbers with a separating dash) (undef, $count) = $name =~ /^\>(\d+)\-(\d+)$/ ; # If the match failed, treat this fasta as not collapsed; $count = 1 if not defined $count ; } $histogram{$sequence_length} += $count ; } #Textual Output if (0) { print "Length\tCount\n"; foreach my $length_key ( sort { $a <=> $b } keys %histogram ) { print $length_key,"\t", $histogram{$length_key},"\n"; } exit 0; } ## Build the data as required by GD::Graph::bars. ## Data list has two items (each item is itself a list) ## 1. a list of x-axis labels (these are the keys from the histogram) ## 2. a list of values my @data = ( [ sort { $a <=> $b } keys %histogram ], [ map { $histogram{$_} } sort { $a <=> $b } keys %histogram ] ) ; my $graph = new GD::Graph::bars (1000,800); $graph->set( x_label => 'Length', y_label => 'Amount', title => 'Sequences lengths Distribution (after clipping)', bar_spacing => 10, transparent => 0, t_margin => 10, y_tick_number => 20, y_long_ticks => 1, ) or die $graph->error; $graph->plot(\@data) or die $graph->error; print OUT $graph->gd->png; close IN; close OUT;