comparison fasta-stats.pl @ 2:cd0874854f51 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit adc5e3616c1849551c9a712b651b0d1c6b0e88f1"
author iuc
date Mon, 26 Apr 2021 10:01:43 +0000
parents 16f1f3e2de42
children 56022eb50bbd
comparison
equal deleted inserted replaced
1:16f1f3e2de42 2:cd0874854f51
45 process($seq) if $n; 45 process($seq) if $n;
46 46
47 # sort length array 47 # sort length array
48 # (should use hash here for efficiency with huge no of short reads?) 48 # (should use hash here for efficiency with huge no of short reads?)
49 49
50 @len = sort { $a <=> $b } @len; 50 @len = sort { $b <=> $a } @len;
51 51
52 # compute more stats 52 # compute more stats
53 53
54 $stat{'num_seq'} = scalar(@len); 54 $stat{'num_seq'} = scalar(@len);
55 55
60 $stat{'len_median'} = $len[int(@len/2)]; 60 $stat{'len_median'} = $len[int(@len/2)];
61 $stat{'len_mean'} = int( $stat{'num_bp'} / $stat{'num_seq'} ); 61 $stat{'len_mean'} = int( $stat{'num_bp'} / $stat{'num_seq'} );
62 62
63 # calculate n50 63 # calculate n50
64 my $thresh = int 0.5 * $stat{'num_bp'}; 64 my $thresh = int 0.5 * $stat{'num_bp'};
65 $stat{'len_N50'} = &calc_x50(@len, $thresh); 65 ($stat{'len_N50'}, $stat{'L50'}) = &calc_x50(\@len, $thresh);
66 66
67 #calculate NG50 67 #calculate NG50
68 if ($calc_ng50) { 68 if ($calc_ng50) {
69 my $thresh = int 0.5 * $genome_size * 1000000; 69 my $thresh = int 0.5 * $genome_size;
70 $stat{'len_NG50'} = &calc_x50(@len, $thresh); 70 ($stat{'len_NG50'}, $stat{'LG50'}) = &calc_x50(\@len, $thresh);
71 } 71 }
72 } 72 }
73 73
74 #calculate GC content 74 #calculate GC content
75 $stat{'num_bp_not_N'} = $stat{'num_G'} + $stat{'num_C'} + $stat{'num_A'} + $stat{'num_T'}; 75 $stat{'num_bp_not_N'} = $stat{'num_G'} + $stat{'num_C'} + $stat{'num_A'} + $stat{'num_T'};
99 } 99 }
100 100
101 # N50/NG50 calculation sub 101 # N50/NG50 calculation sub
102 102
103 sub calc_x50{ 103 sub calc_x50{
104 my @x = shift; 104 my $ref = shift;
105 my @x = @$ref;
105 my $thresh = shift; 106 my $thresh = shift;
106 my $cum=0; 107 my $cum=0;
107 for my $i (0 .. $#x) { 108 for my $i (0 .. $#x) {
108 $cum += $x[$i]; 109 $cum += $x[$i];
109 if ($cum >= $thresh) { 110 if ($cum >= $thresh) {
110 return $x[$i]; 111 return $x[$i], $i+1;
111 } 112 }
112 } 113 }
113 return 0; 114 return (0,0);
114 } 115 }
115 116