# HG changeset patch
# User iuc
# Date 1619431303 0
# Node ID cd0874854f51db59784f4310656b8221542ed06f
# Parent 16f1f3e2de426a34a75654a3697b309855548987
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit adc5e3616c1849551c9a712b651b0d1c6b0e88f1"
diff -r 16f1f3e2de42 -r cd0874854f51 fasta-stats.pl
--- a/fasta-stats.pl Wed Apr 21 09:10:46 2021 +0000
+++ b/fasta-stats.pl Mon Apr 26 10:01:43 2021 +0000
@@ -47,7 +47,7 @@
# sort length array
# (should use hash here for efficiency with huge no of short reads?)
-@len = sort { $a <=> $b } @len;
+@len = sort { $b <=> $a } @len;
# compute more stats
@@ -62,12 +62,12 @@
# calculate n50
my $thresh = int 0.5 * $stat{'num_bp'};
- $stat{'len_N50'} = &calc_x50(@len, $thresh);
+ ($stat{'len_N50'}, $stat{'L50'}) = &calc_x50(\@len, $thresh);
#calculate NG50
if ($calc_ng50) {
- my $thresh = int 0.5 * $genome_size * 1000000;
- $stat{'len_NG50'} = &calc_x50(@len, $thresh);
+ my $thresh = int 0.5 * $genome_size;
+ ($stat{'len_NG50'}, $stat{'LG50'}) = &calc_x50(\@len, $thresh);
}
}
@@ -101,15 +101,16 @@
# N50/NG50 calculation sub
sub calc_x50{
- my @x = shift;
+ my $ref = shift;
+ my @x = @$ref;
my $thresh = shift;
my $cum=0;
for my $i (0 .. $#x) {
$cum += $x[$i];
if ($cum >= $thresh) {
- return $x[$i];
+ return $x[$i], $i+1;
}
}
- return 0;
+ return (0,0);
}
diff -r 16f1f3e2de42 -r cd0874854f51 fasta-stats.xml
--- a/fasta-stats.xml Wed Apr 21 09:10:46 2021 +0000
+++ b/fasta-stats.xml Mon Apr 26 10:01:43 2021 +0000
@@ -14,7 +14,7 @@
-
+
@@ -25,8 +25,8 @@
-
-
+
+
diff -r 16f1f3e2de42 -r cd0874854f51 test-data/ng50_input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ng50_input.fasta Mon Apr 26 10:01:43 2021 +0000
@@ -0,0 +1,88 @@
+>1
+ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG
+GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT
+GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT
+CCCTGTTCGTGAGGTCTGTCCAGTGACCCATCGTCCAGCCCTATACCGGG
+ACCCTGTTACAGACATACCCTATGCCACTGCTCGAGCCTTCAAGATCATT
+CGTGAGGCTTACAAGAAGTACATTACTGCCCATGGACTGCCGCCCACTGC
+CTCAGCCCTGGGCCCCGGCCCGCCACCTCCTGAGCCCCTCCCTGGCTCTG
+GGCCCCGAGCCTTGCGCCAGAAAATTGTCATTAAATGA
+>2
+ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTT
+TGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG
+>3
+GTTCTCAGCTTCCTTGCTTCCATGGCTCCAGCACCATTCGAAACCTCAAA
+GAGAGGTTCCACATGAGCATGACTGAGGAGCAGCTGCAGCTGCTGGTGGA
+GCAGATGGTGGATGGCAGTATGCGGTCTATCACCACCAAACTCTATGACG
+GCTTCCAGTACCTCACCAACGGCATCATGTGA
+>4
+ATGGAAGCGTTTTTGGGGTCGCGGTCCGGACTTTGGGCGGGGGGTCCGGC
+CCCAGGACAGTTTTACCGCATTCCGTCCACTCCCGATTCCTTCATGGATC
+CGGCGTCTGCACTTTACAGAGGTCCAATCACGCGGACCCA
+>5
+TCTTTTCCTTCTCTACCATTTTCAACAAAGCAGGGGAAATAACTCAGTCT
+CAGAAGACAGGAAACATCAACAAGTTGTGATGCCCTTTTCTTCCAATACT
+ATTGAGGCTCACAAGTCAGCTCATGTAGACGGATCACTTAAGAGCAACAA
+ACTGAAGTCTGCAAGAAAATTCACATTTCTATCTGATGAGGATGACTTAA
+GTGCCCATAATCCCCTTTATAAGGAAAACATAAGTCAAGTATCAACAAAT
+TCAGACATTTCACAGAGAACAGATTTTGTAGACCCATTTTCACCCAAAAT
+ACAAGCCAAGAGTAAGTCTCTGAGGGGCCCAAGAGAAAAGATTCAGAGGC
+TGTGGAGTCAGTCAGTCAGCTTACCCAGGAGGCTGATGAGGAAAGTTCCA
+AATAGACCAGAGATCATAGATCTGCAGCAGTGGCAAGGCACCAGGCAGAA
+AGCTGAAAATGAAAACACTGGAATCTGTACAAACAAAAGAGGTAGCAGCA
+ATCCATTGCTTACAACTGAAGAGGCAAATTTGACAGAGAAAGAGGAAATA
+AGGCAAGGTGAAACACTGATGATAGAAGGAACAGAACAGTTGAAATCTCT
+CTCTTCAGACTCTTCATTTTGCTTTCCCAGGCCTCACTTCTCATTCTCCA
+CTTTGCCAACTGTTTCAAGAACTGTGGAACTCAAATCAGAACCTAATGTC
+ATCAGTTCTCCTGCTGAGTGTTCCTTGGAACTTTCTCCTTCAAGGCCTTG
+TGTTTTACATTCTTCACTCTCTAGGAGAGAGACACCTATTTGTATGTTAC
+CTATTGAAACCGAAAGAAATATTTTTGAAAATTTTGCCCATCCACCAAAC
+ATCTCTCCTTCTGCCTGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNACATTTTTTCCACTTTCCGTTTCAACGTCTGGTCCCCC
+AACAccacctcttctacctccatttccaactcctcttcctccaccacctc
+cttctattccttgccctccacctccttcAGCTTCATTTCTGTCCACAGAG
+TGTGTCTGTATAACAGGTGTTAAATGCACGACCAACTTGATGCCTGCCGA
+GAAAATTAAGTCCTCTATGACACAGCTATCAACAACGACAGTGTGTAAAA
+CAGACCCTCAGAGAGAACCAAAAGGCATCCTCAGACACGTTAAAAACTTA
+GCAGAACTTGAAAAATCAGTAGCTAACATGTACAGTCAAATAGAAAAAAA
+CTATCTACGCACAAATGTTTCAGAACTTCAAACTATGTGCCCTTCAGAAG
+TAACAAATATGGAAATCACATCTGAACAAAACAAGGGGAGTTTGAACAAT
+ATTGTCGAGGGAACTGAAAAACAATCTCACAGTCAATCTACTTCACTGTA
+A
+>6
+ATCCAATGGATTTGAACAGAAGCGCTTTGCCAGGCTTGCCAGCAAGAAGG
+CAGTGGAGGAACTTGCCTACAAATGGAGTGTTGAGGATATGTAA
+>7
+ATGCAGCCCCGGGTACTCCTTGTTGTTGCCCTCCTGGCGCTCCTGGCCTC
+TGCCC
+>8
+CCTAAAGCTCCTTGACAACTGGGACAGCGTGACCTCCACCTTCAGCAAGC
+TGCGCGAACAGCTCGGCCCTGTGACCCAGGAGTTCTGGGATAACCTGGAA
+AAGGAGACAGAGGGCCTGAGGCAGGAGATGAGCAAGGATCTGGAGGAGGT
+GAAGGCCAAGGTGCAGCCCTACCTGGACGACTTCCAGAAGAAGTGGCAGG
+AGGAGATGGAGCTCTACCGCCAGAAGGTGGAGCCGCTGCGCGCAGAGCTC
+CAAGAGGGCGCGCGCCAGAAGCTGCACGAGCTGCAAGAGAAGCTGAGCCC
+ACTGGGCGAGGAGATGCGCGACCGCGCGCGCGCCCATGTGGACGCGCTGC
+GCACGCATCTGGCCCCCTACAGCGACGAGCTGCGCCAGCGCTTGGCCGCG
+CGCCTTGAGGCTCTCAAGGAGAACGGCGGCGCCAGACTGGCCGAGTACCA
+CGCCAAGGCCACCGAGCATCTGAGCACGCTCAGCGAGAAGGCCAAGCCCG
+CGCTCGAGGACCTCCGCCAAGGCCTGCTGCCCGTGCTGGAGAGCTTCAAG
+GTCAGCTTCCTGAGCGCTCTCGAGGAGTACACTAAGAAGCTCAACACCCA
+GTGA
+>9
+ATGCTCCACCTGCATGGCTGGCAAACCATG
+>10
+GAGCTTTCTTCCTCTATGCTGGATTTGCTGCTGTGGGACTCCTTTTCATC
+TATGGCTGTCTTCCTGAGACCAAAGGCAAAAAATTAGAGGAAATTGAATC
+ACTCTTTGACAACAGGCTATGTACATGTGGCACTTCAGATTCTGATGAAG
+GGAGATATATTGAATATATTCGGGTAAAGGGAAGTAACTATCATCTTTCT
+GACAATGATGCTTCTGATGTGGAATAA
+>11
+ATGAACTCACCAGAGGCGAGGCTCTGCGTTGCTCAATGCAGAGACTCTTA
+CCCAGGGTGTCAGCCTCTGAAAGATACACGTGCCTGGGCCTCTTCCCTGA
+AGATGGACCCGGCAGGTCTGGAGGGAGGCCCCCGTGATGAATCCCGTGAT
+GAGCCGCCGATCCGAGCTCAGGCTGCGTCATGGGACCAGCCACAAGGTTG
+CCTGACCTATAAAGGTCGCAGGAGTGCCTCAGGGACACAGAAGCAGTTAC
+AGCTGCCAG
\ No newline at end of file
diff -r 16f1f3e2de42 -r cd0874854f51 test-data/ng50_out.txt
--- a/test-data/ng50_out.txt Wed Apr 21 09:10:46 2021 +0000
+++ b/test-data/ng50_out.txt Mon Apr 26 10:01:43 2021 +0000
@@ -1,15 +1,17 @@
-GC_content 52.0
-len_N50 194780
-len_NG50 0
-len_max 194780
-len_mean 194780
-len_median 194780
-len_min 194780
-num_A 46297
-num_C 50626
-num_G 50678
-num_N 0
-num_T 47179
-num_bp 194780
-num_bp_not_N 194780
-num_seq 1
+GC_content 51.1
+L50 2
+LG50 2
+len_N50 604
+len_NG50 604
+len_max 30
+len_mean 324
+len_median 182
+len_min 1501
+num_A 895
+num_C 940
+num_G 807
+num_N 145
+num_T 778
+num_bp 3565
+num_bp_not_N 3420
+num_seq 11
diff -r 16f1f3e2de42 -r cd0874854f51 test-data/test_out.txt
--- a/test-data/test_out.txt Wed Apr 21 09:10:46 2021 +0000
+++ b/test-data/test_out.txt Mon Apr 26 10:01:43 2021 +0000
@@ -1,4 +1,5 @@
GC_content 52.0
+L50 1
len_N50 194780
len_max 194780
len_mean 194780