Mercurial > repos > mvdbeek > damidseq_findpeaks
comparison find_peaks @ 0:be830200e987 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damidseq_findpeaks commit 87c99a97cb2ce55640afdd2e55c8b3ae5ad99324
author | mvdbeek |
---|---|
date | Fri, 20 Apr 2018 04:34:17 -0400 |
parents | |
children | 7b0ca503bb95 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:be830200e987 |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 # Copyright © 2012-2015, Owen Marshall | |
4 | |
5 # This program is free software; you can redistribute it and/or modify | |
6 # it under the terms of the GNU General Public License as published by | |
7 # the Free Software Foundation; either version 2 of the License, or (at | |
8 # your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, but | |
11 # WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 # General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with this program; if not, write to the Free Software | |
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | |
18 # USA | |
19 | |
20 use strict; | |
21 use File::Basename; | |
22 use 5.010; | |
23 $|++; | |
24 | |
25 my $version = "1.0.1"; | |
26 | |
27 print STDERR "\nfind_peaks v$version\nCopyright © 2012-15, Owen Marshall\n\n"; | |
28 | |
29 my %vars = ( | |
30 'fdr' => 0.01, | |
31 'min_count' => 2, | |
32 'n' => 100, | |
33 'frac' => 0, | |
34 'min_quant' => 0.95, | |
35 'step' => 0.01, | |
36 'unified_peaks' => 'max', | |
37 ); | |
38 | |
39 my %vars_details = ( | |
40 'fdr' => 'False discovery rate value', | |
41 'min_count' => 'Minimum number of fragments to consider as a peak', | |
42 'n' => 'Number of iterations', | |
43 'frac' => 'Number of random fragments to consider per iteration', | |
44 'min_quant' => 'Minimum quantile for considering peaks', | |
45 'step' => 'Stepping for quantiles', | |
46 'unified_peaks' => "Method for calling peak overlaps (two options):\n\r'min': call minimum overlapping peak area\n\r'max': call maximum overlap as peak", | |
47 ); | |
48 | |
49 | |
50 my @in_files; | |
51 process_cli(); | |
52 | |
53 # Time and date | |
54 my ($sec,$min,$hour,$mday,$mon,$year) = localtime(); | |
55 my $date = sprintf("%04d-%02d-%02d.%02d-%02d-%02d",$year+1900,$mon+1,$mday,$hour,$min,$sec); | |
56 | |
57 help() unless @in_files; | |
58 | |
59 foreach my $fn (@in_files) { | |
60 my @in; | |
61 my @unified_peaks; | |
62 my @sig_peaks; | |
63 my @peakmins; | |
64 | |
65 my %peaks; | |
66 my %peak_count; | |
67 my %peak_count_real; | |
68 my %log_scores; | |
69 my %regression; | |
70 my %peak_fdr_cutoff; | |
71 my %fdr; | |
72 | |
73 # Output file names | |
74 my ($name,$dir,$ext) = fileparse($fn, qr/\.[^.]*/); | |
75 | |
76 # filenames | |
77 my $fn_base_date = "peak_analysis.".$name.".$date"; | |
78 my $base_dir = "$fn_base_date/"; | |
79 my $out = "$base_dir"."$name-FDR$vars{'fdr'}"; | |
80 my $out_peak_unified_track = $out.".peaks.gff"; | |
81 my $out_peaks = $out."_FDR-data"; | |
82 | |
83 # Load gff data files | |
84 load_gff(FILE=>$fn, IN_REF=>\@in); | |
85 | |
86 my $probes = @in; | |
87 | |
88 find_quants(IN_REF=>\@in, PEAKMINS_REF=>\@peakmins); | |
89 find_randomised_peaks(IN_REF=>\@in, PEAKMINS_REF=>\@peakmins, PEAK_COUNT=>\%peak_count, PEAKS=>\%peaks); | |
90 | |
91 # Make directory | |
92 mkdir($base_dir); | |
93 | |
94 # Open peaks file for writing | |
95 open(OUTP, ">$out_peaks")|| die "Cannot open peak file for writing: $!\n"; | |
96 print OUTP "FDR peak call v$version\n\n"; | |
97 print OUTP "Input file: $fn\n"; | |
98 | |
99 calculate_regressions(IN_REF=>\@in, PEAKMINS_REF=>\@peakmins, PEAK_COUNT=>\%peak_count, PEAKS=>\%peaks, LOG_SCORES=>\%log_scores, REGRESSION=>\%regression); | |
100 | |
101 call_peaks_unified_redux(ITER=>1, REAL=>1, AREF=>\@in, PEAKMINS_REF=>\@peakmins, PEAK_COUNT=>\%peak_count, PEAK_COUNT_REAL=>\%peak_count_real, PEAKS=>\%peaks); | |
102 | |
103 calculate_fdr(IN_REF=>\@in, PEAKMINS_REF=>\@peakmins, PEAK_COUNT=>\%peak_count, PEAK_COUNT_REAL=>\%peak_count_real, PEAK_FDR_CUTOFF=>\%peak_fdr_cutoff, FDR=>\%fdr, LOG_SCORES=>\%log_scores, REGRESSION=>\%regression); | |
104 | |
105 find_significant_peaks(PEAKMINS=>\@peakmins, SIG_PEAKS=>\@sig_peaks, PEAKS=>\%peaks, PEAK_FDR_CUTOFF=>\%peak_fdr_cutoff, FDR=>\%fdr); | |
106 | |
107 make_unified_peaks(SIG_PEAKS=>\@sig_peaks, UNIFIED_PEAKS=>\@unified_peaks, OUT=>$out_peak_unified_track, TYPE=>$vars{'unified_peaks'}); | |
108 | |
109 print STDERR "$#unified_peaks peaks found.\n\n"; | |
110 | |
111 close OUTP; | |
112 } | |
113 | |
114 print STDERR "All done.\n\n"; | |
115 exit 0; | |
116 | |
117 | |
118 ###################### | |
119 # Subroutines start here | |
120 # | |
121 | |
122 sub find_significant_peaks { | |
123 my (%opts) = @_; | |
124 | |
125 my $peakmins = $opts{PEAKMINS}; | |
126 my $peaks = $opts{PEAKS}; | |
127 my $sig_peaks = $opts{SIG_PEAKS}; | |
128 my $fdr = $opts{FDR}; | |
129 my $peak_fdr_cutoff = $opts{PEAK_FDR_CUTOFF}; | |
130 | |
131 # Generate significant peaks and unify peaks | |
132 print STDERR "Selecting significant peaks ... \n"; | |
133 | |
134 foreach my $pm (@$peakmins) { | |
135 for my $i (0 .. $#{$$peaks{$pm}}) { | |
136 my ($chr, $pstart, $pend, $mean_pscore, $pscore, $count, $size) = @{ $$peaks{$pm}[$i] }; | |
137 if ($count >= $$peak_fdr_cutoff{$pm}) { | |
138 push (@$sig_peaks, [ @{$$peaks{$pm}[$i]}, $$fdr{$pm}[$count] ]); | |
139 } | |
140 } | |
141 } | |
142 | |
143 print OUTP "\nNumber of peaks: $#$sig_peaks\n"; | |
144 } | |
145 | |
146 sub calculate_fdr { | |
147 my (%opts) = @_; | |
148 my $in_ref = $opts{IN_REF}; | |
149 my $peakmins = $opts{PEAKMINS_REF}; | |
150 my $peak_count = $opts{PEAK_COUNT}; | |
151 my $log_scores = $opts{LOG_SCORES}; | |
152 my $regression = $opts{REGRESSION}; | |
153 my $fdr = $opts{FDR}; | |
154 my $peak_fdr_cutoff = $opts{PEAK_FDR_CUTOFF}; | |
155 my $peak_count_real = $opts{PEAK_COUNT_REAL}; | |
156 | |
157 foreach my $pm (@$peakmins) { | |
158 # get regression variables | |
159 my ($m,$b) = @{$$regression{$pm}} if $$regression{$pm}[0]; | |
160 | |
161 for my $i (0 .. $#{$$peak_count_real{$pm}}) { | |
162 next unless $$peak_count_real{$pm}[$i]; | |
163 my $expect = 10**($m*$i + $b); | |
164 | |
165 my $real_count = $$peak_count_real{$pm}[$i]; | |
166 my $fdr_conservative = $expect/$real_count; | |
167 $$fdr{$pm}[$i]= $fdr_conservative; | |
168 } | |
169 } | |
170 | |
171 # print FDR rates | |
172 print OUTP "\n"; | |
173 | |
174 foreach my $pm (@$peakmins) { | |
175 print OUTP "Peak min = $pm\n"; | |
176 for my $c (0 .. $#{$$fdr{$pm}}) { | |
177 next unless defined($$fdr{$pm}[$c]); | |
178 print OUTP "Peak size: $c\tCount: $$peak_count_real{$pm}[$c]\tFDR: $$fdr{$pm}[$c]\n"; | |
179 $$peak_fdr_cutoff{$pm} = $c if (($$fdr{$pm}[$c]<$vars{'fdr'}) && (!$$peak_fdr_cutoff{$pm})); | |
180 } | |
181 $$peak_fdr_cutoff{$pm}||= 10**10; # clumsy hack to prevent errors | |
182 print OUTP "\n"; | |
183 } | |
184 | |
185 foreach my $pm (@$peakmins) { | |
186 print OUTP "Peak min $pm: peak cutoff size for alpha = $vars{'fdr'} was $$peak_fdr_cutoff{$pm}\n\n"; | |
187 } | |
188 } | |
189 | |
190 sub calculate_regressions { | |
191 my (%opts) = @_; | |
192 my $in_ref = $opts{IN_REF}; | |
193 my $peakmins = $opts{PEAKMINS_REF}; | |
194 my $peaks = $opts{PEAKS}; | |
195 my $peak_count = $opts{PEAK_COUNT}; | |
196 my $log_scores = $opts{LOG_SCORES}; | |
197 my $regression = $opts{REGRESSION}; | |
198 | |
199 my $in_num = @$in_ref; | |
200 | |
201 foreach my $pm (@$peakmins) { | |
202 print OUTP "Peak min = $pm\n"; | |
203 for my $c (0 .. $#{$$peak_count{$pm}}) { | |
204 my $peak_count_avg = $$peak_count{$pm}[$c]/$vars{'n'} if $$peak_count{$pm}[$c]; | |
205 next unless $peak_count_avg; | |
206 | |
207 if ($vars{'frac'}) { | |
208 $peak_count_avg = $peak_count_avg * $in_num/$vars{'frac'}; | |
209 } | |
210 $$log_scores{$pm}[$c] = log($peak_count_avg)/log(10); | |
211 print OUTP "Peak size: $c\tCount:$peak_count_avg\n"; | |
212 } | |
213 | |
214 # calculate exponential decay rates | |
215 # y= a+bx for log(y) | |
216 my ($sumx, $sumy, $sumxsq, $sumxy); | |
217 my $n=0; | |
218 for my $i (0 .. $#{$$peak_count{$pm}}) { | |
219 next unless $$peak_count{$pm}[$i]; | |
220 $n++; | |
221 $sumx += $i; | |
222 $sumy += $$log_scores{$pm}[$i]; | |
223 $sumxsq += $i ** 2; | |
224 $sumxy += $i * $$log_scores{$pm}[$i]; | |
225 } | |
226 | |
227 next unless $n > 1; | |
228 | |
229 my $mean_x = $sumx/$n; | |
230 my $mean_y = $sumy/$n; | |
231 my $mean_xsq = $sumxsq/$n; | |
232 my $mean_xy = $sumxy/$n; | |
233 | |
234 my $b = ($mean_xy - ($mean_x * $mean_y)) / ($mean_xsq - ($mean_x **2)); | |
235 my $a = $mean_y - ($b * $mean_x); | |
236 | |
237 # store values | |
238 $$regression{$pm}=[$b, $a]; | |
239 print OUTP "regression: log(y) = $b(x) + $a\n"; | |
240 | |
241 for my $i (0 .. $#{$$peak_count{$pm}}) { | |
242 next unless $$peak_count{$pm}[$i]; | |
243 my ($b,$a) = @{$$regression{$pm}}; | |
244 my $logval = ($b*$i + $a); | |
245 my $val = 10**$logval; | |
246 print OUTP "lin regress: $i\t$$log_scores{$pm}[$i]\t$logval\t$val\n"; | |
247 } | |
248 | |
249 print OUTP "\n"; | |
250 } | |
251 } | |
252 | |
253 sub find_randomised_peaks { | |
254 my (%opts) = @_; | |
255 my $in_ref = $opts{IN_REF}; | |
256 my $peakmins_ref = $opts{PEAKMINS_REF}; | |
257 my $peaks = $opts{PEAKS}; | |
258 my $peak_count = $opts{PEAK_COUNT}; | |
259 | |
260 print STDERR "Duplicating ... \n"; | |
261 my @inr = @$in_ref; | |
262 | |
263 # Call peaks on input file | |
264 print STDERR "Calling peaks on input file ...\n"; | |
265 | |
266 for my $iter (1 .. $vars{'n'}) { | |
267 #print STDERR "Iteration $iter ... \r"; | |
268 print STDERR "Iteration $iter: [shuffling] \r"; | |
269 | |
270 if ($vars{'frac'}) { | |
271 # only use a fraction of the array per rep | |
272 my @a = inside_out(\@inr, $vars{'frac'}); | |
273 call_peaks_unified_redux(ITER=>$iter, AREF=>\@a, PEAKMINS_REF=>$peakmins_ref, PEAK_COUNT=>$peak_count); | |
274 } else { | |
275 shuffle(\@inr); | |
276 call_peaks_unified_redux(ITER=>$iter, AREF=>\@inr, PEAKMINS_REF=>$peakmins_ref, PEAK_COUNT=>$peak_count); | |
277 } | |
278 } | |
279 | |
280 } | |
281 | |
282 sub call_peaks_unified_redux { | |
283 my (%opts) = @_; | |
284 my $iter = $opts{ITER}; | |
285 my $real = $opts{REAL}; | |
286 my $a = $opts{AREF}; | |
287 my $peakmins_ref = $opts{PEAKMINS_REF}; | |
288 my $peak_count_real = $opts{PEAK_COUNT_REAL}; | |
289 my $peaks = $opts{PEAKS}; | |
290 my $peak_count = $opts{PEAK_COUNT}; | |
291 | |
292 my ($pstart, $pend, $inpeak, $pscore, $count); | |
293 $pstart=$pend=$pscore=$inpeak=$count=0; | |
294 | |
295 my @tmp_peak; | |
296 my $total = $#$a; | |
297 | |
298 if ($real) { | |
299 print STDERR "Calling real peaks ... \r"; | |
300 } else { | |
301 print STDERR "Iteration $iter: [processing ...] \r"; | |
302 } | |
303 | |
304 my $old_chr=""; | |
305 foreach my $pm (@$peakmins_ref) { | |
306 for my $i (0 .. $total) { | |
307 my ($chr, $start, $end, $score) = @{ @$a[$i] }; | |
308 next unless $score; | |
309 | |
310 if ($real) { | |
311 unless ($chr eq $old_chr) { | |
312 # Next chromosome | |
313 # (Peaks can't carry over chromosomes, but we don't use this shortcut when randomly shuffling) | |
314 $pstart=$pend=$pscore=$inpeak=$count=0; | |
315 @tmp_peak = () if $real; | |
316 } | |
317 } | |
318 $old_chr = $chr if $real; | |
319 | |
320 unless ($inpeak) { | |
321 next unless $score >= $pm; | |
322 # record new peak | |
323 $pstart = $start; | |
324 $pend = $end; | |
325 $pscore = $score * ($end-$start)/1000; | |
326 $count++; | |
327 push @tmp_peak, $score if $real; | |
328 $inpeak = 1; | |
329 } else { | |
330 if ($score >= $pm) { | |
331 # still in peak | |
332 $count++; | |
333 | |
334 # Fragment score to deal with scoring peaks made from uneven sized fragments | |
335 my $fragment_score = $score * ($end-$start)/1000; | |
336 | |
337 push @tmp_peak, $score if $real; | |
338 $pscore += $fragment_score; | |
339 $pend = $end; | |
340 } else { | |
341 # out of a peak | |
342 if ($count >= $vars{'min_count'}) { | |
343 # record peak | |
344 if ($real) { | |
345 $$peak_count_real{$pm}[$count]++; | |
346 my $mean_pscore = sprintf('%0.2f',($pscore/(($pend-$pstart)/1000))); | |
347 push (@{$$peaks{$pm}},[($chr, $pstart, $pend, $mean_pscore, $pscore, $count, ($pend-$pstart))]); | |
348 } else { | |
349 $$peak_count{$pm}[$count]++; | |
350 } | |
351 } | |
352 | |
353 # reset | |
354 $pstart=$pend=$pscore=$inpeak=$count=0; | |
355 @tmp_peak = () if $real; | |
356 } | |
357 } | |
358 } | |
359 } | |
360 } | |
361 | |
362 | |
363 sub shuffle { | |
364 # Fisher-Yates shuffle (Knuth shuffle) | |
365 my ($array) = @_; | |
366 my $i = @$array; | |
367 while ( --$i ) { | |
368 my $j = int rand( $i+1 ); | |
369 @$array[$i,$j] = @$array[$j,$i]; | |
370 } | |
371 } | |
372 | |
373 sub inside_out { | |
374 my ($array, $frac) = @_; | |
375 my @a; | |
376 for my $i (0 .. $frac-1) { | |
377 my $j = int rand( $i+1 ); | |
378 if ($j != $i) { | |
379 $a[$i] = $a[$j] | |
380 } | |
381 $a[$j] = @$array[$i] | |
382 } | |
383 return @a; | |
384 } | |
385 | |
386 sub load_gff { | |
387 my (%opts) = @_; | |
388 my $fn = $opts{FILE}; | |
389 my $in_ref = $opts{IN_REF}; | |
390 | |
391 print STDERR "Reading input file: $fn ...\n"; | |
392 open (IN, "<$fn") || die "Unable to read $fn: $!\n"; | |
393 | |
394 my $i; | |
395 while (<IN>) { | |
396 $i++; | |
397 print STDERR "Read $i lines ...\r" if $i%10000 == 0; | |
398 chomp; | |
399 | |
400 my @line = split('\t'); | |
401 | |
402 my ($chr, $start, $end, $score); | |
403 if ($#line == 3) { | |
404 # bedgraph | |
405 ($chr, $start, $end, $score) = @line; | |
406 } else { | |
407 # GFF | |
408 ($chr, $start, $end, $score) = @line[0,3,4,5]; | |
409 } | |
410 | |
411 next unless $start; | |
412 | |
413 $score = 0 if $score eq "NA"; | |
414 | |
415 push (@$in_ref, [$chr, $start, $end, $score]); | |
416 } | |
417 | |
418 close (IN); | |
419 | |
420 print STDERR "Sorting ... \n"; | |
421 @$in_ref = sort { $a->[1] <=> $b->[1] } @$in_ref; | |
422 @$in_ref = sort { $a->[0] cmp $b->[0] } @$in_ref; | |
423 } | |
424 | |
425 sub find_quants { | |
426 my (%opts) = @_; | |
427 | |
428 my $in_ref = $opts{IN_REF}; | |
429 my $peakmins_ref = $opts{PEAKMINS_REF}; | |
430 | |
431 my %seg; | |
432 my @frags; | |
433 | |
434 my $total_coverage; | |
435 | |
436 foreach my $l (@$in_ref) { | |
437 my ($chr, $start, $end, $score) = @$l; | |
438 next unless $score; | |
439 $score = 0 if $score eq "NA"; | |
440 $total_coverage += $end-$start; | |
441 push @frags, $score; | |
442 } | |
443 | |
444 @frags = sort {$a <=> $b} @frags; | |
445 | |
446 print STDERR "Total coverage was $total_coverage bp\n"; | |
447 | |
448 my @quants; | |
449 for (my $q=0;$q<=1;$q+=$vars{'step'}) { | |
450 push @quants, [$q, int($q * @frags)] if $q > $vars{'min_quant'}; | |
451 } | |
452 | |
453 foreach (@quants) { | |
454 my $cut_off = @{$_}[0]; | |
455 my $score = $frags[@{$_}[1]]; | |
456 printf(" Quantile %0.2f: %0.2f\n",$cut_off,$score); | |
457 $seg{$cut_off} = $score; | |
458 } | |
459 | |
460 foreach my $c (sort {$a <=> $b} keys %seg) { | |
461 push (@$peakmins_ref, $seg{$c}); | |
462 } | |
463 } | |
464 | |
465 sub make_unified_peaks { | |
466 my (%opts) = @_; | |
467 my $ref = $opts{SIG_PEAKS}; | |
468 my $out_peak_unified_track = $opts{OUT}; | |
469 my $unified_peaks = $opts{UNIFIED_PEAKS}; | |
470 my $type = $opts{TYPE}; | |
471 my $total = @$ref; | |
472 | |
473 # Unify overlapping peaks, and make significant peaks file | |
474 my $skipped_peaks; | |
475 print STDERR "Combining significant peaks ...\n"; | |
476 | |
477 # unroll chromosomes for speed | |
478 foreach my $chr (uniq( map @{$_}[0], @$ref )) { | |
479 my @c = grep {@{$_}[0] eq $chr} @$ref; | |
480 | |
481 my @unified_peaks_chr; | |
482 foreach my $ar (@c) { | |
483 if (state $i++ % 100 == 0) { | |
484 my $pc = sprintf("%0.2f",($i*100)/$total); | |
485 print STDERR "$pc\% processed ...\r"; | |
486 } | |
487 | |
488 my ($chra, $start, $end, $score, $total_score, $count, $peaklen, $fdr) = @$ar; | |
489 | |
490 # next if @unified_peaks_chr already overlaps | |
491 next if grep { | |
492 @{$_}[3] < $end | |
493 && @{$_}[4] > $start | |
494 } @unified_peaks_chr; | |
495 | |
496 # Grab all elements that overlap | |
497 my @test = grep { | |
498 @{$_}[1] < $end | |
499 && @{$_}[2] > $start | |
500 } @c; | |
501 | |
502 for my $j (0 .. $#test) { | |
503 my ($chr1, $start1, $end1, $score1, $total_score1, $count1, $peaklen1, $fdr1) = @{$test[$j]}; | |
504 | |
505 next unless $start1 < $end; | |
506 next unless $end1 > $start; | |
507 | |
508 if ($type eq 'min') { | |
509 $start = max($start, $start1); | |
510 $end = min($end, $end1); | |
511 } else { | |
512 $start = min($start, $start1); | |
513 $end = max($end, $end1); | |
514 } | |
515 | |
516 $score = max($score, $score1); | |
517 $fdr = min($fdr, $fdr1); | |
518 } | |
519 | |
520 push @unified_peaks_chr, [($chr, '.', '.', $start, $end, $score, '.', '.', "FDR=$fdr")]; | |
521 } | |
522 | |
523 @$unified_peaks = (@$unified_peaks, @unified_peaks_chr); | |
524 } | |
525 | |
526 $total = $#$unified_peaks; | |
527 | |
528 print STDERR "Sorting unified peaks ...\n"; | |
529 @$unified_peaks = sort { $a->[3] <=> $b->[3] } @$unified_peaks; | |
530 @$unified_peaks = sort { $a->[0] cmp $b->[0] } @$unified_peaks; | |
531 | |
532 print STDERR "Writing unified peaks file ...\n"; | |
533 open(PEAKOUTUNI, ">$out_peak_unified_track") || die "Unable to open peak output track for writing: $!\n\n"; | |
534 for my $j (0 .. $#$unified_peaks) { | |
535 print PEAKOUTUNI join("\t", @{$$unified_peaks[$j]}), "\n"; | |
536 } | |
537 } | |
538 | |
539 sub max { | |
540 my ($max, @vars) = @_; | |
541 my $index=0; | |
542 $max||=0; | |
543 for my $i (0..$#vars) { | |
544 ($max, $index) = ($vars[$i], $i+1) if $vars[$i] > $max; | |
545 } | |
546 return $max; | |
547 } | |
548 | |
549 sub min { | |
550 my ($min, @vars) = @_; | |
551 my $index=0; | |
552 $min||=0; | |
553 for my $i (0..$#vars) { | |
554 ($min, $index) = ($vars[$i],$i+1) if $vars[$i] < $min; | |
555 } | |
556 return $min; | |
557 } | |
558 | |
559 sub uniq { | |
560 my %seen; | |
561 return grep { !$seen{$_}++ } @_; | |
562 } | |
563 | |
564 | |
565 | |
566 sub process_cli { | |
567 foreach (@ARGV) { | |
568 if (/--(.*)=(.*)/) { | |
569 unless (defined($vars{$1})) { | |
570 print STDERR "Did not understand $_ ...\n"; | |
571 help(); | |
572 } | |
573 my ($v, $opt) = ($1,$2); | |
574 $vars{$v} = $opt; | |
575 next; | |
576 } elsif (/--h[elp]*/) { | |
577 help(); | |
578 } elsif (/--(.*)/) { | |
579 print STDERR "Please add a parameter to $_ ...\n\n"; | |
580 exit 1; | |
581 } | |
582 push @in_files, $_; | |
583 } | |
584 } | |
585 | |
586 | |
587 sub help { | |
588 print STDOUT <<EOT; | |
589 Simple FDR random permutation peak caller | |
590 Usage: [options] [files in bedgraph or GFF format] | |
591 | |
592 Options: | |
593 EOT | |
594 | |
595 my $opt_len = 0; | |
596 foreach (keys %vars) { | |
597 my $l = length($_); | |
598 $opt_len = $l if $l > $opt_len; | |
599 } | |
600 | |
601 $opt_len+=2; | |
602 | |
603 my $cols= `tput cols` || 80; | |
604 | |
605 my ($v, $val, $def, $def_format); | |
606 my $help_format = "format STDOUT =\n" | |
607 .' '.'^'.'<'x$opt_len . ' '. '^' . '<'x($cols-$opt_len-4) . "\n" | |
608 .'$v, $def_format'."\n" | |
609 .' '.'^'.'<'x$opt_len . ' '. '^' . '<'x($cols-$opt_len-6) . "~~\n" | |
610 .'$v, $def_format'."\n" | |
611 .".\n"; | |
612 | |
613 eval $help_format; | |
614 die $@ if $@; | |
615 | |
616 foreach my $k (sort (keys %vars)) { | |
617 ($v, $val, $def) = ($k, $vars{$k}, $vars_details{$k}); | |
618 $def||=""; | |
619 $def_format = $val ? "$def\n\r[Current value: $val]" : $def; | |
620 $v = "--$v"; | |
621 # format = | |
622 # ^<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | |
623 #$v, $def_format | |
624 # ^<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ~~ | |
625 #$v, $def_format | |
626 #. | |
627 | |
628 write(); | |
629 | |
630 } | |
631 print STDOUT "\n"; | |
632 exit 1; | |
633 } |