Mercurial > repos > big-tiandm > mirplant2
comparison miRPlant.pl @ 46:ca05d68aca13 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 13 Nov 2014 22:43:35 -0500 |
parents | 0c4e11018934 |
children |
comparison
equal
deleted
inserted
replaced
45:2cb6add23dfe | 46:ca05d68aca13 |
---|---|
8 my $version=1.00; | 8 my $version=1.00; |
9 | 9 |
10 use strict; | 10 use strict; |
11 use Getopt::Long; | 11 use Getopt::Long; |
12 use threads; | 12 use threads; |
13 use threads::shared; | 13 #use threads::shared; |
14 use File::Path; | 14 use File::Path; |
15 use File::Basename; | 15 use File::Basename; |
16 use RNA; | 16 #use RNA; |
17 use Term::ANSIColor; | 17 #use Term::ANSIColor; |
18 | 18 |
19 my %opts; | 19 my %opts; |
20 GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h"); | 20 GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h"); |
21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments | 21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments |
22 &usage; | 22 &usage; |
26 print "miPlant program start:\n The time is $time!\n"; | 26 print "miPlant program start:\n The time is $time!\n"; |
27 print "Command line:\n $0 @ARGV\n"; | 27 print "Command line:\n $0 @ARGV\n"; |
28 | 28 |
29 my $format=$opts{'format'}; | 29 my $format=$opts{'format'}; |
30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { | 30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { |
31 &printErr(); | 31 #&printErr(); |
32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; | 32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; |
33 } | 33 } |
34 | 34 |
35 my $phred_qv=64; | 35 my $phred_qv=64; |
36 | 36 |
272 } | 272 } |
273 } | 273 } |
274 sub quantify{ | 274 sub quantify{ |
275 my $tag=join "\\;" ,@mark; | 275 my $tag=join "\\;" ,@mark; |
276 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag"); | 276 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag"); |
277 # print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n"; | 277 print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n"; |
278 } | 278 } |
279 sub filterbylength{ | 279 sub filterbylength{ |
280 my $tmpmark=join ",", @mark; | 280 my $tmpmark=join ",", @mark; |
281 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark"); | 281 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark"); |
282 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html"); | 282 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html"); |
319 push @mark,$tmp[1]; | 319 push @mark,$tmp[1]; |
320 &check_rawdata($tmp[0]); | 320 &check_rawdata($tmp[0]); |
321 } | 321 } |
322 close CON; | 322 close CON; |
323 if (@filein != @mark) { | 323 if (@filein != @mark) { |
324 &printErr(); | 324 #&printErr(); |
325 die "Maybe config file have some wrong!!!\n"; | 325 die "Maybe config file have some wrong!!!\n"; |
326 } | 326 } |
327 } | 327 } |
328 sub check_rawdata{ | 328 sub check_rawdata{ |
329 my ($fileforcheck)=@_; | 329 my ($fileforcheck)=@_; |
330 if (!(-s $fileforcheck)) { | 330 if (!(-s $fileforcheck)) { |
331 &printErr(); | 331 #&printErr(); |
332 die "Can not find $fileforcheck, or file is empty!!!\n"; | 332 die "Can not find $fileforcheck, or file is empty!!!\n"; |
333 } | 333 } |
334 if ($format eq "fasta" || $format eq "fa") { | 334 if ($format eq "fasta" || $format eq "fa") { |
335 &checkfa($fileforcheck); | 335 &checkfa($fileforcheck); |
336 } | 336 } |
342 my ($file_reads)=@_; | 342 my ($file_reads)=@_; |
343 open N,"<$file_reads"; | 343 open N,"<$file_reads"; |
344 my $line=<N>; | 344 my $line=<N>; |
345 chomp $line; | 345 chomp $line; |
346 if($line !~ /^>\S+/){ | 346 if($line !~ /^>\S+/){ |
347 printErr(); | 347 #printErr(); |
348 die "The first line of file $file_reads does not start with '>identifier' | 348 die "The first line of file $file_reads does not start with '>identifier' |
349 Reads file $file_reads is not a valid fasta file\n\n"; | 349 Reads file $file_reads is not a valid fasta file\n\n"; |
350 } | 350 } |
351 if(<N> !~ /^[ACGTNacgtn]*$/){ | 351 if(<N> !~ /^[ACGTNacgtn]*$/){ |
352 printErr(); | 352 #printErr(); |
353 die "File $file_reads contains not allowed characters in sequences | 353 die "File $file_reads contains not allowed characters in sequences |
354 Allowed characters are ACGTN | 354 Allowed characters are ACGTN |
355 Reads file $file_reads is not a fasta file\n\n"; | 355 Reads file $file_reads is not a fasta file\n\n"; |
356 } | 356 } |
357 close N; | 357 close N; |
368 chomp $a; | 368 chomp $a; |
369 chomp $b; | 369 chomp $b; |
370 chomp $c; | 370 chomp $c; |
371 chomp $d; | 371 chomp $d; |
372 if($a!~/^\@/){ | 372 if($a!~/^\@/){ |
373 &printErr(); | 373 #&printErr(); |
374 die "$file_reads is not a fastq file\n\n"; | 374 die "$file_reads is not a fastq file\n\n"; |
375 } | 375 } |
376 if($b!~ /^[ACGTNacgtn]*$/){ | 376 if($b!~ /^[ACGTNacgtn]*$/){ |
377 &printErr(); | 377 #&printErr(); |
378 die "File $file_reads contains not allowed characters in sequences | 378 die "File $file_reads contains not allowed characters in sequences |
379 Allowed characters are ACGTN | 379 Allowed characters are ACGTN |
380 Reads file $file_reads is not a fasta file\n\n"; | 380 Reads file $file_reads is not a fasta file\n\n"; |
381 } | 381 } |
382 if ($c!~/^\@/ && $c!~/^\+/) { | 382 if ($c!~/^\@/ && $c!~/^\+/) { |
383 &printErr(); | 383 #&printErr(); |
384 die "$file_reads is not a fastq file\n\n"; | 384 die "$file_reads is not a fastq file\n\n"; |
385 } | 385 } |
386 if ((length $b) != (length $d)) { | 386 if ((length $b) != (length $d)) { |
387 &printErr(); | 387 #&printErr(); |
388 die "$file_reads is not a fastq file\n\n"; | 388 die "$file_reads is not a fastq file\n\n"; |
389 } | 389 } |
390 my @qv=split //,$d; | 390 my @qv=split //,$d; |
391 for (my $j=0;$j<@qv ;$j++) { | 391 for (my $j=0;$j<@qv ;$j++) { |
392 my $q=ord($qv[$j])-64; | 392 my $q=ord($qv[$j])-64; |
405 push @ret, $file; | 405 push @ret, $file; |
406 } | 406 } |
407 } | 407 } |
408 closedir I; | 408 closedir I; |
409 if (@ret != 1) { | 409 if (@ret != 1) { |
410 &printErr(); | 410 #&printErr(); |
411 | 411 |
412 die "Can not find directory or file which name has string: $str !!!\n"; | 412 die "Can not find directory or file which name has string: $str !!!\n"; |
413 } | 413 } |
414 return $ret[0]; | 414 return $ret[0]; |
415 } | 415 } |
416 | |
417 =cut | |
416 | 418 |
417 sub printErr{ | 419 sub printErr{ |
418 print STDERR color 'bold red'; | 420 print STDERR color 'bold red'; |
419 print STDERR "Error: "; | 421 print STDERR "Error: "; |
420 print STDERR color 'reset'; | 422 print STDERR color 'reset'; |
421 } | 423 } |
422 =cut | |
423 sub Time{ | 424 sub Time{ |
424 my $time=time(); | 425 my $time=time(); |
425 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; | 426 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; |
426 $month++; | 427 $month++; |
427 $year+=1900; | 428 $year+=1900; |
451 | 452 |
452 sub usage{ | 453 sub usage{ |
453 print <<"USAGE"; | 454 print <<"USAGE"; |
454 Version $version | 455 Version $version |
455 Usage: | 456 Usage: |
457 | |
456 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path | 458 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path |
457 options: | 459 options: |
458 -i string, input file#input files information file | 460 -i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ... |
459 /path/filename mark | 461 -tag string # raw data file names, -tag xxx -tag xxx |
460 /path/filename mark | |
461 ... | |
462 | 462 |
463 -format string,#specific input rawdata file format : fastq|fq|fasta|fa | 463 -format string,#specific input rawdata file format : fastq|fq|fasta|fa |
464 | |
465 -path scirpt path | |
464 | 466 |
465 -gfa string, input file # genome fasta. sequence file | 467 -gfa string, input file # genome fasta. sequence file |
466 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter | 468 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter |
467 string must be the prefix of the bowtie index. For instance, if | 469 string must be the prefix of the bowtie index. For instance, if |
468 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then | 470 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then |