Mercurial > repos > bigrna > gpsrna
comparison microRNA_pipeline.pl @ 0:87fe81de0931 draft default tip
Uploaded
| author | bigrna |
|---|---|
| date | Sun, 04 Jan 2015 02:47:25 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:87fe81de0931 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 #Filename: | |
| 3 #Author: Tian Dongmei | |
| 4 #Email: tiandm@big.ac.cn | |
| 5 #Date: 2014-4-22 | |
| 6 #Modified: | |
| 7 #Description: plant microRNA prediction | |
| 8 my $version=1.00; | |
| 9 | |
| 10 use strict; | |
| 11 use Getopt::Long; | |
| 12 use threads; | |
| 13 #use threads::shared; | |
| 14 use File::Path; | |
| 15 use File::Basename; | |
| 16 #use RNA; | |
| 17 #use Term::ANSIColor; | |
| 18 | |
| 19 my %opts; | |
| 20 GetOptions(\%opts,"i:s@","tag:s@","phred:i","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h"); | |
| 21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments | |
| 22 &usage; | |
| 23 } | |
| 24 | |
| 25 my $time=&Time(); | |
| 26 print "miPlant program start:\n The time is $time!\n"; | |
| 27 print "Command line:\n $0 @ARGV\n"; | |
| 28 | |
| 29 my $format=$opts{'format'}; | |
| 30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { | |
| 31 #&printErr(); | |
| 32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; | |
| 33 } | |
| 34 | |
| 35 my $phred_qv=64; | |
| 36 if (defined $opts{'phred'}) { | |
| 37 $phred_qv=$opts{'phred'}; | |
| 38 } | |
| 39 | |
| 40 my @inputfiles=@{$opts{'i'}}; | |
| 41 my @inputtags=@{$opts{'tag'}}; | |
| 42 | |
| 43 my $mypath=`pwd`; | |
| 44 chomp $mypath; | |
| 45 | |
| 46 my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRPlant_out/"; | |
| 47 | |
| 48 | |
| 49 unless ($dir=~/\/$/) {$dir.="/";} | |
| 50 if (not -d $dir) { | |
| 51 mkdir $dir; | |
| 52 } | |
| 53 my $config=$dir."/input_config"; | |
| 54 open CONFIG,">$config"; | |
| 55 for (my $i=0;$i<@inputfiles;$i++) { | |
| 56 print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n"; | |
| 57 } | |
| 58 close CONFIG; | |
| 59 | |
| 60 my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/"; | |
| 61 | |
| 62 my $a="ATCTCGTATG"; #adapter | |
| 63 if (defined $opts{'a'}) {$a=$opts{'a'};} | |
| 64 | |
| 65 my $m=6; #adapter minimum mapped nt | |
| 66 if (defined $opts{'M'}) {$m=$opts{'M'};} | |
| 67 | |
| 68 my $t=1; #threads number | |
| 69 if (defined $opts{'t'}) {$t=$opts{'t'};} | |
| 70 | |
| 71 my $min_nt=19; # minimum reads length | |
| 72 if (defined $opts{'min'}) {$min_nt=$opts{'min'};} | |
| 73 | |
| 74 my $max_nt=28; #maximum reads length | |
| 75 if (defined $opts{'max'}) {$max_nt=$opts{'max'};} | |
| 76 | |
| 77 my $mis=0; #mismatch number for microRNA | |
| 78 if (defined $opts{'mis'}) {$mis=$opts{'mis'};} | |
| 79 | |
| 80 my $mis_rfam=0;# mismatch number for rfam | |
| 81 if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};} | |
| 82 | |
| 83 my $hit=25; # maximum reads mapping hits in genome | |
| 84 if (defined $opts{'r'}) {$hit=$opts{'r'};} | |
| 85 | |
| 86 my $upstream = 2; # microRNA 5' extension | |
| 87 $upstream = $opts{'e'} if(defined $opts{'e'}); | |
| 88 | |
| 89 my $downstream = 5;# microRNA 3' extension | |
| 90 $downstream = $opts{'f'} if(defined $opts{'f'}); | |
| 91 | |
| 92 my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200; | |
| 93 my $flank=defined $opts{'flank'} ? $opts{'flank'} :10; | |
| 94 my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20; | |
| 95 | |
| 96 $time=&Time(); | |
| 97 print "$time, Checking input file!\n"; | |
| 98 | |
| 99 my (@filein,@mark,@clean); | |
| 100 #&read_config(); | |
| 101 @filein=@inputfiles; | |
| 102 @mark=@inputtags; | |
| 103 | |
| 104 &checkfa($opts{pre}); | |
| 105 &checkfa($opts{mat}); | |
| 106 &checkfa($opts{gfa}); | |
| 107 | |
| 108 | |
| 109 ##### clip adpter --> clean data start | |
| 110 $time=&Time(); | |
| 111 print "$time, Preprocess:\n trim adapter, reads collapse and filter reads by length.\n"; | |
| 112 | |
| 113 $time=~s/:/-/g; | |
| 114 $time=~s/ /-/g; | |
| 115 my $preprocess=$dir."preProcess/"; | |
| 116 mkdir $preprocess; | |
| 117 my $can_use_threads = eval 'use threads; 1'; | |
| 118 if ($can_use_threads) { | |
| 119 # Do processing using threads | |
| 120 print "Do processing using threads\n"; | |
| 121 my @filein1=@filein; my @mark1=@mark; | |
| 122 while (@filein1>0) { | |
| 123 my @thrs; my @res; | |
| 124 for (my $i=0;$i<$t ;$i++) { | |
| 125 last if(@filein1==0); | |
| 126 my $in=shift @filein1; | |
| 127 my $out=shift @mark1; | |
| 128 push @clean,$preprocess.$out."_clips_adapter.fq"; | |
| 129 $thrs[$i]=threads->create(\&clips,$in,$out); | |
| 130 } | |
| 131 for (my $i=0;$i<@thrs;$i++) { | |
| 132 $res[$i]=$thrs[$i]->join(); | |
| 133 } | |
| 134 } | |
| 135 } else { | |
| 136 # Do not processing using threads | |
| 137 print "Do not processing using threads\n"; | |
| 138 for (my $i=0;$i<@filein ;$i++) { | |
| 139 my $in=$filein[$i]; | |
| 140 my $out=$mark[$i]; | |
| 141 push @clean,$preprocess.$out."_clips_adapter.fq"; | |
| 142 &clips($in,$out); | |
| 143 } | |
| 144 } | |
| 145 | |
| 146 ##### clip adpter --> clean data end | |
| 147 | |
| 148 my $collapsed=$preprocess."collapse_reads.fa"; | |
| 149 my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data | |
| 150 my $data2; ### mirbase not mapped reads | |
| 151 my $data3; ### rfam not mapped reads | |
| 152 &collapse(\@clean,$collapsed); #collapse reads to tags | |
| 153 | |
| 154 &filterbylength(); # filter <$min_nt && >$max_nt | |
| 155 | |
| 156 print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n"; | |
| 157 | |
| 158 $time=Time(); | |
| 159 print "$time: known microRNA quantify!\n\n"; | |
| 160 | |
| 161 chdir $dir; | |
| 162 | |
| 163 $time=~s/:/-/g; | |
| 164 $time=~s/ /-/g; | |
| 165 my $known_result=$dir."known_miRNA_Express/"; | |
| 166 &quantify(); ### known microRAN quantify | |
| 167 | |
| 168 | |
| 169 #my $miR_exp_dir=&search($known_result,"miRNA_Express_"); | |
| 170 $data2=$known_result."/mirbase_not_mapped.fa"; | |
| 171 | |
| 172 my $pathfile="$dir/path.txt"; | |
| 173 open PA,">$pathfile"; | |
| 174 print PA "$config\n"; | |
| 175 print PA "$preprocess\n"; | |
| 176 print PA "$known_result\n"; | |
| 177 | |
| 178 if (defined $opts{'rfam'}) { #rfam mapping and analysis | |
| 179 $time=Time(); | |
| 180 print "$time: RNA annotate!\n\n"; | |
| 181 $time=~s/:/-/g; | |
| 182 $time=~s/ /-/g; | |
| 183 my $rfam_exp_dir=$dir."rfam_match"; | |
| 184 &rfam(); | |
| 185 #my $rfam_exp_dir=&search($dir,"rfam_match_"); | |
| 186 $data3=$rfam_exp_dir."/rfam_not_mapped.fa"; | |
| 187 print PA "$rfam_exp_dir\n"; | |
| 188 | |
| 189 my $tag=join "\\;" ,@mark; | |
| 190 system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt"); | |
| 191 } | |
| 192 | |
| 193 my $data4=$data; | |
| 194 if (defined $opts{'D'}) { #genome mapping | |
| 195 $data4=$data3; | |
| 196 }else{ | |
| 197 $data4=$data2; | |
| 198 } | |
| 199 | |
| 200 $time=Time(); | |
| 201 print "$time: Genome alignment!\n\n"; | |
| 202 $time=~s/:/-/g; | |
| 203 $time=~s/ /-/g; | |
| 204 my $genome_map=$dir."genome_match"; | |
| 205 &genome($data4); | |
| 206 print PA "$genome_map\n"; | |
| 207 #my $genome_map=&search($dir,"genome_match_"); | |
| 208 my $mapfile=$genome_map."/genome_mapped.bwt"; | |
| 209 my $mapfa=$genome_map."/genome_mapped.fa"; | |
| 210 my $unmap=$genome_map."/genome_not_mapped.fa"; | |
| 211 | |
| 212 #$time=Time(); | |
| 213 #print "$time: Novel microRNA prediction!\n\n"; | |
| 214 | |
| 215 &predict($mapfa); | |
| 216 | |
| 217 close PA; | |
| 218 system("perl $scipt_path/html_miRPlant.pl -i $pathfile -format $format -o $dir/result.html"); | |
| 219 | |
| 220 $time=Time(); | |
| 221 print "$time: Program end!!\n"; | |
| 222 | |
| 223 ############################## sub programs ################################### | |
| 224 sub predict{ | |
| 225 my ($file)=@_; | |
| 226 $time=&Time(); | |
| 227 print "$time: Novel microRNA prediction!\n\n"; | |
| 228 $time=~s/:/-/g; | |
| 229 $time=~s/ /-/g; | |
| 230 my $predict=$dir."novel_miRNA_predict"; | |
| 231 print PA "$predict\n"; | |
| 232 mkdir $predict; | |
| 233 chdir $predict; | |
| 234 system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe"); | |
| 235 # print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n"; | |
| 236 | |
| 237 system("bowtie-build -f excised_precursor.fa excised_precursor"); | |
| 238 # print "\nbowtie-build -f excised_precursor.fa excised_precursor\n"; | |
| 239 | |
| 240 system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log"); | |
| 241 # print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n"; | |
| 242 | |
| 243 system("perl $scipt_path/convert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst"); | |
| 244 # print "\nconvert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n"; | |
| 245 | |
| 246 system("sort -k 4 precursor_mapped.bst > signatures.bst"); | |
| 247 # print "\nsort +3 -25 precursor_mapped.bst > ../signatures.bst\n"; | |
| 248 | |
| 249 chdir $dir; | |
| 250 system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd"); | |
| 251 # print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n"; | |
| 252 #system("rm novel_tmp_dir -rf"); | |
| 253 my $tag=join "," ,@mark; | |
| 254 system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag"); | |
| 255 } | |
| 256 | |
| 257 sub genome{ | |
| 258 my ($file)=@_; | |
| 259 if(defined $opts{'idx'}){ | |
| 260 system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} ") ; | |
| 261 # print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n"; | |
| 262 }else{ | |
| 263 system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir ") ; | |
| 264 # print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n"; | |
| 265 } | |
| 266 } | |
| 267 sub rfam{ | |
| 268 if (defined $opts{'idx2'}) { | |
| 269 system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} "); | |
| 270 # print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n"; | |
| 271 }else{ | |
| 272 system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir"); | |
| 273 # print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n"; | |
| 274 } | |
| 275 } | |
| 276 sub quantify{ | |
| 277 my $tag=join "\\;" ,@mark; | |
| 278 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag"); | |
| 279 print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n"; | |
| 280 } | |
| 281 sub filterbylength{ | |
| 282 my $tmpmark=join ",", @mark; | |
| 283 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark"); | |
| 284 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html"); | |
| 285 # print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n"; | |
| 286 | |
| 287 } | |
| 288 sub collapse{ | |
| 289 my ($ins,$data)=@_; | |
| 290 my $str=""; | |
| 291 for (my $i=0;$i<@{$ins};$i++) { | |
| 292 $str .="-i $$ins[$i] "; | |
| 293 } | |
| 294 system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format"); | |
| 295 # print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n"; | |
| 296 } | |
| 297 | |
| 298 sub clips{ | |
| 299 my ($in,$out)=@_; | |
| 300 my $adapter=$preprocess.$out."_clips_adapter.fq"; | |
| 301 if($format eq "fq" || $format eq "fastq"){ | |
| 302 system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ; | |
| 303 print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n"; | |
| 304 } | |
| 305 if($format eq "fa" || $format eq "fasta"){ | |
| 306 system("fastx_clipper -a $a -M $m -i $in -o $adapter") ; | |
| 307 # print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n"; | |
| 308 } | |
| 309 #my $clean=$preprocess.$out."_clean.fq"; | |
| 310 #system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt "); | |
| 311 | |
| 312 return; | |
| 313 } | |
| 314 | |
| 315 sub read_config{ | |
| 316 open CON,"<$config"; | |
| 317 while (my $aline=<CON>) { | |
| 318 chomp $aline; | |
| 319 my @tmp=split/\t/,$aline; | |
| 320 push @filein,$tmp[0]; | |
| 321 push @mark,$tmp[1]; | |
| 322 &check_rawdata($tmp[0]); | |
| 323 } | |
| 324 close CON; | |
| 325 if (@filein != @mark) { | |
| 326 #&printErr(); | |
| 327 die "Maybe config file have some wrong!!!\n"; | |
| 328 } | |
| 329 } | |
| 330 sub check_rawdata{ | |
| 331 my ($fileforcheck)=@_; | |
| 332 if (!(-s $fileforcheck)) { | |
| 333 #&printErr(); | |
| 334 die "Can not find $fileforcheck, or file is empty!!!\n"; | |
| 335 } | |
| 336 if ($format eq "fasta" || $format eq "fa") { | |
| 337 &checkfa($fileforcheck); | |
| 338 } | |
| 339 if ($format eq "fastq" || $format eq "fq") { | |
| 340 &checkfq($fileforcheck); | |
| 341 } | |
| 342 } | |
| 343 sub checkfa{ | |
| 344 my ($file_reads)=@_; | |
| 345 open N,"<$file_reads"; | |
| 346 my $line=<N>; | |
| 347 chomp $line; | |
| 348 if($line !~ /^>\S+/){ | |
| 349 #printErr(); | |
| 350 die "The first line of file $file_reads does not start with '>identifier' | |
| 351 Reads file $file_reads is not a valid fasta file\n\n"; | |
| 352 } | |
| 353 if(<N> !~ /^[ACGTNacgtn]*$/){ | |
| 354 #printErr(); | |
| 355 die "File $file_reads contains not allowed characters in sequences | |
| 356 Allowed characters are ACGTN | |
| 357 Reads file $file_reads is not a fasta file\n\n"; | |
| 358 } | |
| 359 close N; | |
| 360 } | |
| 361 sub checkfq{ | |
| 362 my ($file_reads)=@_; | |
| 363 | |
| 364 open N,"<$file_reads"; | |
| 365 for (my $i=0;$i<10;$i++) { | |
| 366 my $a=<N>; | |
| 367 my $b=<N>; | |
| 368 my $c=<N>; | |
| 369 my $d=<N>; | |
| 370 chomp $a; | |
| 371 chomp $b; | |
| 372 chomp $c; | |
| 373 chomp $d; | |
| 374 if($a!~/^\@/){ | |
| 375 #&printErr(); | |
| 376 die "$file_reads is not a fastq file\n\n"; | |
| 377 } | |
| 378 if($b!~ /^[ACGTNacgtn]*$/){ | |
| 379 #&printErr(); | |
| 380 die "File $file_reads contains not allowed characters in sequences | |
| 381 Allowed characters are ACGTN | |
| 382 Reads file $file_reads is not a fasta file\n\n"; | |
| 383 } | |
| 384 if ($c!~/^\@/ && $c!~/^\+/) { | |
| 385 #&printErr(); | |
| 386 die "$file_reads is not a fastq file\n\n"; | |
| 387 } | |
| 388 if ((length $b) != (length $d)) { | |
| 389 #&printErr(); | |
| 390 die "$file_reads is not a fastq file\n\n"; | |
| 391 } | |
| 392 my @qv=split //,$d; | |
| 393 for (my $j=0;$j<@qv ;$j++) { | |
| 394 my $q=ord($qv[$j])-64; | |
| 395 if($q<0){$phred_qv=33;} | |
| 396 } | |
| 397 } | |
| 398 close N; | |
| 399 } | |
| 400 | |
| 401 sub search{ | |
| 402 my ($dir,$str)=@_; | |
| 403 opendir I,$dir; | |
| 404 my @ret; | |
| 405 while (my $file=readdir I) { | |
| 406 if ($file=~/$str/) { | |
| 407 push @ret, $file; | |
| 408 } | |
| 409 } | |
| 410 closedir I; | |
| 411 if (@ret != 1) { | |
| 412 #&printErr(); | |
| 413 | |
| 414 die "Can not find directory or file which name has string: $str !!!\n"; | |
| 415 } | |
| 416 return $ret[0]; | |
| 417 } | |
| 418 | |
| 419 =cut | |
| 420 | |
| 421 sub printErr{ | |
| 422 print STDERR color 'bold red'; | |
| 423 print STDERR "Error: "; | |
| 424 print STDERR color 'reset'; | |
| 425 } | |
| 426 sub Time{ | |
| 427 my $time=time(); | |
| 428 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; | |
| 429 $month++; | |
| 430 $year+=1900; | |
| 431 if (length($sec) == 1) {$sec = "0"."$sec";} | |
| 432 if (length($min) == 1) {$min = "0"."$min";} | |
| 433 if (length($hour) == 1) {$hour = "0"."$hour";} | |
| 434 if (length($day) == 1) {$day = "0"."$day";} | |
| 435 if (length($month) == 1) {$month = "0"."$month";} | |
| 436 #print "$year-$month-$day $hour:$min:$sec\n"; | |
| 437 return("$year-$month-$day-$hour-$min-$sec"); | |
| 438 } | |
| 439 =cut | |
| 440 sub Time{ | |
| 441 my $time=time(); | |
| 442 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; | |
| 443 $month++; | |
| 444 $year+=1900; | |
| 445 if (length($sec) == 1) {$sec = "0"."$sec";} | |
| 446 if (length($min) == 1) {$min = "0"."$min";} | |
| 447 if (length($hour) == 1) {$hour = "0"."$hour";} | |
| 448 if (length($day) == 1) {$day = "0"."$day";} | |
| 449 if (length($month) == 1) {$month = "0"."$month";} | |
| 450 #print "$year-$month-$day $hour:$min:$sec\n"; | |
| 451 return("$year-$month-$day $hour:$min:$sec"); | |
| 452 } | |
| 453 | |
| 454 | |
| 455 sub usage{ | |
| 456 print <<"USAGE"; | |
| 457 Version $version | |
| 458 Usage: | |
| 459 | |
| 460 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path | |
| 461 options: | |
| 462 -i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ... | |
| 463 -tag string # raw data file names, -tag xxx -tag xxx | |
| 464 | |
| 465 -format string,#specific input rawdata file format : fastq|fq|fasta|fa | |
| 466 | |
| 467 -path scirpt path | |
| 468 | |
| 469 -gfa string, input file # genome fasta. sequence file | |
| 470 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter | |
| 471 string must be the prefix of the bowtie index. For instance, if | |
| 472 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then | |
| 473 the prefix is 'h_sapiens_37_asm'.##can be null | |
| 474 | |
| 475 -pre string, input file #species specific microRNA precursor sequences | |
| 476 -mat string, input file #species specific microRNA mature sequences | |
| 477 | |
| 478 -rfam string, input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count. | |
| 479 -idx2 string, rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter | |
| 480 string must be the prefix of the bowtie index. For instance, if | |
| 481 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then | |
| 482 the prefix is 'h_sapiens_37_asm'.##can be null | |
| 483 | |
| 484 -D If [-D] is specified,will discard rfam mapped reads(nead -rfam). | |
| 485 | |
| 486 -a string, ADAPTER string. default is ATCTCGTATG. | |
| 487 -M int, require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it. | |
| 488 -min int, reads min length,default is 19. | |
| 489 -max int, reads max length,default is 28. | |
| 490 | |
| 491 -mis [int] number of allowed mismatches when mapping reads to precursors, default 0 | |
| 492 -e [int] number of nucleotides upstream of the mature sequence to consider, default 2 | |
| 493 -f [int] number of nucleotides downstream of the mature sequence to consider, default 5 | |
| 494 -v <int> report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment | |
| 495 -r int a read is allowed to map up to this number of positions in the genome,default is 25 | |
| 496 | |
| 497 -dis <int> Maximal space between miRNA and miRNA* (200) | |
| 498 -flank <int> Flank sequence length of miRNA precursor (10) | |
| 499 -mfe <folat> Maximal free energy allowed for a miRNA precursor (-20) | |
| 500 | |
| 501 -t int, number of threads [1] | |
| 502 | |
| 503 -o output directory# absolute path | |
| 504 -h help | |
| 505 USAGE | |
| 506 exit(1); | |
| 507 } | |
| 508 |
