14
|
1 #!/usr/bin/perl -w
|
|
2 #Filename:
|
|
3 #Author: Tian Dongmei
|
|
4 #Email: tiandm@big.ac.cn
|
|
5 #Date: 2014-4-22
|
|
6 #Modified:
|
|
7 #Description: plant microRNA prediction
|
|
8 my $version=1.00;
|
|
9
|
|
10 use strict;
|
|
11 use Getopt::Long;
|
|
12 use threads;
|
46
|
13 #use threads::shared;
|
14
|
14 use File::Path;
|
|
15 use File::Basename;
|
46
|
16 #use RNA;
|
|
17 #use Term::ANSIColor;
|
14
|
18
|
|
19 my %opts;
|
|
20 GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
|
|
21 if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments
|
|
22 &usage;
|
|
23 }
|
|
24
|
|
25 my $time=&Time();
|
|
26 print "miPlant program start:\n The time is $time!\n";
|
|
27 print "Command line:\n $0 @ARGV\n";
|
|
28
|
|
29 my $format=$opts{'format'};
|
|
30 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
|
46
|
31 #&printErr();
|
14
|
32 die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
|
|
33 }
|
|
34
|
|
35 my $phred_qv=64;
|
|
36
|
|
37
|
|
38 my @inputfiles=@{$opts{'i'}};
|
|
39 my @inputtags=@{$opts{'tag'}};
|
|
40
|
|
41 my $mypath=`pwd`;
|
|
42 chomp $mypath;
|
|
43
|
|
44 my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRPlant_out/";
|
|
45
|
|
46
|
|
47 unless ($dir=~/\/$/) {$dir.="/";}
|
|
48 if (not -d $dir) {
|
|
49 mkdir $dir;
|
|
50 }
|
|
51 my $config=$dir."/input_config";
|
|
52 open CONFIG,">$config";
|
|
53 for (my $i=0;$i<@inputfiles;$i++) {
|
|
54 print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
|
|
55 }
|
|
56 close CONFIG;
|
|
57
|
|
58 my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
|
|
59
|
|
60 my $a="ATCTCGTATG"; #adapter
|
|
61 if (defined $opts{'a'}) {$a=$opts{'a'};}
|
|
62
|
|
63 my $m=6; #adapter minimum mapped nt
|
|
64 if (defined $opts{'M'}) {$m=$opts{'M'};}
|
|
65
|
|
66 my $t=1; #threads number
|
|
67 if (defined $opts{'t'}) {$t=$opts{'t'};}
|
|
68
|
|
69 my $min_nt=19; # minimum reads length
|
|
70 if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
|
|
71
|
|
72 my $max_nt=28; #maximum reads length
|
|
73 if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
|
|
74
|
|
75 my $mis=0; #mismatch number for microRNA
|
|
76 if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
|
|
77
|
|
78 my $mis_rfam=0;# mismatch number for rfam
|
|
79 if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
|
|
80
|
|
81 my $hit=25; # maximum reads mapping hits in genome
|
|
82 if (defined $opts{'r'}) {$hit=$opts{'r'};}
|
|
83
|
|
84 my $upstream = 2; # microRNA 5' extension
|
|
85 $upstream = $opts{'e'} if(defined $opts{'e'});
|
|
86
|
|
87 my $downstream = 5;# microRNA 3' extension
|
|
88 $downstream = $opts{'f'} if(defined $opts{'f'});
|
|
89
|
|
90 my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200;
|
|
91 my $flank=defined $opts{'flank'} ? $opts{'flank'} :10;
|
|
92 my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20;
|
|
93
|
|
94 $time=&Time();
|
|
95 print "$time, Checking input file!\n";
|
|
96
|
|
97 my (@filein,@mark,@clean);
|
|
98 #&read_config();
|
|
99 @filein=@inputfiles;
|
|
100 @mark=@inputtags;
|
|
101
|
|
102 &checkfa($opts{pre});
|
|
103 &checkfa($opts{mat});
|
|
104 &checkfa($opts{gfa});
|
|
105
|
|
106
|
|
107 ##### clip adpter --> clean data start
|
|
108 $time=&Time();
|
|
109 print "$time, Preprocess:\n trim adapter, reads collapse and filter reads by length.\n";
|
|
110
|
|
111 $time=~s/:/-/g;
|
|
112 $time=~s/ /-/g;
|
|
113 my $preprocess=$dir."preProcess_${time}/";
|
|
114 mkdir $preprocess;
|
|
115 my $can_use_threads = eval 'use threads; 1';
|
|
116 if ($can_use_threads) {
|
|
117 # Do processing using threads
|
|
118 print "Do processing using threads\n";
|
|
119 my @filein1=@filein; my @mark1=@mark;
|
|
120 while (@filein1>0) {
|
|
121 my @thrs; my @res;
|
|
122 for (my $i=0;$i<$t ;$i++) {
|
|
123 last if(@filein1==0);
|
|
124 my $in=shift @filein1;
|
|
125 my $out=shift @mark1;
|
|
126 push @clean,$preprocess.$out."_clips_adapter.fq";
|
|
127 $thrs[$i]=threads->create(\&clips,$in,$out);
|
|
128 }
|
|
129 for (my $i=0;$i<@thrs;$i++) {
|
|
130 $res[$i]=$thrs[$i]->join();
|
|
131 }
|
|
132 }
|
|
133 } else {
|
|
134 # Do not processing using threads
|
|
135 print "Do not processing using threads\n";
|
|
136 for (my $i=0;$i<@filein ;$i++) {
|
|
137 my $in=$filein[$i];
|
|
138 my $out=$mark[$i];
|
|
139 push @clean,$preprocess.$out."_clips_adapter.fq";
|
|
140 &clips($in,$out);
|
|
141 }
|
|
142 }
|
|
143
|
|
144 ##### clip adpter --> clean data end
|
|
145
|
|
146 my $collapsed=$preprocess."collapse_reads.fa";
|
|
147 my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
|
|
148 my $data2; ### mirbase not mapped reads
|
|
149 my $data3; ### rfam not mapped reads
|
|
150 &collapse(\@clean,$collapsed); #collapse reads to tags
|
|
151
|
|
152 &filterbylength(); # filter <$min_nt && >$max_nt
|
|
153
|
|
154 print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
|
|
155
|
|
156 $time=Time();
|
|
157 print "$time: known microRNA quantify!\n\n";
|
|
158
|
|
159 chdir $dir;
|
|
160
|
|
161 $time=~s/:/-/g;
|
|
162 $time=~s/ /-/g;
|
|
163 my $known_result=$dir."miRNA_Express_${time}/";
|
|
164 &quantify(); ### known microRAN quantify
|
|
165
|
|
166
|
|
167 #my $miR_exp_dir=&search($known_result,"miRNA_Express_");
|
|
168 $data2=$known_result."/mirbase_not_mapped.fa";
|
|
169
|
|
170 my $pathfile="$dir/path.txt";
|
|
171 open PA,">$pathfile";
|
|
172 print PA "$config\n";
|
|
173 print PA "$preprocess\n";
|
|
174 print PA "$known_result\n";
|
|
175
|
|
176 if (defined $opts{'rfam'}) { #rfam mapping and analysis
|
|
177 $time=Time();
|
|
178 print "$time: RNA annotate!\n\n";
|
|
179 $time=~s/:/-/g;
|
|
180 $time=~s/ /-/g;
|
|
181 my $rfam_exp_dir=$dir."rfam_match_${time}";
|
|
182 &rfam();
|
|
183 #my $rfam_exp_dir=&search($dir,"rfam_match_");
|
|
184 $data3=$rfam_exp_dir."/rfam_not_mapped.fa";
|
|
185 print PA "$rfam_exp_dir\n";
|
|
186
|
|
187 my $tag=join "\\;" ,@mark;
|
|
188 system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
|
|
189 }
|
|
190
|
|
191 my $data4=$data;
|
|
192 if (defined $opts{'D'}) { #genome mapping
|
|
193 $data4=$data3;
|
|
194 }else{
|
|
195 $data4=$data2;
|
|
196 }
|
|
197
|
|
198 $time=Time();
|
|
199 print "$time: Genome alignment!\n\n";
|
|
200 $time=~s/:/-/g;
|
|
201 $time=~s/ /-/g;
|
|
202 my $genome_map=$dir."genome_match_${time}";
|
|
203 &genome($data4);
|
|
204 print PA "$genome_map\n";
|
|
205 #my $genome_map=&search($dir,"genome_match_");
|
|
206 my $mapfile=$genome_map."/genome_mapped.bwt";
|
|
207 my $mapfa=$genome_map."/genome_mapped.fa";
|
|
208 my $unmap=$genome_map."/genome_not_mapped.fa";
|
|
209
|
|
210 #$time=Time();
|
|
211 #print "$time: Novel microRNA prediction!\n\n";
|
|
212
|
|
213 &predict($mapfa);
|
|
214
|
|
215 close PA;
|
|
216 system("perl $scipt_path/html.pl -i $pathfile -format $format -o $dir/result.html");
|
|
217
|
|
218 $time=Time();
|
|
219 print "$time: Program end!!\n";
|
|
220
|
|
221 ############################## sub programs ###################################
|
|
222 sub predict{
|
|
223 my ($file)=@_;
|
|
224 $time=&Time();
|
|
225 print "$time: Novel microRNA prediction!\n\n";
|
|
226 $time=~s/:/-/g;
|
|
227 $time=~s/ /-/g;
|
|
228 my $predict=$dir."miRNA_predict_${time}";
|
|
229 print PA "$predict\n";
|
|
230 mkdir $predict;
|
|
231 chdir $predict;
|
|
232 system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe");
|
|
233 # print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n";
|
|
234
|
|
235 system("bowtie-build -f excised_precursor.fa excised_precursor");
|
|
236 # print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
|
|
237
|
44
|
238 system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
|
14
|
239 # print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
|
|
240
|
|
241 system("perl $scipt_path/convert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
|
|
242 # print "\nconvert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
|
|
243
|
44
|
244 system("sort -k 4 precursor_mapped.bst > signatures.bst");
|
14
|
245 # print "\nsort +3 -25 precursor_mapped.bst > ../signatures.bst\n";
|
|
246
|
|
247 chdir $dir;
|
|
248 system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
|
|
249 # print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
|
44
|
250 #system("rm novel_tmp_dir -rf");
|
14
|
251 my $tag=join "," ,@mark;
|
|
252 system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
|
|
253 }
|
|
254
|
|
255 sub genome{
|
|
256 my ($file)=@_;
|
|
257 if(defined $opts{'idx'}){
|
|
258 system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time") ;
|
|
259 # print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
|
|
260 }else{
|
|
261 system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time") ;
|
|
262 # print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
|
|
263 }
|
|
264 }
|
|
265 sub rfam{
|
|
266 if (defined $opts{'idx2'}) {
|
|
267 system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time");
|
|
268 # print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
|
|
269 }else{
|
|
270 system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time");
|
|
271 # print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
|
|
272 }
|
|
273 }
|
|
274 sub quantify{
|
|
275 my $tag=join "\\;" ,@mark;
|
|
276 system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
|
46
|
277 print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
|
14
|
278 }
|
|
279 sub filterbylength{
|
|
280 my $tmpmark=join ",", @mark;
|
|
281 system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
|
44
|
282 system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html");
|
14
|
283 # print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
|
|
284
|
|
285 }
|
|
286 sub collapse{
|
|
287 my ($ins,$data)=@_;
|
|
288 my $str="";
|
|
289 for (my $i=0;$i<@{$ins};$i++) {
|
|
290 $str .="-i $$ins[$i] ";
|
|
291 }
|
|
292 system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
|
|
293 # print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
|
|
294 }
|
|
295
|
|
296 sub clips{
|
|
297 my ($in,$out)=@_;
|
|
298 my $adapter=$preprocess.$out."_clips_adapter.fq";
|
|
299 if($format eq "fq" || $format eq "fastq"){
|
|
300 system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
|
|
301 print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
|
|
302 }
|
|
303 if($format eq "fa" || $format eq "fasta"){
|
|
304 system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
|
|
305 # print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
|
|
306 }
|
|
307 #my $clean=$preprocess.$out."_clean.fq";
|
|
308 #system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
|
|
309
|
|
310 return;
|
|
311 }
|
|
312
|
|
313 sub read_config{
|
|
314 open CON,"<$config";
|
|
315 while (my $aline=<CON>) {
|
|
316 chomp $aline;
|
|
317 my @tmp=split/\t/,$aline;
|
|
318 push @filein,$tmp[0];
|
|
319 push @mark,$tmp[1];
|
|
320 &check_rawdata($tmp[0]);
|
|
321 }
|
|
322 close CON;
|
|
323 if (@filein != @mark) {
|
46
|
324 #&printErr();
|
14
|
325 die "Maybe config file have some wrong!!!\n";
|
|
326 }
|
|
327 }
|
|
328 sub check_rawdata{
|
|
329 my ($fileforcheck)=@_;
|
|
330 if (!(-s $fileforcheck)) {
|
46
|
331 #&printErr();
|
14
|
332 die "Can not find $fileforcheck, or file is empty!!!\n";
|
|
333 }
|
|
334 if ($format eq "fasta" || $format eq "fa") {
|
|
335 &checkfa($fileforcheck);
|
|
336 }
|
|
337 if ($format eq "fastq" || $format eq "fq") {
|
|
338 &checkfq($fileforcheck);
|
|
339 }
|
|
340 }
|
|
341 sub checkfa{
|
|
342 my ($file_reads)=@_;
|
|
343 open N,"<$file_reads";
|
|
344 my $line=<N>;
|
|
345 chomp $line;
|
|
346 if($line !~ /^>\S+/){
|
46
|
347 #printErr();
|
14
|
348 die "The first line of file $file_reads does not start with '>identifier'
|
|
349 Reads file $file_reads is not a valid fasta file\n\n";
|
|
350 }
|
|
351 if(<N> !~ /^[ACGTNacgtn]*$/){
|
46
|
352 #printErr();
|
14
|
353 die "File $file_reads contains not allowed characters in sequences
|
|
354 Allowed characters are ACGTN
|
|
355 Reads file $file_reads is not a fasta file\n\n";
|
|
356 }
|
|
357 close N;
|
|
358 }
|
|
359 sub checkfq{
|
|
360 my ($file_reads)=@_;
|
|
361
|
|
362 open N,"<$file_reads";
|
|
363 for (my $i=0;$i<10;$i++) {
|
|
364 my $a=<N>;
|
|
365 my $b=<N>;
|
|
366 my $c=<N>;
|
|
367 my $d=<N>;
|
|
368 chomp $a;
|
|
369 chomp $b;
|
|
370 chomp $c;
|
|
371 chomp $d;
|
|
372 if($a!~/^\@/){
|
46
|
373 #&printErr();
|
14
|
374 die "$file_reads is not a fastq file\n\n";
|
|
375 }
|
|
376 if($b!~ /^[ACGTNacgtn]*$/){
|
46
|
377 #&printErr();
|
14
|
378 die "File $file_reads contains not allowed characters in sequences
|
|
379 Allowed characters are ACGTN
|
|
380 Reads file $file_reads is not a fasta file\n\n";
|
|
381 }
|
|
382 if ($c!~/^\@/ && $c!~/^\+/) {
|
46
|
383 #&printErr();
|
14
|
384 die "$file_reads is not a fastq file\n\n";
|
|
385 }
|
|
386 if ((length $b) != (length $d)) {
|
46
|
387 #&printErr();
|
14
|
388 die "$file_reads is not a fastq file\n\n";
|
|
389 }
|
|
390 my @qv=split //,$d;
|
|
391 for (my $j=0;$j<@qv ;$j++) {
|
|
392 my $q=ord($qv[$j])-64;
|
|
393 if($q<0){$phred_qv=33;}
|
|
394 }
|
|
395 }
|
|
396 close N;
|
|
397 }
|
|
398
|
|
399 sub search{
|
|
400 my ($dir,$str)=@_;
|
|
401 opendir I,$dir;
|
|
402 my @ret;
|
|
403 while (my $file=readdir I) {
|
|
404 if ($file=~/$str/) {
|
|
405 push @ret, $file;
|
|
406 }
|
|
407 }
|
|
408 closedir I;
|
|
409 if (@ret != 1) {
|
46
|
410 #&printErr();
|
14
|
411
|
|
412 die "Can not find directory or file which name has string: $str !!!\n";
|
|
413 }
|
|
414 return $ret[0];
|
|
415 }
|
|
416
|
46
|
417 =cut
|
|
418
|
14
|
419 sub printErr{
|
|
420 print STDERR color 'bold red';
|
|
421 print STDERR "Error: ";
|
|
422 print STDERR color 'reset';
|
|
423 }
|
|
424 sub Time{
|
|
425 my $time=time();
|
|
426 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
|
|
427 $month++;
|
|
428 $year+=1900;
|
|
429 if (length($sec) == 1) {$sec = "0"."$sec";}
|
|
430 if (length($min) == 1) {$min = "0"."$min";}
|
|
431 if (length($hour) == 1) {$hour = "0"."$hour";}
|
|
432 if (length($day) == 1) {$day = "0"."$day";}
|
|
433 if (length($month) == 1) {$month = "0"."$month";}
|
|
434 #print "$year-$month-$day $hour:$min:$sec\n";
|
|
435 return("$year-$month-$day-$hour-$min-$sec");
|
|
436 }
|
|
437 =cut
|
|
438 sub Time{
|
|
439 my $time=time();
|
|
440 my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
|
|
441 $month++;
|
|
442 $year+=1900;
|
|
443 if (length($sec) == 1) {$sec = "0"."$sec";}
|
|
444 if (length($min) == 1) {$min = "0"."$min";}
|
|
445 if (length($hour) == 1) {$hour = "0"."$hour";}
|
|
446 if (length($day) == 1) {$day = "0"."$day";}
|
|
447 if (length($month) == 1) {$month = "0"."$month";}
|
|
448 #print "$year-$month-$day $hour:$min:$sec\n";
|
|
449 return("$year-$month-$day $hour:$min:$sec");
|
|
450 }
|
|
451
|
|
452
|
|
453 sub usage{
|
|
454 print <<"USAGE";
|
|
455 Version $version
|
|
456 Usage:
|
46
|
457
|
14
|
458 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path
|
|
459 options:
|
46
|
460 -i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
|
|
461 -tag string # raw data file names, -tag xxx -tag xxx
|
14
|
462
|
|
463 -format string,#specific input rawdata file format : fastq|fq|fasta|fa
|
|
464
|
46
|
465 -path scirpt path
|
|
466
|
14
|
467 -gfa string, input file # genome fasta. sequence file
|
|
468 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
|
|
469 string must be the prefix of the bowtie index. For instance, if
|
|
470 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
|
|
471 the prefix is 'h_sapiens_37_asm'.##can be null
|
|
472
|
|
473 -pre string, input file #species specific microRNA precursor sequences
|
|
474 -mat string, input file #species specific microRNA mature sequences
|
|
475
|
|
476 -rfam string, input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
|
|
477 -idx2 string, rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
|
|
478 string must be the prefix of the bowtie index. For instance, if
|
|
479 the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
|
|
480 the prefix is 'h_sapiens_37_asm'.##can be null
|
|
481
|
|
482 -D If [-D] is specified,will discard rfam mapped reads(nead -rfam).
|
|
483
|
|
484 -a string, ADAPTER string. default is ATCTCGTATG.
|
|
485 -M int, require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it.
|
|
486 -min int, reads min length,default is 19.
|
|
487 -max int, reads max length,default is 28.
|
|
488
|
|
489 -mis [int] number of allowed mismatches when mapping reads to precursors, default 0
|
|
490 -e [int] number of nucleotides upstream of the mature sequence to consider, default 2
|
|
491 -f [int] number of nucleotides downstream of the mature sequence to consider, default 5
|
|
492 -v <int> report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
|
|
493 -r int a read is allowed to map up to this number of positions in the genome,default is 25
|
|
494
|
|
495 -dis <int> Maximal space between miRNA and miRNA* (200)
|
|
496 -flank <int> Flank sequence length of miRNA precursor (10)
|
|
497 -mfe <folat> Maximal free energy allowed for a miRNA precursor (-20)
|
|
498
|
|
499 -t int, number of threads [1]
|
|
500
|
|
501 -o output directory# absolute path
|
|
502 -h help
|
|
503 USAGE
|
|
504 exit(1);
|
|
505 }
|
|
506
|