Mercurial > repos > big-tiandm > mirplant2
changeset 44:0c4e11018934 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 30 Oct 2014 21:29:19 -0400 |
parents | 4c0b1a94b882 |
children | 2cb6add23dfe |
files | DEGseq.pl Length_Distibution.pl collapseReads2Tags.pl convert_bowtie_to_blast.pl count_rfam_express.pl filterReadsByLength.pl html.pl matching.pl miRDeep_plant.pl miRNA_Express_and_sequence.pl miRPlant.pl miRPlant.xml precursors.pl quantify.pl randfold rfam.pl tool_dependencies.xml |
diffstat | 11 files changed, 545 insertions(+), 54 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DEGseq.pl Thu Oct 30 21:29:19 2014 -0400 @@ -0,0 +1,67 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2009-05-06 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h"); +if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $outputdir=$opts{'outdir'}; +unless ($outputdir=~/\/$/) {$outputdir .="/";} +my $column1=defined $opts{column1} ? $opts{column1} : 3; +my $column2=defined $opts{column2} ? $opts{column2} : 4; +my $mark1=$opts{mark1}; +my $mark2=$opts{mark2}; +my $fileout=$outputdir."degseq.R"; + +open OUT,">$fileout"; #output file + +print OUT "library(DEGseq)\n"; +print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n"; +print OUT "geneExpFile<-file.path(\"$filein\")\n"; +print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n"; +print OUT "outputdir<-file.path(\"$outputdir\")\n"; +print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n"; +print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n"; +if(defined $opts{'depth1'} && defined $opts{'depth2'}){ +print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n"; +} +else{ +print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n"; +} +close OUT; + +system("R CMD BATCH $fileout"); + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2 +options: +-i input file +-outdir output file dir +-column1 the first column for DEGseq +-mark1 the name of the column1 +-depth1 depth for the first file,use for normalize +-column2 the second column for DEGseq +-mark2 the name of the column2 +-depth2 depth for the second file,use for normalize + +-h help +USAGE +exit(1); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Length_Distibution.pl Thu Oct 30 21:29:19 2014 -0400 @@ -0,0 +1,219 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. input file of Gene region annotation which format like GenePred +# 2. input file of Transcripts region annotation which format like GenePred +# 3. input file of gene snp detail info +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"i=s","o=s",,"h"); +if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) { +&usage; +} +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + #'stroke-width2'=>1 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + 'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family2'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#========================================data============================ +open(IN,"$opt{i}")||die"cannot open the file $opt{i}"; +my @R_length; +my @T_length; +my $R_number=0; +my $T_number=0; +my $R_max=0; +my $T_max=0; + +my $title=<IN>; +chomp $title; +my @title=split/\t/,$title; +my @mark=split/\s+/,$title[1]; +my $sample_number=@mark; +while (my $aline=<IN>) { + if ($aline=~/^\s/) { + my $T_title=<IN>; + chomp $T_title; + while (my $a_aline=<IN>) { + chomp $a_aline; + my @temp=split/\t/,$a_aline; + my @number=split/\s+/,$temp[1]; + for (my $i=0;$i<@number ;$i++) { + if ($R_max<$number[$i]) { + $R_max=$number[$i]; + } + } + push @R_length,[$temp[0],@number]; + $R_number++; + } + } + else { + chomp $aline; + my @temp=split/\t/,$aline; + my @number=split/\s+/,$temp[1]; + for (my $i=0;$i<@number ;$i++) { + if ($T_max<$number[$i]) { + $T_max=$number[$i]; + } + } + push @T_length,[$temp[0],@number]; + $T_number++; + } +} +close IN; +print "Tag max: $T_max\nRead max: $R_max\n"; +my $kd_number=5; +##=======================Reads 纵坐标刻度========================== +my $r=1; +my $rr=1; +my $R=$R_max; +while ($R>10) { + $R=$R/10; + $r=$r*10; + $rr++; +} +$R=int($R+0.5); +my $R_xg=$R/$kd_number*$r;#纵坐标一小格大小(一共10格) +my $R_kedu_scale_x=6*$rr;#纵坐标刻度文字 +##=======================Tags 纵坐标刻度========================== +my $t=1; +my $tt=1; +my $T=$T_max; +while ($T>10) { + $T=$T/10; + $t=$t*10; + $tt++; +} +$T=int($T+0.5); +my $T_xg=$T/$kd_number*$t;#纵坐标一小格大小(一共10格) +my $T_kedu_scale_x=6*$tt;#纵坐标刻度文字 + +#############################s#define start coordinate and scale +my $XOFFSET=50; +my $YOFFSET=60; +my $width=800; +my $heigth=800; +my $X_width=600; +#my $height=1600; +#### Starting #### +#新建画布 +my $svg=SVG->new(width=>$width,height=>$heigth); +####坐标轴 +my $axisL=300;#read 纵坐标长度 +my $x_margin = 50; +#=========Reads number setting========================================== +my $Y_R_title=30;#标题的纵向宽度 +my $Y_R_0=$YOFFSET+$axisL+$Y_R_title; +my $X_R_0=$XOFFSET+$x_margin; +my $R_Yscale=$axisL/$R_xg/$kd_number; +my $R_Xscale=$X_width/$R_number/($sample_number+1); +#=====================================Reads Y axis====================== +$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +for (my $i=1;$i<$kd_number ;$i++) { + $svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg); +} +#=====================================Reads X axis====================== +$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + +#print "$R_number\t$sample_number\n"; +for ($i=0;$i<$R_number ;$i++) { + for (my $j=1;$j<$sample_number+1 ;$j++) { + my $red=$j/$sample_number*255; + $svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + } + $svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]); +} +#===Reads number title +$svg->text('x',$XOFFSET+400,'y',$YOFFSET+$Y_R_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution"); +#===Reads +for (my $i=0;$i<$sample_number ;$i++) { + my $red=($i+1)/$sample_number*255; + $svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + $svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); +} +####================================================================================== +#=========================================Tag s +my $Y_T_title=30;#标题的纵向宽度 +my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size +my $X_T_0=$XOFFSET+$x_margin; +my $T_Yscale=$axisL/$T_xg/$kd_number; +my $T_Xscale=$X_width/$T_number/($sample_number+1); +#=====================================Tags Y axis====================== +$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +for (my $i=1;$i<$kd_number ;$i++) { + $svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg); +} +#=====================================Tags X axis====================== +$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + +#print "$R_number\t$sample_number\n"; +for ($i=0;$i<$T_number ;$i++) { + for (my $j=1;$j<$sample_number+1 ;$j++) { + my $red=$j/$sample_number*255; + $svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + } + $svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]); +} +#===Reads number title +$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution"); +#===Reads +for (my $i=0;$i<$sample_number ;$i++) { + my $red=($i+1)/$sample_number*255; + $svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + $svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); +} + + + + +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-i +-o svg output +-h help +USAGE +exit(1); +} \ No newline at end of file
--- a/html.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/html.pl Thu Oct 30 21:29:19 2014 -0400 @@ -161,13 +161,17 @@ The clean data file path is: <b>$clean</b><br /> </p> <h2> 1. Sequence length count</h2> -<h3> 1.1 Reads length</h3> "; +print OUT "\n"; -print OUT "<img src=\"./$predir/Reads_length.png\" alt=\"Reads_length.png\" width=\"400\" height=\"300\"/> -<h3> 1.2 Tags length count</h3> -<img src=\"./$predir/Tags_length.png\" alt=\"Tags_length.png\" width=\"400\" height=\"300\"/> -<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a> +my $length=$prepath."length.html"; +open IN,"<$length"; +while (my $aline=<IN>) { + chomp $aline; + print OUT "$aline\n"; +} + +print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a> </p> ";
--- a/matching.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/matching.pl Thu Oct 30 21:29:19 2014 -0400 @@ -44,7 +44,7 @@ } ### genome mapping -`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa --max genome_mapped_Mlimit.fa > genome_mapped.bwt`; +`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa --max genome_mapped_Mlimit.fa > genome_mapped.bwt 2> run.log`; #`convert_bowtie_to_blast.pl genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`;
--- a/miRDeep_plant.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/miRDeep_plant.pl Thu Oct 30 21:29:19 2014 -0400 @@ -3,7 +3,7 @@ use warnings; use strict; use Getopt::Std; - +use RNA; ################################# MIRDEEP ################################################# @@ -125,7 +125,7 @@ #parse signature file in blast_parsed format and resolve each potential precursor parse_file_blast_parsed($file_blast_parsed); -`rm -rf $tmpdir`; +#`rm -rf $tmpdir`; exit; @@ -360,14 +360,16 @@ #print sequence to temporary file, test randfold value, return 1 or 0 # print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"}); - my $tmpfile=$tmpdir.$hash_comp{"pri_id"}; - open(FILE, ">$tmpfile"); - print FILE ">pri_seq\n",$hash_comp{"pri_seq"}; - close FILE; + #my $tmpfile=$tmpdir.$hash_comp{"pri_id"}; + #open(FILE, ">$tmpfile"); + #print FILE ">pri_seq\n",$hash_comp{"pri_seq"}; + #close FILE; # my $p_value=`randfold -s $tmpfile 999 | cut -f 3`; - my $p1=`randfold -s $tmpfile 999 | cut -f 3`; - my $p2=`randfold -s $tmpfile 999 | cut -f 3`; + #my $p1=`randfold -s $tmpfile 999 | cut -f 3`; + #my $p2=`randfold -s $tmpfile 999 | cut -f 3`; + my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999); + my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999); my $p_value=($p1+$p2)/2; wait; # system "rm $tmpfile"; @@ -377,18 +379,82 @@ return 0; } +sub randfold_pvalue{ + my $cpt_sup = 0; + my $cpt_inf = 0; + my $cpt_ega = 1; + + my ($seq,$number_of_randomizations)=@_; + my $str =$seq; + my $mfe = RNA::fold($seq,$str); -#sub print_file{ + for (my $i=0;$i<$number_of_randomizations;$i++) { + $seq = shuffle_sequence_dinucleotide($seq); + $str = $seq; + + my $rand_mfe = RNA::fold($str,$str); + + if ($rand_mfe < $mfe) { + $cpt_inf++; + } + if ($rand_mfe == $mfe) { + $cpt_ega++; + } + if ($rand_mfe > $mfe) { + $cpt_sup++; + } + } + + my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); - #print string to file + #print "$name\t$mfe\t$proba\n"; + return $proba; +} + +sub shuffle_sequence_dinucleotide { + + my ($str) = @_; -# my($file,$string)=@_; + # upper case and convert to ATGC + $str = uc($str); + $str =~ s/U/T/g; + + my @nuc = ('A','T','G','C'); + my $count_swap = 0; + # set maximum number of permutations + my $stop = length($str) * 10; + + while($count_swap < $stop) { + + my @pos; + + # look start and end letters + my $firstnuc = $nuc[int(rand 4)]; + my $thirdnuc = $nuc[int(rand 4)]; -# open(FILE, ">$file"); -# print FILE "$string"; -# close FILE; -#} - + # get positions for matching nucleotides + for (my $i=0;$i<(length($str)-2);$i++) { + if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { + push (@pos,($i+1)); + $i++; + } + } + + # swap at random trinucleotides + my $max = scalar(@pos); + for (my $i=0;$i<$max;$i++) { + my $swap = int(rand($max)); + if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { + $count_swap++; + my $w1 = substr($str,$pos[$i],1); + my $w2 = substr($str,$pos[$swap],1); + substr($str,$pos[$i],1,$w2); + substr($str,$pos[$swap],1,$w1); + } + } + } + return($str); +} sub test_nucleus_conservation{ @@ -1279,7 +1345,7 @@ my $freq=$1; return $freq; }else{ - print STDERR "Problem with read format\n"; + #print STDERR "Problem with read format\n"; return 0; } } @@ -1497,7 +1563,7 @@ #parameters of known precursors and background hairpins, scale and location my $a=1.339e-12;my $b=2.778e-13;my $c=45.834; my $ev=$e**($mfe_adj1*$c); - print STDERR "\n***",$ev,"**\t",$ev+$b,"\t"; + #print STDERR "\n***",$ev,"**\t",$ev+$b,"\t"; my $log_odds=($a/($b+$ev)); @@ -1506,7 +1572,7 @@ my $odds=$prob_test/$prob_background; my $log_odds_2=log($odds); - print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n"; + #print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n"; return $log_odds; }
--- a/miRPlant.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/miRPlant.pl Thu Oct 30 21:29:19 2014 -0400 @@ -13,7 +13,7 @@ use threads::shared; use File::Path; use File::Basename; -#use RNA; +use RNA; use Term::ANSIColor; my %opts; @@ -235,19 +235,19 @@ system("bowtie-build -f excised_precursor.fa excised_precursor"); # print "\nbowtie-build -f excised_precursor.fa excised_precursor\n"; - system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt"); + system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log"); # print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n"; system("perl $scipt_path/convert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst"); # print "\nconvert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n"; - system("sort +3 -25 precursor_mapped.bst > signatures.bst"); + system("sort -k 4 precursor_mapped.bst > signatures.bst"); # print "\nsort +3 -25 precursor_mapped.bst > ../signatures.bst\n"; chdir $dir; system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd"); # print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n"; - system("rm novel_tmp_dir -rf"); + #system("rm novel_tmp_dir -rf"); my $tag=join "," ,@mark; system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag"); } @@ -279,6 +279,7 @@ sub filterbylength{ my $tmpmark=join ",", @mark; system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark"); + system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html"); # print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n"; }
--- a/miRPlant.xml Tue Oct 28 01:35:32 2014 -0400 +++ b/miRPlant.xml Thu Oct 30 21:29:19 2014 -0400 @@ -4,9 +4,9 @@ <requirements> <requirement type="set_environment">SCRIPT_PATH</requirement> <requirement type="package" version="0.12.7">bowtie</requirement> - <requirement type="package" version="2.11.0">R</requirement> + <requirement type="package" version="3.0.1">R</requirement> <requirement type="package" version="0.0.13">fastx_toolkit </requirement> - <requirement type="package" version="1.5.0">X11</requirement> + <requirement type="package" version="1.5.0">libx11</requirement> <requirement type="package" version="2.1.8">ViennaRNA</requirement> </requirements> @@ -27,7 +27,7 @@ -tag ${s.tag} #end for - -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe + -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe > run.log </command> <inputs> @@ -72,16 +72,15 @@ </inputs> <outputs> - <data format="txt" name="known microRNA express list" from_work_dir="miRPlant_out/known_microRNA_express.txt"/> - <data format="txt" name="known microRNA express alignment" from_work_dir="miRPlant_out/known_microRNA_express.aln"/> - <data format="txt" name="known microRNA moRs result" from_work_dir="miRPlant_out/known_microRNA_express.moRs"/> - <data format="txt" name="known microRNA precursor file" from_work_dir="miRPlant_out/known_microRNA_precursor.fa"/> - <data format="txt" name="known microRNA mature file" from_work_dir="miRPlant_out/known_microRNA_mature.fa"/> - <data format="txt" name="novel microRNA prediction file" from_work_dir="miRPlant_out/known_microRNA_mature.fa"/> - <data format="txt" name="novel microRNA express list" from_work_dir="miRPlant_out/novel_microRNA_express.txt"/> - <data format="txt" name="novel microRNA precursor file" from_work_dir="miRPlant_out/novel_microRNA_precursor.fa"/> - <data format="txt" name="novel microRNA mature sequence file" from_work_dir="miRPlant_out/novel_microRNA_mature.fa"/> - <data format="txt" name="analysis result" from_work_dir="miRPlant_out/result.html"/> + <data format="txt" name="known microRNA express list" from_work_dir="miRPlant_out/known_microRNA_express.txt" label="${tool.name} on ${on_string}: known microRNA express list"/> + <data format="txt" name="known microRNA express alignment" from_work_dir="miRPlant_out/known_microRNA_express.aln" label="${tool.name} on ${on_string}: known microRNA express alignment"/> + <data format="txt" name="known microRNA moRs result" from_work_dir="miRPlant_out/known_microRNA_express.moRs" label="${tool.name} on ${on_string}: known microRNA moRs result"/> + <data format="txt" name="known microRNA precursor file" from_work_dir="miRPlant_out/known_microRNA_precursor.fa" label="${tool.name} on ${on_string}: known microRNA precursor file"/> + <data format="txt" name="known microRNA mature file" from_work_dir="miRPlant_out/known_microRNA_mature.fa" label="${tool.name} on ${on_string}: known microRNA mature file"/> + <data format="txt" name="novel microRNA express list" from_work_dir="miRPlant_out/novel_microRNA_express.txt" label="${tool.name} on ${on_string}: novel microRNA express list"/> + <data format="txt" name="novel microRNA precursor file" from_work_dir="miRPlant_out/novel_microRNA_precursor.fa" label="${tool.name} on ${on_string}: novel microRNA precursor file"/> + <data format="txt" name="novel microRNA mature sequence file" from_work_dir="miRPlant_out/novel_microRNA_mature.fa" label="${tool.name} on ${on_string}: novel microRNA mature sequence file"/> + <data format="html" name="analysis result" from_work_dir="miRPlant_out/result.html" label="${tool.name} on ${on_string}: analysis result"/> </outputs> <help>
--- a/quantify.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/quantify.pl Thu Oct 30 21:29:19 2014 -0400 @@ -422,16 +422,16 @@ sub mapping{ my $err; ## build bowtie index - print STDERR "building bowtie index\n"; + #print STDERR "building bowtie index\n"; $err = `bowtie-build $pre_file_name miRNA_precursor`; ## map mature sequences against precursors - print STDERR "mapping mature sequences against index\n"; - $err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature mature_mapped.bwt`; + #print STDERR "mapping mature sequences against index\n"; + $err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature > mature_mapped.bwt 2> run.log`; ## map reads against precursors - print STDERR "mapping read sequences against index\n"; - $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa read_mapped.bwt `; + #print STDERR "mapping read sequences against index\n"; + $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa > read_mapped.bwt 2> run.log`; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/randfold Thu Oct 30 21:29:19 2014 -0400 @@ -0,0 +1,139 @@ +# randfold (c) Eric Bonnet & Jan Wuyts 2003 +# randomization test for rna secondary structure + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# This soft needs BioPerl and ViennaRNA packages +# See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/ +# See also README + +use strict; +use Bio::SeqIO; +use RNA; + +if (scalar(@ARGV) != 3) { + die "Usage $0 fasta_file number_of_randomizations type[m|d]\n"; +} + +my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta"); +my $number_of_randomizations = $ARGV[1]; +my $type = $ARGV[2]; +if ($type !~ /[m|d]/) { + die "error: wrong type of randomization."; +} + +while (my $o = $in->next_seq) { + my $seq = uc($o->seq()); + my $name = $o->display_id(); + + my $cpt_sup = 0; + my $cpt_inf = 0; + my $cpt_ega = 1; + + my $str = $seq; + my $mfe = RNA::fold($seq,$str); + + for (my $i=0;$i<$number_of_randomizations;$i++) { + if ($type eq "d") { + $seq = shuffle_sequence_dinucleotide($seq); + $str = $seq; + } + elsif ($type eq "m") { + my @tmp = split //,$seq; + fisher_yates_shuffle(\@tmp); + $seq = join '',@tmp; + $str = $seq; + } + + my $rand_mfe = RNA::fold($str,$str); + + if ($rand_mfe < $mfe) { + $cpt_inf++; + } + if ($rand_mfe == $mfe) { + $cpt_ega++; + } + if ($rand_mfe > $mfe) { + $cpt_sup++; + } + } + + my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); + + print "$name\t$mfe\t$proba\n"; +} + +# fisher_yates_shuffle( \@array ) : +# generate a random permutation of @array in place +# input array ref +# return nothing +sub fisher_yates_shuffle { + my $array = shift; + my $i; + for ($i = @$array; --$i; ) { + my $j = int rand ($i+1); + next if $i == $j; + @$array[$i,$j] = @$array[$j,$i]; + } +} + +# shuffle a sequence while preserving dinucleotide distribution +# input sequence string +# return sequence string shuffled +sub shuffle_sequence_dinucleotide { + + my ($str) = @_; + + # upper case and convert to ATGC + $str = uc($str); + $str =~ s/U/T/g; + + my @nuc = ('A','T','G','C'); + my $count_swap = 0; + # set maximum number of permutations + my $stop = length($str) * 10; + + while($count_swap < $stop) { + + my @pos; + + # look start and end letters + my $firstnuc = $nuc[int(rand 4)]; + my $thirdnuc = $nuc[int(rand 4)]; + + # get positions for matching nucleotides + for (my $i=0;$i<(length($str)-2);$i++) { + if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { + push (@pos,($i+1)); + $i++; + } + } + + # swap at random trinucleotides + my $max = scalar(@pos); + for (my $i=0;$i<$max;$i++) { + my $swap = int(rand($max)); + if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { + $count_swap++; + my $w1 = substr($str,$pos[$i],1); + my $w2 = substr($str,$pos[$swap],1); + substr($str,$pos[$i],1,$w2); + substr($str,$pos[$swap],1,$w1); + } + } + } + return($str); +} +
--- a/rfam.pl Tue Oct 28 01:35:32 2014 -0400 +++ b/rfam.pl Thu Oct 30 21:29:19 2014 -0400 @@ -47,7 +47,7 @@ $index="$rfam"; } ### genome mapping -`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt`; +`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`; sub checkACGT{ my $string;
--- a/tool_dependencies.xml Tue Oct 28 01:35:32 2014 -0400 +++ b/tool_dependencies.xml Thu Oct 30 21:29:19 2014 -0400 @@ -9,12 +9,8 @@ <set_environment version="1.0"> <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> </set_environment> - <package name="R" version="2.11.0"> - <repository changeset_revision="8d0a55bf7aaf" name="package_r_2_11_0" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - - <package name="X11" version="1.5.0"> - <repositoty name="package_libx11_1_5_0" owner="devteam" /> + <package name="R" version="3.0.1"> + <repository changeset_revision="c5ff6dd33c79" name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> </package> <package name="ViennaRNA" version="2.1.8">