Mercurial > repos > big-tiandm > mirplant2
view miRNA_Express_and_sequence.pl @ 42:3b9a6381e6bf draft
Uploaded
author | big-tiandm |
---|---|
date | Tue, 28 Oct 2014 01:35:17 -0400 |
parents | d1cc2e6ecf90 |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Filename: #Author: Tian Dongmei #Email: tiandm@big.ac.cn #Date: 2014-6-4 #Modified: #Description: solexa miRNA express and sequence my $version=1.00; use strict; use Getopt::Long; my %opts; GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h"); if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments &usage; } my $filein=$opts{'i'}; my $fileout=$opts{'list'}; my $out=$opts{'fa'}; my $preout=$opts{'pre'}; =cut my %hash_pri; open PRI,"<$opts{p}"; while (my $aline=<PRI>) { chomp $aline; if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;} } close PRI; =cut open IN,"<$filein"; #input file open OUT,">$fileout"; #output file open FA ,">$out"; open PRE,">$preout"; print OUT "#ID\tcoordinate\tpos1\tpos2"; my @marks=split/\,/,$opts{'tag'}; foreach (@marks) { print OUT "\t",$_,"_matureExp"; } foreach (@marks) { print OUT "\t",$_,"_starExp"; } foreach (@marks) { print OUT "\t",$_,"_totalExp"; } print OUT "\n"; my (%uniq_id,$novel); while (my $aline=<IN>) { chomp $aline; until ($aline =~ /^score\s+[-\d\.]+/){ $aline = <IN>; if (eof) {last;} } if (eof) {last;} ########## miRNA ID ################ $novel++; ########### annotate#################### do {$aline=<IN>;} until($aline=~/flank_first_end/) ; chomp $aline; my @flank1=split/\t/,$aline; do {$aline=<IN>;} until($aline=~/flank_second_beg/) ; chomp $aline; my @flank2=split/\t/,$aline; # ########## mature start loop pre #### do {$aline=<IN>;} until($aline=~/mature_beg/) ; chomp $aline; my @start=split/\t/,$aline; # $start[1] -=$flank1[1]; do {$aline=<IN>;} until($aline=~/mature_end/) ; chomp $aline; my @end=split/\t/,$aline; # $end[1] -=$flank1[1]; do {$aline=<IN>;} until($aline=~/mature_seq/) ; chomp $aline; my @arr1=split/\t/,$aline; do {$aline=<IN>;} until($aline=~/pre_seq/) ; chomp $aline; my @arr2=split/\t/,$aline; do {$aline=<IN>;} until($aline=~/pri_id/) ; chomp $aline; my @pri_id=split/\t/,$aline; do {$aline=<IN>;} until($aline=~/pri_seq/) ; chomp $aline; my @pri_seq=split/\t/,$aline; do {$aline=<IN>;} until($aline=~/star_beg/) ; chomp $aline; my @star_start=split/\t/,$aline; # $star_start[1] -=$flank1[1]; do {$aline=<IN>;} until($aline=~/star_end/) ; chomp $aline; my @star_end=split/\t/,$aline; # $star_end[1] -=$flank1[1]; do {$aline=<IN>;} until($aline=~/star_seq/) ; chomp $aline; my @arr3=split/\t/,$aline; print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t"; #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t"; print FA ">miR-c-$novel\n$arr1[1]\n"; print PRE ">miR-c-$novel\n$pri_seq[1]\n"; ########## reads count ############# <IN>; my @count1;my @count2;my @count3;my @count4; $aline=<IN>; do { chomp $aline; my @reads=split/\t/,$aline; my @pos=(); $reads[5]=~/(\d+)\.\.(\d+)/; # $pos[0] =$1-$flank1[1]; # $pos[1] =$2-$flank1[1]; $pos[0]=$1; $pos[1]=$2; $reads[0]=~/:([\d|_]+)_x(\d+)$/; my @ss=split/_/,$1; for (my $i=0;$i<@ss ;$i++) { if (!(defined $count3[$i])) { $count3[$i]=0; } if (!(defined $count4[$i])) { $count4[$i]=0; } $count2[$i]+=$ss[$i]; } # $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 ); # $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 ); # $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1); # $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1); if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 ) { for (my $i=0;$i<@ss;$i++) { $count3[$i]+=$ss[$i]; } } if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){ for (my $i=0;$i<@ss;$i++) { $count4[$i]+=$ss[$i]; } } $aline=<IN>; chomp $aline; } until(length $aline < 1) ; $"="\t"; print OUT "@count3\t@count4\t@count2\n"; $"=" "; } close IN; close OUT; sub usage{ print <<"USAGE"; Version $version Usage: $0 -i -list -fa -pre -tag options: -i input file,predictions file -list output file miRNA list file -fa output file ,miRNA sequence fasta file. -pre output file, miRNA precursor fasta file. -tag string, sample names# eg: samA,samB,samC -h help USAGE exit(1); }