Mercurial > repos > big-tiandm > sirna_plant
view conventional.pl @ 2:911a4581f480 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 23 Oct 2014 21:35:28 -0400 |
parents | 07745c0958dd |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Filename: #Author: Chentt #Email: chentt@big.ac.cn #Date: 2014/04/09 #Modified: #Description: islands merged of merged samples my $version=1.00; use strict; use Getopt::Long; my %opts; GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h"); if (!(defined $opts{i} and defined $opts{d} and defined $opts{N} and defined $opts{mark} and defined $opts{t} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments &usage; } my $filein=$opts{'i'}; my $fileout=$opts{'o'}; my $distance=$opts{'d'}; my $tempout=$opts{'t'}; my $mark=$opts{'mark'}; my @sample=split/\#/,$mark; $mark=join"\"\t\"",@sample; open IN,"<$filein"; #input file open OUT,">$fileout"; #output file print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n"; open TMP,">$tempout"; print TMP "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n"; my %hash; while (my $aline=<IN>) { chomp $aline; if($aline=~/^\#/){ #print OUT "$aline\n"; next; } my @tmp=split/\t/,$aline; my $chr=shift @tmp; #shift @tmp; push @{$hash{$chr}},[@tmp]; } close IN; foreach my $key (keys %hash) { my @tag=sort{$a->[1] <=> $b->[1]} @{$hash{$key}}; my @sample; my $start=$tag[0][1]; my $end=$tag[0][2]; push @sample,[@{$tag[0]}]; for (my $i=1;$i<@tag-1;$i++) { if ($tag[$i][1]-$end<=$distance) { if ($tag[$i][2]>$end) { $end=$tag[$i][2]; } push @sample,[@{$tag[$i]}]; } else{ my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample); my $cluster_exp=join"\t",@cluster_exp; if ($max_length>30) { print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; $max_length="\>30"; } else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";} print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n"; $start=$tag[$i][1]; $end=$tag[$i][2]; @sample=(); push @sample,[@{$tag[$i]}]; } } if ($tag[$#tag][1]-$end<=$distance) { if ($tag[$#tag][2]>$end) { $end=$tag[$#tag][2]; } push @sample,[@{$tag[$#tag]}]; my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample); my $cluster_exp=join"\t",@cluster_exp; if ($max_length>30) { $max_length="\>30"; print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; } else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";} print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n"; } else{ my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample); my $cluster_exp=join"\t",@cluster_exp; if ($max_length>30) { $max_length="\>30"; print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; } else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";} print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n"; } } close OUT; close TMP; sub Max_length{ my @exp=@{$_[0]}; my %sample_length; my $total_exp; my @each; my @tag; for (my $i=0;$i<=$#exp ;$i++) { my $length=$exp[$i][2]-$exp[$i][1]+1; #if ($length>30) { # $length=40; #} my $exp=0; foreach (1..$opts{'N'}) { $exp+=$exp[$i][$_+2]; $each[$_-1]+=$exp[$i][$_+2]; } $sample_length{$length}+=$exp; $total_exp+=$exp; push @tag,($exp[$i][1].",".$exp[$i][2].",".$exp[$i][0].",".$exp); } my $max=0; my $max_key; foreach my $key (sort keys %sample_length) { my $p=$sample_length{$key}/$total_exp; if ($p>$max) { $max=$p; $max_key=$key; } $sample_length{$key}=sprintf("%.2f",$p); } my $tag_n=@tag; my $tag=join";",@tag; $tag=$tag_n."\t".$tag; return($max_key,$sample_length{$max_key},$tag,@each); } sub usage{ print <<"USAGE"; Version $version Usage: $0 -i -o -d -N -t -mark options: -i input file -d distance of two islands -mark sample name; -o output file -N sample number -t temp output file -h help USAGE exit(1); }