Mercurial > repos > antmarge > dataoverview
comparison dataOverview.pl @ 3:80205e898861 draft default tip
New updates
| author | antmarge |
|---|---|
| date | Tue, 02 May 2017 21:20:54 -0400 |
| parents | b66f4a551e25 |
| children | |
comparison
equal
deleted
inserted
replaced
| 2:3ed885628c9f | 3:80205e898861 |
|---|---|
| 282 print OUT "$sat%\tSaturation of TA sites after cutoff filter (validInsertions/TAsites)\n"; | 282 print OUT "$sat%\tSaturation of TA sites after cutoff filter (validInsertions/TAsites)\n"; |
| 283 print OUT "$inscov%\tGenome coverage by insertions (validInsertions/genomeSize)\n"; | 283 print OUT "$inscov%\tGenome coverage by insertions (validInsertions/genomeSize)\n"; |
| 284 print OUT "$tacov%\tGenome coverage by TA sites (TAsites/genomeSize)\n"; | 284 print OUT "$tacov%\tGenome coverage by TA sites (TAsites/genomeSize)\n"; |
| 285 print OUT "$lg_dist_ta\tLargest distance between TA sites\n"; | 285 print OUT "$lg_dist_ta\tLargest distance between TA sites\n"; |
| 286 print OUT "$lg_dist_ins\tLargest distance between insertions\n"; | 286 print OUT "$lg_dist_ins\tLargest distance between insertions\n"; |
| 287 print OUT "\n\nOpen Reading Frames\n\n"; | |
| 288 | 287 |
| 289 #Store everything to be printed to OUT in an array | 288 #Store everything to be printed to OUT in an array |
| 290 my @table; | 289 my @table; |
| 291 | |
| 292 #Find open reading frames from fasta file | |
| 293 local $_ = $fasta; | |
| 294 my @orfSize; | |
| 295 my @allc; #numbers of TAs in the ORFS here. | |
| 296 my $blank=0; #ORFS that don't have any TA sites. | |
| 297 my $orfCount=0; #keep track of the number of ORFs found. | |
| 298 my $minSize=0; | |
| 299 #Read somewhere that 99 is a good min but there is an annotated 86 bp gene for 19F | |
| 300 while ( /ATG/g ) { | |
| 301 my $start = pos() - 3; | |
| 302 if ( /T(?:AA|AG|GA)/g ) { | |
| 303 my $stop = pos; | |
| 304 my $size=$stop - $start; | |
| 305 if ($size>=$minSize){ | |
| 306 push (@orfSize,$size); | |
| 307 my $seq=substr ($_, $start, $stop - $start); | |
| 308 my @ctemp = $seq =~ /$x/g; | |
| 309 my $countTA = @ctemp; | |
| 310 if ($countTA==0){$blank++} | |
| 311 push (@allc,$countTA); | |
| 312 $orfCount++; | |
| 313 } | |
| 314 } | |
| 315 } | |
| 316 | |
| 317 print OUT "\nORFs based on Fasta sequence and start (ATG) and end (TAA,TAG,TGA) codons\n"; | |
| 318 push (@table,["Set minimum size for an ORF",$minSize]); | |
| 319 print OUT "$orfCount\tTotal number of ORFs found\n"; | |
| 320 my ($minORF, $maxORF) = minmax(@orfSize); | |
| 321 print OUT "$minORF\tSmallest ORF\n"; | |
| 322 print OUT "$maxORF\tLargest ORF\n"; | |
| 323 my ($mintaORF,$maxtaORF) = minmax(@allc); | |
| 324 print OUT "$mintaORF\tFewest # TA sites in an ORF\n"; | |
| 325 print OUT "$maxtaORF\tGreatest # TA sites in an ORF\n"; | |
| 326 print OUT "$blank\tNumber of ORFs that don't have any TA sites\n"; | |
| 327 | |
| 328 | 290 |
| 329 print OUT "\nGenes using the genbank annotation file\n\n"; | 291 print OUT "\nGenes using the genbank annotation file\n\n"; |
| 330 ###Get genbank file. Find all start and stop for genes | 292 ###Get genbank file. Find all start and stop for genes |
| 331 #See how many insertions fall into genes vs intergenic regions | 293 #See how many insertions fall into genes vs intergenic regions |
| 332 #Get array of coordinates for all insertions then remove insertion if it is | 294 #Get array of coordinates for all insertions then remove insertion if it is |
