Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer_workflow/Perl/Naegleria/calculateFeatureDensitiesFromGFF.pl @ 1:032f6b3806a3 draft
Uploaded
| author | dereeper |
|---|---|
| date | Thu, 30 May 2024 11:16:08 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3cbb01081cde | 1:032f6b3806a3 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 | |
| 5 my $gff = $ARGV[0]; | |
| 6 my $window_size = $ARGV[1]; | |
| 7 | |
| 8 my %counts; | |
| 9 my %hash; | |
| 10 open(F,$gff); | |
| 11 while(<F>){ | |
| 12 if (/^#/){next;} | |
| 13 my @infos = split("\t",$_); | |
| 14 my $chr = $infos[0]; | |
| 15 #$chr =~s/chr//g; | |
| 16 my $source = $infos[1]; | |
| 17 my $feature = $infos[2]; | |
| 18 my $start = $infos[3]; | |
| 19 my $end = $infos[4]; | |
| 20 my $window_num = int($start/$window_size)+1; | |
| 21 my $window_num2 = int($end/$window_size)+1; | |
| 22 if ($source eq "repeatmasker" && $feature eq "match"){ | |
| 23 $feature = "repeats"; | |
| 24 } | |
| 25 if ($feature ne "gene" && $feature ne "CDS" && $feature ne "repeats" && $feature !~/UTR/ && $feature ne "exon"){next;} | |
| 26 $counts{$chr}{$window_num}{$feature}++; | |
| 27 if ($window_num == $window_num2){ | |
| 28 for (my $i = $start; $i <= $end; $i++){ | |
| 29 $hash{$chr}{$window_num}{$feature}{$i} = 1; | |
| 30 } | |
| 31 } | |
| 32 else{ | |
| 33 for (my $i = $start; $i <= ($window_num*$window_size); $i++){ | |
| 34 $hash{$chr}{$window_num}{$feature}{$i} = 1; | |
| 35 } | |
| 36 for (my $j = ($window_num2-1)*$window_size; $j <= $end; $j++){ | |
| 37 $hash{$chr}{$window_num2}{$feature}{$j} = 1; | |
| 38 } | |
| 39 if (($window_num2 - $window_num) > 1){ | |
| 40 for (my $window_num_new = $window_num + 1; $window_num_new <= $window_num2 - 1; $window_num_new++){ | |
| 41 for (my $j = (($window_num_new-1)*$window_size); $j <= ($window_num_new*$window_size); $j++){ | |
| 42 $hash{$chr}{$window_num_new}{$feature}{$j} = 1; | |
| 43 } | |
| 44 } | |
| 45 } | |
| 46 | |
| 47 } | |
| 48 } | |
| 49 close(F); | |
| 50 | |
| 51 #my $refhash2 = $hash{"Chr13"}{"1"}{"exon"}; | |
| 52 #my %subhash2 = %$refhash2; | |
| 53 #print scalar %subhash2; | |
| 54 #exit; | |
| 55 | |
| 56 open(G2,">gene_counts.txt"); | |
| 57 open(G,">gene_density.txt"); | |
| 58 open(I,">exon_density.txt"); | |
| 59 open(R,">repeat_density.txt"); | |
| 60 foreach my $chr(sort {$a<=>$b} keys(%hash)){ | |
| 61 #print G "$chr 0 1 0\n"; | |
| 62 #print G "$chr 1 2 1\n"; | |
| 63 #print I "$chr 0 1 0\n"; | |
| 64 #print I "$chr 0 1 0\n"; | |
| 65 my $refhash = $hash{$chr}; | |
| 66 my %subhash = %$refhash; | |
| 67 foreach my $window_num(sort {$a<=>$b} keys(%subhash)){ | |
| 68 | |
| 69 # genes | |
| 70 my $proportion_gene = 0; | |
| 71 if ($hash{$chr}{$window_num}{"gene"}){ | |
| 72 my $refhash2 = $hash{$chr}{$window_num}{"gene"}; | |
| 73 my %subhash2 = %$refhash2; | |
| 74 $proportion_gene = scalar %subhash2 / $window_size; | |
| 75 if ($proportion_gene > 1.1){ | |
| 76 #print "$chr $window_num $proportion_gene ".scalar %subhash2."\n";exit; | |
| 77 } | |
| 78 } | |
| 79 | |
| 80 # exon | |
| 81 my $proportion_exon = 0; | |
| 82 if ($hash{$chr}{$window_num}{"CDS"}){ | |
| 83 my $refhash2 = $hash{$chr}{$window_num}{"CDS"}; | |
| 84 my %subhash2 = %$refhash2; | |
| 85 $proportion_exon = scalar %subhash2 / $window_size; | |
| 86 } | |
| 87 | |
| 88 # repeat | |
| 89 my $proportion_repeat = 0; | |
| 90 if ($hash{$chr}{$window_num}{"repeats"}){ | |
| 91 my $refhash2 = $hash{$chr}{$window_num}{"repeats"}; | |
| 92 my %subhash2 = %$refhash2; | |
| 93 $proportion_repeat = scalar %subhash2 / $window_size; | |
| 94 #$proportion_repeat = $proportion_repeat + $proportion_gene; | |
| 95 } | |
| 96 else{ | |
| 97 #$proportion_repeat = $proportion_gene; | |
| 98 $proportion_repeat = 0; | |
| 99 } | |
| 100 my $start = ($window_num-1)*$window_size; | |
| 101 my $end = $window_num*$window_size; | |
| 102 print I "$chr $start $end $proportion_exon\n"; | |
| 103 print G "$chr $start $end $proportion_gene\n"; | |
| 104 print R "$chr $start $end $proportion_repeat\n"; | |
| 105 if ($counts{$chr}{$window_num}{"gene"}){ | |
| 106 print G2 "$chr $start $end ".$counts{$chr}{$window_num}{"gene"}."\n"; | |
| 107 } | |
| 108 else{ | |
| 109 print G2 "$chr $start $end 0\n"; | |
| 110 } | |
| 111 } | |
| 112 } | |
| 113 close(G); | |
| 114 close(I); | |
| 115 close(R); | |
| 116 close(G2); | |
| 117 |
