Mercurial > repos > jnavarro > creatematrix
comparison CreateMatrix.pl @ 0:e94977a3dcad draft
Uploaded
| author | jnavarro |
|---|---|
| date | Thu, 26 Oct 2017 06:41:56 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e94977a3dcad |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 #V1.0.0 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use Getopt::Long; | |
| 6 | |
| 7 my $input_pileup_file; | |
| 8 my $input_mpileupFiltred_file; | |
| 9 my $input_name; | |
| 10 | |
| 11 GetOptions ( | |
| 12 "input_name=s" => \$input_name, | |
| 13 "input_pileup_file=s" => \$input_pileup_file, | |
| 14 "input_mpileupFiltred_file=s" => \$input_mpileupFiltred_file | |
| 15 ) or die("Error in command line arguments\n"); | |
| 16 | |
| 17 | |
| 18 print "Chrom\tPos\tRef\t$input_name\n"; | |
| 19 | |
| 20 my %mpileupFiltred; | |
| 21 open (VU,$input_mpileupFiltred_file) or die ("Can't open mpileupFiltred file\n"); | |
| 22 while (my $line=<VU>){ | |
| 23 if ($line!~/^\s*$/){ | |
| 24 my @fields = split (/\t+/,$line); | |
| 25 my $chr; | |
| 26 my $pos; | |
| 27 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ | |
| 28 $chr = $1; | |
| 29 } | |
| 30 else { | |
| 31 print STDERR "Unable to detect chromosome in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; | |
| 32 exit(0); | |
| 33 } | |
| 34 if ($fields[1]=~/^\s*(\d+)\s*$/){ | |
| 35 $pos = $1; | |
| 36 } | |
| 37 else { | |
| 38 print STDERR "Unable to detect position in pileup file\n$line",$fields[0],"\n",$fields[1],"\n"; | |
| 39 exit(0); | |
| 40 } | |
| 41 if ($#fields>=4){ | |
| 42 $mpileupFiltred{"$chr#$pos"}=$fields[4]; | |
| 43 } | |
| 44 else { | |
| 45 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; | |
| 46 } | |
| 47 } | |
| 48 } | |
| 49 close VU; | |
| 50 | |
| 51 my %covered; | |
| 52 | |
| 53 my $compt=0; | |
| 54 open (PU,$input_pileup_file) or die ("Can't open pileup file\n"); | |
| 55 while (my $line=<PU>){ | |
| 56 #$compt++; | |
| 57 #if ($compt>200000){last;} | |
| 58 if ($line!~/^\s*$/){ | |
| 59 my @fields = split (/\t+/,$line); | |
| 60 my $chr; | |
| 61 my $pos; | |
| 62 my $ref; | |
| 63 my $depth; | |
| 64 my $pile; | |
| 65 | |
| 66 if ($#fields>=4){ | |
| 67 if ($fields[0]=~/^\s*([\w\-]+)\s*$/){ | |
| 68 $chr = $1; | |
| 69 } | |
| 70 else { | |
| 71 print STDERR "Unable to detect chromosome in pileup file\n$line\n"; | |
| 72 exit(0); | |
| 73 } | |
| 74 if ($fields[1]=~/^\s*(\d+)\s*$/){ | |
| 75 $pos = $1; | |
| 76 } | |
| 77 else { | |
| 78 print STDERR "Unable to detect position in pileup file\n$line\n"; | |
| 79 exit(0); | |
| 80 } | |
| 81 | |
| 82 if ($fields[2]=~/^\s*([ATGCNX])\s*$/i){ | |
| 83 $ref = $1; | |
| 84 } | |
| 85 else { | |
| 86 print STDERR "Unable to detect reference base in pileup file\n$line\n"; | |
| 87 exit(0); | |
| 88 } | |
| 89 | |
| 90 if ($fields[3]=~/^\s*(\d+)\s*$/i){ | |
| 91 $depth = $1; | |
| 92 } | |
| 93 else { | |
| 94 print STDERR "Unable to detect depth in pileup file\n$line\n"; | |
| 95 exit(0); | |
| 96 } | |
| 97 | |
| 98 $pile = $fields[4]; | |
| 99 $pile =~ s/\$//g; #the read start at this position | |
| 100 $pile =~ s/\^.//g; #the read end at this position followed by quality char | |
| 101 | |
| 102 if ($mpileupFiltred{"$chr#$pos"}) { | |
| 103 $pile = $mpileupFiltred{"$chr#$pos"}; | |
| 104 $pile =~ s/\$//g; #the read start at this position | |
| 105 $pile =~ s/\^.//g; #the read end at this position followed by quality char | |
| 106 my $ori_pile = $pile; | |
| 107 | |
| 108 my @detail = split(//,$pile); | |
| 109 my $current_in=""; | |
| 110 my $current_del=""; | |
| 111 my $current_length=0; | |
| 112 my $noindel_pile=""; | |
| 113 my @IN; | |
| 114 my @DEL; | |
| 115 | |
| 116 $noindel_pile = $pile; | |
| 117 while ($noindel_pile =~ /\+(\d+)/){ | |
| 118 my $length = $1; | |
| 119 $noindel_pile =~ s/(\+\d+[ATGCNX]{$length})//i ; | |
| 120 push(@IN,$1); | |
| 121 #print "test : $pile / $noindel_pile\n"; | |
| 122 } | |
| 123 while ($noindel_pile =~ /\-(\d+)/){ | |
| 124 my $length = $1; | |
| 125 $noindel_pile =~ s/(\-\d+[ATGCNX]{$length})//i ; | |
| 126 push(@DEL,$1); | |
| 127 #print "test : $pile / $noindel_pile\n"; | |
| 128 } | |
| 129 | |
| 130 my %variant_type; | |
| 131 | |
| 132 my $indel_pile = $ori_pile; | |
| 133 $variant_type{"IN"} = ($indel_pile =~ s/\+//gi); | |
| 134 $variant_type{"DEL"} = ($indel_pile =~ s/\-//gi); | |
| 135 | |
| 136 my $base_pile = $noindel_pile; | |
| 137 $variant_type{"A"} = ($base_pile =~ s/a//gi); | |
| 138 $variant_type{"T"} = ($base_pile =~ s/t//gi); | |
| 139 $variant_type{"C"} = ($base_pile =~ s/c//gi); | |
| 140 $variant_type{"G"} = ($base_pile =~ s/g//gi); | |
| 141 $variant_type{"N"} = ($base_pile =~ s/n//gi); | |
| 142 | |
| 143 my $top_number=0; | |
| 144 my $top_variant=""; | |
| 145 foreach my $key (sort{$variant_type{$b}<=>$variant_type{$a}} keys %variant_type){ | |
| 146 if ($variant_type{$key} > 0){ | |
| 147 if ($variant_type{$key} > $top_number){ | |
| 148 $top_number = $variant_type{$key}; | |
| 149 $top_variant = $key; | |
| 150 } | |
| 151 elsif ($variant_type{$key} == $top_number){ | |
| 152 $top_variant .= "/$key"; | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 } | |
| 157 print "$chr\t$pos\t$ref\t$top_variant\n"; | |
| 158 | |
| 159 } | |
| 160 elsif (($pile=~/[\+\-]/)||($pile=~/[ATGCN]/i)){ | |
| 161 print "$chr\t$pos\t$ref\tX\n"; | |
| 162 } | |
| 163 else { | |
| 164 print "$chr\t$pos\t$ref\t$ref\n"; | |
| 165 } | |
| 166 } | |
| 167 else { | |
| 168 print STDERR "Unrecognized pileup format, need at least 5 columns\n$line\n"; | |
| 169 } | |
| 170 } | |
| 171 } | |
| 172 close PU; | |
| 173 | |
| 174 |
