comparison alignment/seqfill.pl @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/perl
2
3 my $file = $ARGV[0];
4 my $q_mark = $ARGV[1];
5 my $hyphen = $ARGV[2];
6 my $N = $ARGV[3];
7 my $usePartFile = $ARGV[4];
8 my $partFile = $ARGV[5];
9
10 my $out = "out.phylipnon"; # output file
11
12 open(FILE, $file);
13 my @speciesNames;
14 my @sequenceLines;
15
16 my @currentLineContent;
17
18 my $i = 0;
19 while($currentLine = <FILE>) {
20 chomp($currentLine);
21 @currentLineContent = split(/\t/, $currentLine);
22 $speciesNames[$i] = $currentLineContent[0];
23 $sequenceLines[$i] = $currentLineContent[1];
24 $i++;
25 }
26
27 my $dataInfo = $speciesNames[1]; # gets num of species and sequence length
28 my @numbers = split(/ /, $dataInfo);
29
30 my $numberOfSpecies = $numbers[0];
31 my $sequenceLength = $numbers[1];
32
33 close(FILE);
34
35 open(OUT, '>'.$out);
36 my @columnData; # this will have $sequenceLength elements
37 for($j = 0; $j < $numberOfSpecies+2; $j++) {
38 for($k = 0; $k < $sequenceLength; $k++) {
39 $currChar = substr($sequenceLines[$j], $k, 1);
40 $columnData[$k] = $columnData[$k].$currChar;
41 }
42 }
43
44 # mark locations that will be removed
45 my @flagMap;
46 for($i = 0; $i < $sequenceLength; $i++) {
47 $flagMap[$i] = 0;
48 }
49 my $index = 0;
50 foreach $el(@columnData) {
51 my $tot = 0;
52 my $q_mark_occur = 0;
53 my $hyphen_occur = 0;
54 my $N_occur = 0;
55
56 if($q_mark eq "true") {
57 $q_mark_occur = ($el =~ tr/?//);
58 }
59 if($hyphen eq "true") {
60 $hyphen_occur = ($el =~ tr/-//);
61 }
62 if($N eq "true") {
63 $N_occur = ($el =~ tr/N//);
64 }
65
66 $tot = $q_mark_occur + $hyphen_occur + $N_occur;
67 if($tot == $numberOfSpecies) {
68 $flagMap[$index] = 1;
69 }
70 $index++;
71 }
72
73 my $newSequenceLength = $sequenceLength;
74 foreach $el(@flagMap) {
75 if($el == 1) {
76 $newSequenceLength--;
77 }
78 }
79
80 print OUT $speciesNames[0]."\n";
81 print OUT $numberOfSpecies." ".$newSequenceLength."\n";
82 for($i = 2; $i < $numberOfSpecies+3; $i++) {
83 print OUT $speciesNames[$i]."\t";
84 for($j = 0; $j < $sequenceLength; $j++) {
85 if($flagMap[$j] == 0) {
86 my $character = substr($sequenceLines[$i], $j, 1);
87 print OUT $character;
88 }
89 }
90 print OUT "\n";
91 }
92
93 close(OUT);
94
95 my $partOut = "partOut.txt";
96
97 if($usePartFile eq "true") {
98 # update the partition file
99 open(PART, $partFile);
100 my @data;
101 my @ranges;
102 my @names;
103 $i = 0;
104 while($currentLine = <PART>) {
105 @data = split(/=/, $currentLine);
106 $names[$i] = $data[0];
107 $ranges[$i] = $data[1];
108 $i++;
109 }
110 close(PART);
111
112 my $firstFlag = 1;
113 open(PARTOUT, '>'.$partOut);
114 $j = 0;
115 my $newLower;
116 foreach $el(@ranges) {
117 print PARTOUT $names[$j]." = ";
118 @lowerUpper = split(/-/, $el);
119 if($firstFlag == 1) {
120 $newLower = $lowerUpper[0];
121 $firstFlag = 0;
122 }
123 my $currUpper = $lowerUpper[1];
124 my $newUpper = $currUpper;
125
126
127
128 for($i = $currLower; $i < $currUpper; $i++) {
129 if($flagMap[$i] == 1) {
130 $newUpper--;
131 }
132 }
133
134 print PARTOUT $newLower." - ".$newUpper."\n";
135 $newLower = $newUpper + 1;
136 $j++;
137 }
138 close(PARTOUT);
139 }