Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/seqfill.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 my $file = $ARGV[0]; | |
4 my $q_mark = $ARGV[1]; | |
5 my $hyphen = $ARGV[2]; | |
6 my $N = $ARGV[3]; | |
7 my $usePartFile = $ARGV[4]; | |
8 my $partFile = $ARGV[5]; | |
9 | |
10 my $out = "out.phylipnon"; # output file | |
11 | |
12 open(FILE, $file); | |
13 my @speciesNames; | |
14 my @sequenceLines; | |
15 | |
16 my @currentLineContent; | |
17 | |
18 my $i = 0; | |
19 while($currentLine = <FILE>) { | |
20 chomp($currentLine); | |
21 @currentLineContent = split(/\t/, $currentLine); | |
22 $speciesNames[$i] = $currentLineContent[0]; | |
23 $sequenceLines[$i] = $currentLineContent[1]; | |
24 $i++; | |
25 } | |
26 | |
27 my $dataInfo = $speciesNames[1]; # gets num of species and sequence length | |
28 my @numbers = split(/ /, $dataInfo); | |
29 | |
30 my $numberOfSpecies = $numbers[0]; | |
31 my $sequenceLength = $numbers[1]; | |
32 | |
33 close(FILE); | |
34 | |
35 open(OUT, '>'.$out); | |
36 my @columnData; # this will have $sequenceLength elements | |
37 for($j = 0; $j < $numberOfSpecies+2; $j++) { | |
38 for($k = 0; $k < $sequenceLength; $k++) { | |
39 $currChar = substr($sequenceLines[$j], $k, 1); | |
40 $columnData[$k] = $columnData[$k].$currChar; | |
41 } | |
42 } | |
43 | |
44 # mark locations that will be removed | |
45 my @flagMap; | |
46 for($i = 0; $i < $sequenceLength; $i++) { | |
47 $flagMap[$i] = 0; | |
48 } | |
49 my $index = 0; | |
50 foreach $el(@columnData) { | |
51 my $tot = 0; | |
52 my $q_mark_occur = 0; | |
53 my $hyphen_occur = 0; | |
54 my $N_occur = 0; | |
55 | |
56 if($q_mark eq "true") { | |
57 $q_mark_occur = ($el =~ tr/?//); | |
58 } | |
59 if($hyphen eq "true") { | |
60 $hyphen_occur = ($el =~ tr/-//); | |
61 } | |
62 if($N eq "true") { | |
63 $N_occur = ($el =~ tr/N//); | |
64 } | |
65 | |
66 $tot = $q_mark_occur + $hyphen_occur + $N_occur; | |
67 if($tot == $numberOfSpecies) { | |
68 $flagMap[$index] = 1; | |
69 } | |
70 $index++; | |
71 } | |
72 | |
73 my $newSequenceLength = $sequenceLength; | |
74 foreach $el(@flagMap) { | |
75 if($el == 1) { | |
76 $newSequenceLength--; | |
77 } | |
78 } | |
79 | |
80 print OUT $speciesNames[0]."\n"; | |
81 print OUT $numberOfSpecies." ".$newSequenceLength."\n"; | |
82 for($i = 2; $i < $numberOfSpecies+3; $i++) { | |
83 print OUT $speciesNames[$i]."\t"; | |
84 for($j = 0; $j < $sequenceLength; $j++) { | |
85 if($flagMap[$j] == 0) { | |
86 my $character = substr($sequenceLines[$i], $j, 1); | |
87 print OUT $character; | |
88 } | |
89 } | |
90 print OUT "\n"; | |
91 } | |
92 | |
93 close(OUT); | |
94 | |
95 my $partOut = "partOut.txt"; | |
96 | |
97 if($usePartFile eq "true") { | |
98 # update the partition file | |
99 open(PART, $partFile); | |
100 my @data; | |
101 my @ranges; | |
102 my @names; | |
103 $i = 0; | |
104 while($currentLine = <PART>) { | |
105 @data = split(/=/, $currentLine); | |
106 $names[$i] = $data[0]; | |
107 $ranges[$i] = $data[1]; | |
108 $i++; | |
109 } | |
110 close(PART); | |
111 | |
112 my $firstFlag = 1; | |
113 open(PARTOUT, '>'.$partOut); | |
114 $j = 0; | |
115 my $newLower; | |
116 foreach $el(@ranges) { | |
117 print PARTOUT $names[$j]." = "; | |
118 @lowerUpper = split(/-/, $el); | |
119 if($firstFlag == 1) { | |
120 $newLower = $lowerUpper[0]; | |
121 $firstFlag = 0; | |
122 } | |
123 my $currUpper = $lowerUpper[1]; | |
124 my $newUpper = $currUpper; | |
125 | |
126 | |
127 | |
128 for($i = $currLower; $i < $currUpper; $i++) { | |
129 if($flagMap[$i] == 1) { | |
130 $newUpper--; | |
131 } | |
132 } | |
133 | |
134 print PARTOUT $newLower." - ".$newUpper."\n"; | |
135 $newLower = $newUpper + 1; | |
136 $j++; | |
137 } | |
138 close(PARTOUT); | |
139 } |