Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
view alignment/seqfill.pl @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl my $file = $ARGV[0]; my $q_mark = $ARGV[1]; my $hyphen = $ARGV[2]; my $N = $ARGV[3]; my $usePartFile = $ARGV[4]; my $partFile = $ARGV[5]; my $out = "out.phylipnon"; # output file open(FILE, $file); my @speciesNames; my @sequenceLines; my @currentLineContent; my $i = 0; while($currentLine = <FILE>) { chomp($currentLine); @currentLineContent = split(/\t/, $currentLine); $speciesNames[$i] = $currentLineContent[0]; $sequenceLines[$i] = $currentLineContent[1]; $i++; } my $dataInfo = $speciesNames[1]; # gets num of species and sequence length my @numbers = split(/ /, $dataInfo); my $numberOfSpecies = $numbers[0]; my $sequenceLength = $numbers[1]; close(FILE); open(OUT, '>'.$out); my @columnData; # this will have $sequenceLength elements for($j = 0; $j < $numberOfSpecies+2; $j++) { for($k = 0; $k < $sequenceLength; $k++) { $currChar = substr($sequenceLines[$j], $k, 1); $columnData[$k] = $columnData[$k].$currChar; } } # mark locations that will be removed my @flagMap; for($i = 0; $i < $sequenceLength; $i++) { $flagMap[$i] = 0; } my $index = 0; foreach $el(@columnData) { my $tot = 0; my $q_mark_occur = 0; my $hyphen_occur = 0; my $N_occur = 0; if($q_mark eq "true") { $q_mark_occur = ($el =~ tr/?//); } if($hyphen eq "true") { $hyphen_occur = ($el =~ tr/-//); } if($N eq "true") { $N_occur = ($el =~ tr/N//); } $tot = $q_mark_occur + $hyphen_occur + $N_occur; if($tot == $numberOfSpecies) { $flagMap[$index] = 1; } $index++; } my $newSequenceLength = $sequenceLength; foreach $el(@flagMap) { if($el == 1) { $newSequenceLength--; } } print OUT $speciesNames[0]."\n"; print OUT $numberOfSpecies." ".$newSequenceLength."\n"; for($i = 2; $i < $numberOfSpecies+3; $i++) { print OUT $speciesNames[$i]."\t"; for($j = 0; $j < $sequenceLength; $j++) { if($flagMap[$j] == 0) { my $character = substr($sequenceLines[$i], $j, 1); print OUT $character; } } print OUT "\n"; } close(OUT); my $partOut = "partOut.txt"; if($usePartFile eq "true") { # update the partition file open(PART, $partFile); my @data; my @ranges; my @names; $i = 0; while($currentLine = <PART>) { @data = split(/=/, $currentLine); $names[$i] = $data[0]; $ranges[$i] = $data[1]; $i++; } close(PART); my $firstFlag = 1; open(PARTOUT, '>'.$partOut); $j = 0; my $newLower; foreach $el(@ranges) { print PARTOUT $names[$j]." = "; @lowerUpper = split(/-/, $el); if($firstFlag == 1) { $newLower = $lowerUpper[0]; $firstFlag = 0; } my $currUpper = $lowerUpper[1]; my $newUpper = $currUpper; for($i = $currLower; $i < $currUpper; $i++) { if($flagMap[$i] == 1) { $newUpper--; } } print PARTOUT $newLower." - ".$newUpper."\n"; $newLower = $newUpper + 1; $j++; } close(PARTOUT); }