annotate external_tools/darwin/lib/hh/scripts/create_profile_from_hhm.pl @ 6:2277dd59b9f9 draft

Uploaded
author hammock
date Wed, 01 Nov 2017 05:54:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
1 #!/usr/bin/env perl
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
2 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
3 # create_profile_from_hhm.pl
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
4 # Create a profile (.prf) from a given HHM file
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
5
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
6 # HHsuite version 2.0.16 (January 2013)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
7 #
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
8 # Reference:
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
9 # Remmert M., Biegert A., Hauser A., and Soding J.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
10 # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
11 # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
12
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
13 # (C) Michael Remmert and Johannes Soeding, 2012
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
14
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
15 # This program is free software: you can redistribute it and/or modify
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
16 # it under the terms of the GNU General Public License as published by
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
17 # the Free Software Foundation, either version 3 of the License, or
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
18 # (at your option) any later version.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
19
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
20 # This program is distributed in the hope that it will be useful,
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
23 # GNU General Public License for more details.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
24
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
25 # You should have received a copy of the GNU General Public License
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
26 # along with this program. If not, see <http://www.gnu.org/licenses/>.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
27
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
28 # We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
29
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
30 use lib $ENV{"HHLIB"}."/scripts";
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
31 use HHPaths; # config file with path variables for nr, blast, psipred, pdb, dssp etc.
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
32 use strict;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
33
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# Flush STDOUT after every write instead of buffering it.
$| = 1;

# Default values:
our $v = 2;    # verbose mode; replaced by the value given with -v

# Usage text printed when the required arguments are missing.
# NOTE(review): $VERSION is not declared in this script; presumably it is
# exported by HHPaths -- confirm.
my $help = "
create_profile_from_hhm.pl from HHsuite $VERSION
Create a profile (.prf) from a given HHM file

Usage: perl create_profile_from_hhm.pl -i <infile> [-o <outfile>]

Options:
-i <infile> Input file in HHM format
-o <outfile> Output file in prf-format (default: infile.prf)

-v [0-5] verbose mode (default: $v)
\n";

# Variable declarations
my ($line, $infile, $outfile);    # current input line; input and output file names
my ($i, $a);                      # profile row index; amino acid column index

my (@counts, @neffs);    # per-row values read from the HHM file: counts[row][column] and neffs[row]
my ($name, $len);        # name and length of the HHM profile (NAME and LENG header lines)
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
63
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
# Amino acid order of the 20 value columns on an HHM emission line:
#   A  C  D  E  F  G  H  I  K  L  M  N  P  Q  R  S  T  V  W  Y
# $hhmaa2csaa[k] is the position in @aminoacids of the k-th HHM column.
my @hhmaa2csaa = (
     0,  4,  3,  6, 13,  7,  8,  9, 11, 10,
    12,  2, 14,  5,  1, 15, 16, 19, 17, 18,
);

# Amino acid order used for the columns of the output profile.
my @aminoacids = qw(A R N D C Q E G H I L K M F P S T W Y V);

# Reverse lookup: one-letter code -> position in @aminoacids.
my %aa2i;
@aa2i{@aminoacids} = (0 .. $#aminoacids);
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
71
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
###############################################################################################
# Processing command line input
###############################################################################################

# Called without any argument: show the usage text and abort.
if (@ARGV < 1) { die($help); }

# Join all arguments into one string, each padded with a blank on both sides,
# so that every option can be matched and removed with a single pattern.
my $options = "";
for my $arg (@ARGV) { $options .= " $arg "; }

if ($options =~ s/ -i\s+(\S+) //) { $infile  = $1; }
if ($options =~ s/ -o\s+(\S+) //) { $outfile = $1; }

if ($options =~ s/ -v\s+(\S+) //) { $v = $1; }

# Test for definedness, not truth, so that a file literally named "0" is accepted.
if (!defined $infile) { print($help); print "ERROR! No input file!\n"; exit(1); }

if (!defined $outfile) {
    # Default output name: the input name with its extension replaced by ".prf".
    # Only a dot inside the last path component counts as an extension separator,
    # and a leading dot (hidden file) does not. The substitution works on a copy
    # and never reads a capture variable, so a name without any extension simply
    # gets ".prf" appended.
    $outfile = $infile;
    $outfile =~ s{(?<=[^/])\.[^./]+\z}{};
    $outfile .= ".prf";
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
92
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
##############################################################################################
# Main part
##############################################################################################

######################################
# Read HHM profile
######################################

# Three-argument open with a lexical handle: the file name can never be taken
# for an open mode or a pipe, and a missing or unreadable file stops the script
# here instead of leading to an empty profile being written.
open(my $hhm_fh, '<', $infile)
    or die("ERROR! Cannot open input file '$infile': $!\n");

# Header part: remember NAME and LENG, stop at the first line starting with "HMM".
while (my $header = <$hhm_fh>) {
    if    ($header =~ /^NAME\s+(\S+)/) { $name = $1; }
    elsif ($header =~ /^LENG\s+(\d+)/) { $len  = $1; }
    elsif ($header =~ /^HMM/)          { last; }
}

# Profile part: every row consists of a value line followed by a second line
# whose 8th field is stored as the row's Neff. Reading stops at "//".
$i = 0;    # number of rows stored so far
while (my $row = <$hhm_fh>) {
    last if $row =~ /^\/\//;

    # Value lines start with "<character> <number> "; every other line is skipped.
    next unless $row =~ s/^\S \d+ //;

    # The first 20 whitespace-separated fields are the values of this row.
    my @fields = split ' ', $row;
    if (@fields < 20) {
        die("ERROR! Row " . ($i + 1) . " of '$infile' has fewer than 20 values\n");
    }
    for my $col (0 .. 19) {
        my $value = $fields[$col];

        # A numeric zero is stored as 1; '*' entries are kept unchanged.
        if ($value !~ /\*/ && $value == 0) { $value = 1; }

        # Store the value in the column order of @aminoacids.
        $counts[$i][ $hhmaa2csaa[$col] ] = $value;
    }

    # The line after the value line carries the Neff of this row in field 8.
    my $next_line = <$hhm_fh>;
    if (!defined $next_line) {
        die("ERROR! '$infile' ends after the values of row " . ($i + 1) . "\n");
    }
    my @next_fields = split ' ', $next_line;
    if (@next_fields < 8) {
        die("ERROR! Line after row " . ($i + 1) . " of '$infile' has fewer than 8 fields\n");
    }
    $neffs[$i] = $next_fields[7];

    $i++;
}
close($hhm_fh);

# Without a LENG line the number of rows read is used as profile length.
if (!defined $len) {
    $len = $i;
}
elsif ($len != $i && $v >= 1) {
    warn("WARNING! LENG is $len but $i rows were read from '$infile'\n");
}
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
132
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
######################################
# write count_profile
######################################

# Three-argument open with a lexical handle; stop if the file cannot be created.
open(my $prf_fh, '>', $outfile)
    or die("ERROR! Cannot open output file '$outfile' for writing: $!\n");

# Write header
print  {$prf_fh} "CountProfile\n";
printf {$prf_fh} "NAME\t%s\n", $name;
printf {$prf_fh} "LENG\t%i\n", $len;
print  {$prf_fh} "ALPH\t20\n";    # 20 amino acid alphabet
print  {$prf_fh} "COUNTS";
for my $aa (@aminoacids) {
    printf {$prf_fh} "\t%s", $aa;
}
print {$prf_fh} "\tNEFF\n";

# Write profile: one tab-separated line per row, consisting of the 1-based row
# number, the 20 values in @aminoacids order and the Neff of the row.
for my $pos (0 .. $len - 1) {
    my @cells = ($pos + 1);
    for my $col (0 .. 19) {
        my $count = $counts[$pos][$col];

        # '*' is compared as a string. A missing value is written as '*' as well.
        if (!defined $count || $count eq '*') {
            push @cells, '*';
        }
        else {
            push @cells, sprintf("%i", $count);
        }
    }
    push @cells, sprintf("%i", $neffs[$pos]);
    print {$prf_fh} join("\t", @cells), "\n";
}

print {$prf_fh} "//\n";

# Buffered write errors (e.g. a full disk) only show up when the handle is closed.
close($prf_fh)
    or die("ERROR! Cannot finish writing '$outfile': $!\n");

exit;
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
166
2277dd59b9f9 Uploaded
hammock
parents:
diff changeset
167