Mercurial > repos > hammock > hammock
comparison external_tools/darwin/lib/hh/scripts/create_profile_from_hhm.pl @ 6:2277dd59b9f9 draft
Uploaded
author | hammock |
---|---|
date | Wed, 01 Nov 2017 05:54:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:b7652b7c97bd | 6:2277dd59b9f9 |
---|---|
1 #!/usr/bin/env perl | |
2 # | |
3 # create_profile_from_hhm.pl | |
4 # Create a profile (.prf) from a given HHM file | |
5 | |
6 # HHsuite version 2.0.16 (January 2013) | |
7 # | |
8 # Reference: | |
9 # Remmert M., Biegert A., Hauser A., and Soding J. | |
10 # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment. | |
11 # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011). | |
12 | |
13 # (C) Michael Remmert and Johannes Soeding, 2012 | |
14 | |
15 # This program is free software: you can redistribute it and/or modify | |
16 # it under the terms of the GNU General Public License as published by | |
17 # the Free Software Foundation, either version 3 of the License, or | |
18 # (at your option) any later version. | |
19 | |
20 # This program is distributed in the hope that it will be useful, | |
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 # GNU General Public License for more details. | |
24 | |
25 # You should have received a copy of the GNU General Public License | |
26 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
27 | |
28 # We are very grateful for bug reports! Please contact us at soeding@genzentrum.lmu.de | |
29 | |
30 use lib $ENV{"HHLIB"}."/scripts"; | |
31 use HHPaths; # config file with path variables for nr, blast, psipred, pdb, dssp etc. | |
32 use strict; | |
33 | |
34 $|= 1; # Activate autoflushing on STDOUT | |
35 | |
36 # Default values: | |
37 our $v=2; # verbose mode | |
38 | |
39 my $help=" | |
40 create_profile_from_hhm.pl from HHsuite $VERSION | |
41 Create a profile (.prf) from a given HHM file | |
42 | |
43 Usage: perl create_profile_from_hhm.pl -i <infile> [-o <outfile>] | |
44 | |
45 Options: | |
46 -i <infile> Input file in HHM format | |
47 -o <outfile> Output file in prf-format (default: infile.prf) | |
48 | |
49 -v [0-5] verbose mode (default: $v) | |
50 \n"; | |
51 | |
52 # Variable declarations | |
53 my $line; | |
54 my $infile; | |
55 my $outfile; | |
56 my $i; | |
57 my $a; | |
58 | |
59 my @counts; # count profile | |
60 my @neffs; | |
61 my $name; # name of HHM profile | |
62 my $len; # length of HHM profile | |
63 | |
64 # A C D E F G H I K L M N P Q R S T V W Y | |
65 my @hhmaa2csaa = ( 0, 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, 18); | |
66 my @aminoacids = ('A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'); | |
67 my %aa2i; | |
68 for ($a = 0; $a < 20; $a++) { | |
69 $aa2i{$aminoacids[$a]} = $a; | |
70 } | |
71 | |
72 ############################################################################################### | |
73 # Processing command line input | |
74 ############################################################################################### | |
75 | |
76 if (@ARGV<1) {die ($help);} | |
77 | |
78 my $options=""; | |
79 for (my $i=0; $i<@ARGV; $i++) {$options.=" $ARGV[$i] ";} | |
80 | |
81 if ($options=~s/ -i\s+(\S+) //) {$infile=$1;} | |
82 if ($options=~s/ -o\s+(\S+) //) {$outfile=$1;} | |
83 | |
84 if ($options=~s/ -v\s+(\S+) //) {$v=$1;} | |
85 | |
86 if (!$infile) {print($help); print "ERROR! No input file!\n"; exit(1);} | |
87 | |
88 if (!$outfile) { | |
89 $infile =~ /^(\S+)\.\S+?$/; | |
90 $outfile = "$1.prf"; | |
91 } | |
92 | |
93 ############################################################################################## | |
94 # Main part | |
95 ############################################################################################## | |
96 | |
97 ###################################### | |
98 # Read HHM profile | |
99 ###################################### | |
100 open (IN, $infile); | |
101 while ($line = <IN>) { | |
102 if ($line =~ /^NAME\s+(\S+)/) { | |
103 $name = $1; | |
104 } elsif ($line =~ /^LENG\s+(\d+)/) { | |
105 $len = $1; | |
106 } elsif ($line =~ /^HMM/) { | |
107 last; | |
108 } | |
109 } | |
110 | |
111 $i = 0; | |
112 while ($line = <IN>) { | |
113 if ($line =~ /^\/\//) { last; } | |
114 | |
115 if ($line =~ s/^\S \d+ //) { | |
116 | |
117 for ($a = 0; $a < 20; $a++) { | |
118 $line =~ s/^\s*(\S+)\s/ /; | |
119 $counts[$i][$hhmaa2csaa[$a]] = $1; | |
120 if ($counts[$i][$hhmaa2csaa[$a]] !~ /\*/ && $counts[$i][$hhmaa2csaa[$a]] == 0) { | |
121 $counts[$i][$hhmaa2csaa[$a]] = 1; | |
122 } | |
123 } | |
124 | |
125 $line = <IN>; | |
126 $line =~ /^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+/; | |
127 $neffs[$i] = $1; | |
128 | |
129 $i++; | |
130 } | |
131 } | |
132 | |
133 ###################################### | |
134 # write count_profile | |
135 ###################################### | |
136 | |
137 open (OUT, ">$outfile"); | |
138 # Write header | |
139 printf(OUT "CountProfile\n"); | |
140 printf(OUT "NAME\t%s\n", $name); | |
141 printf(OUT "LENG\t%i\n", $len); | |
142 printf(OUT "ALPH\t20\n"); # 20 amino acid alphabet | |
143 printf(OUT "COUNTS"); | |
144 for ($a = 0; $a < 20; $a++) { | |
145 printf(OUT "\t%s", $aminoacids[$a]); | |
146 } | |
147 printf(OUT "\tNEFF\n"); | |
148 | |
149 # Write profile | |
150 for ($i = 0; $i < $len; $i++) { | |
151 printf(OUT "%i", $i+1); | |
152 for ($a = 0; $a < 20; $a++) { | |
153 if ($counts[$i][$a] == '*') { | |
154 printf(OUT "\t*"); | |
155 } else { | |
156 printf(OUT "\t%i", $counts[$i][$a]); | |
157 } | |
158 } | |
159 printf(OUT "\t%i\n", $neffs[$i]); | |
160 } | |
161 | |
162 printf(OUT "//\n"); | |
163 close OUT; | |
164 | |
165 exit; | |
166 | |
167 |