0
|
1 #!/usr/bin/perl
|
|
2 use strict;
|
|
3 use warnings;
|
|
4 use Getopt::Std;
|
|
5
|
|
# ---------------------------------------------------------------------------
# Command-line handling.
#
# Parses -N (strain nickname), -I (input directory) and -O (output directory)
# into %opt and copies them to $prefix, $input and $output, which the rest of
# the script reads. All three switches are mandatory.
# ---------------------------------------------------------------------------
my %opt;

# getopts() returns false when it meets an unknown switch or a switch whose
# argument is missing; it reports the problem itself on STDERR.
my $options_ok=getopts('N:I:O:',\%opt);

my @usage=qq(
Version: 2016042201
Usage: perl Converter_draft.pl [options]

Options:

-N String Input the strain nickname
-I String Input file directory
-O String Output file directory
);

# Malformed command line: show the usage text and signal failure to the shell
# instead of carrying on with a partially parsed option set.
if (!$options_ok)
{
    print join("\n",@usage)."\n";
    exit 1;
}

# No options at all: the user only asked for help, so exit normally.
if (!scalar(keys %opt))
{
    print join("\n",@usage)."\n";
    exit;
}

my $prefix;
my $output;
my $input;

# Check the mandatory switches in the same order as before (N, O, I) and stop
# at the first one that is missing. The exit status is non-zero so that a
# calling shell script or pipeline can detect the failure.
foreach my $required (["N",\$prefix],["O",\$output],["I",\$input])
{
    my ($switch,$target)=@{$required};
    if (exists($opt{$switch}))
    {
        ${$target}=$opt{$switch};
        next;
    }
    # @usage starts with a newline, which terminates this message line.
    print "-$switch could not be empty!";
    print join("\n",@usage)."\n";
    exit 1;
}
|
|
58
|
|
# ---------------------------------------------------------------------------
# Shared working variables, directory preparation and output files.
#
# Reads  : $input, $output, $prefix (set by the option handling above).
# Writes : @list  - base names (without ".faa") of every *.faa file in $input
#          PEP/NUC/FUN - write handles for <prefix>.pep, <prefix>.nuc and
#          <prefix>.function inside the output directory.
# Dies when the output directory cannot be created, the input directory
# cannot be read, or an output file cannot be opened.
# ---------------------------------------------------------------------------
my $sp;        # not referenced anywhere in the visible script; kept as declared
my $line;
my @row;
my @tmp;
my %hash;
my $flag;
my $file;
my $list;
my @list;
my $pttlost;
my $gi;

# Create the output directory when it is missing. A failure here (for example
# when a regular file with that name exists) would otherwise only show up later
# as unopened output handles.
if (!(-d $output))
{
    mkdir($output) or die "Could not create the output directory ( $output ): $!\n";
}

# Both directory names are used as plain string prefixes for file names below,
# so make sure each ends with a slash.
if ($input!~/\/$/)
{
    $input=$input."/";
}

if ($output!~/\/$/)
{
    $output=$output."/";
}

opendir(my $input_dir,$input) or die "The input directory ( $input ) is not exists!\n";
@list=();
foreach my $entry (readdir($input_dir))
{
    # Keep only names that end in the literal extension ".faa" and remember the
    # part in front of it. Anchoring the pattern and escaping the dot keeps
    # names such as "xfaab.faa" intact (base name "xfaab").
    if ($entry=~/^(.+)\.faa\z/s)
    {
        push(@list,$1);
    }
}
closedir($input_dir);

# The handle names are barewords because the conversion loop further down
# prints to PEP, NUC and FUN by name.
open(PEP,">",$output.$prefix.".pep") or die "could not open $output$prefix.pep: $!";
open(NUC,">",$output.$prefix.".nuc") or die "could not open $output$prefix.nuc: $!";
open(FUN,">",$output.$prefix.".function") or die "could not open $output$prefix.function: $!";
|
|
98
|
|
# ---------------------------------------------------------------------------
# Convert every genome listed in @list.
#
# For each base name the loop expects <base>.faa and <base>.ffn in $input, and
# optionally <base>.ptt:
#   * PEP receives the .faa records with each header reduced to its second
#     "|"-separated field.
#   * FUN receives one tab-separated line per .ptt row (columns 4, 8 and 9).
#   * NUC receives the .ffn records whose location key (see getKey) is found
#     in the .ptt file, with the header replaced by the matching .ptt column 4.
# When the .ptt file is missing and the .faa file holds exactly one sequence,
# that sequence's identifier is used for every .ffn record and a placeholder
# "hypothetical protein" line is written to FUN.
#
# All per-genome state lives in lexicals declared inside the loop, so nothing
# can leak from one genome into the next.
# ---------------------------------------------------------------------------
foreach my $base (@list)
{
    my $faa_path=$input.$base.".faa";
    my $ffn_path=$input.$base.".ffn";
    my $ptt_path=$input.$base.".ptt";

    if (!(-e $faa_path))
    {
        print $faa_path." is not exists!\n$base.faa, $base.ffn and $base.ptt are skipped!\n";
        next;
    }

    if (!(-e $ffn_path))
    {
        print $ffn_path." is not exists!\n$base.faa, $base.ffn and $base.ptt are skipped!\n";
        next;
    }

    # Without a .ptt file the genome can only be converted when the .faa file
    # holds exactly one sequence.
    my $ptt_missing=0;
    if (!(-e $ptt_path))
    {
        open(my $count_in,"<",$faa_path) or die "could not open $faa_path: $!";
        my $sequence_count=grep { /^>/ } <$count_in>;
        close($count_in);

        if ($sequence_count>1)
        {
            print $ptt_path." is not exists!\n";
            print "There are more than 1 sequence in $base.faa and $base.ffn, So $base.faa, $base.ffn and $base.ptt are skipped!\n";
            next;
        }
        if ($sequence_count==0)
        {
            # No header means no identifier to label the records with; skip the
            # genome instead of writing rows with an empty or stale identifier.
            print $ptt_path." is not exists!\n";
            print "There is no sequence in $base.faa, So $base.faa, $base.ffn and $base.ptt are skipped!\n";
            next;
        }
        $ptt_missing=1;
    }

    # --- protein sequences -------------------------------------------------
    my $gi;    # identifier of the single sequence; only set when the .ptt is missing
    open(my $faa_in,"<",$faa_path) or die "could not open $faa_path: $!";
    while (my $record=<$faa_in>)
    {
        if ($record=~/^>/)
        {
            # Drop the line ending first so a two-field header does not carry
            # its newline into the identifier.
            (my $header=$record)=~s/\r?\n\z//;
            my @fields=split(/\|/,$header);
            print PEP ">$fields[1]\n";
            if ($ptt_missing)
            {
                $gi=$fields[1];
            }
        }
        else
        {
            print PEP $record;
        }
    }
    close($faa_in);

    # --- function table and location -> identifier map ----------------------
    my %id_for_location;
    if ($ptt_missing)
    {
        print FUN "$gi\t-\thypothetical protein\n";
    }
    else
    {
        open(my $ptt_in,"<",$ptt_path) or die "could not open $ptt_path: $!";
        # The first three lines are discarded unread (presumably the .ptt
        # title, count and column-header lines - confirm against the input
        # format).
        scalar(<$ptt_in>) for (1..3);
        while (my $entry=<$ptt_in>)
        {
            chomp($entry);
            next if ($entry=~/^\s*$/);    # a blank line carries no columns
            my @cols=split(/\t/,$entry);
            print FUN $cols[3]."\t".$cols[7]."\t".$cols[8]."\n";

            # Column 1 is "start..end"; build the key in the same shape that
            # getKey derives from an .ffn header: "start-end" for the "+"
            # strand and "c<end>-<start>" otherwise.
            my @ends=split(/\.\./,$cols[0]);
            if ($cols[1] eq "+")
            {
                $id_for_location{$ends[0]."-".$ends[-1]}=$cols[3];
            }
            else
            {
                $id_for_location{"c".$ends[-1]."-".$ends[0]}=$cols[3];
            }
        }
        close($ptt_in);
    }

    # --- nucleotide sequences ------------------------------------------------
    # $emit says whether sequence lines of the current record are written. It
    # starts false for every file so lines in front of the first header are
    # never written on the strength of the previous genome's last record.
    my $emit=0;
    open(my $ffn_in,"<",$ffn_path) or die "could not open $ffn_path: $!";
    while (my $record=<$ffn_in>)
    {
        if ($record=~/^>/)
        {
            if ($ptt_missing)
            {
                print NUC ">$gi\n";
                $emit=1;
            }
            else
            {
                # Called with a leading & as before, so the call does not
                # depend on whether getKey carries a prototype.
                my $key=&getKey($record);
                if (exists($id_for_location{$key}))
                {
                    $emit=1;
                    print NUC ">$id_for_location{$key}\n";
                }
                else
                {
                    $emit=0;
                }
            }
        }
        elsif ($emit)
        {
            print NUC $record;
        }
    }
    close($ffn_in);
}

# Buffered write errors (for example a full disk) only surface when the handle
# is closed, so check each close.
close(PEP) or die "could not close $output$prefix.pep: $!";
close(NUC) or die "could not close $output$prefix.nuc: $!";
close(FUN) or die "could not close $output$prefix.function: $!";
|
|
216
|
|
# getKey($header)
#
# Builds the location key of a sequence header line.
#
# Only the text up to the first space is examined, and within it only the last
# ":"-separated field, which is treated as the location (presumably an NCBI
# .ffn header such as ">gi|...|:c5020-4000 description" - confirm against the
# input files). A "c" anywhere in that field marks the complementary strand.
# All "c" characters and spaces are removed, the rest is split on "," and "-",
# and the pieces are sorted numerically.
#
# Returns "<lowest>-<highest>" for the forward strand and
# "c<highest>-<lowest>" for the complementary strand. A header without any
# usable position yields "-" or "c-".
#
# The sub works on lexical copies only: neither the caller's argument nor the
# global $_ is modified. It is declared without a prototype because it takes
# one argument; calls written as &getKey(...) keep working.
sub getKey
{
    my ($header)=@_;
    $header='' if (!defined($header));
    chomp($header);

    # First space-delimited word, then its last ":"-separated field.
    my ($first_word)=split(/ /,$header);
    $first_word='' if (!defined($first_word));
    my @colon_fields=split(/\:/,$first_word);
    my $location=@colon_fields ? $colon_fields[-1] : '';

    my $is_complement=($location=~/c/) ? 1 : 0;

    (my $coordinates=$location)=~s/[c ]//g;
    my @positions=sort { $a<=>$b } split(/\,|-/,$coordinates);

    # Fall back to empty strings so a header without positions yields the bare
    # key ("-" or "c-") without undefined-value warnings.
    my ($lowest,$highest)=@positions ? @positions[0,-1] : ('','');

    if ($is_complement)
    {
        return "c".$highest."-".$lowest;
    }
    return $lowest."-".$highest;
}