annotate PGAP-1.2.1/Converter_draft.pl @ 9:2a9923216b4a draft

Uploaded
author dereeper
date Fri, 25 Jun 2021 20:41:52 +0000
parents 83e62a1aeeeb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2 use strict;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
3 use warnings;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
4 use Getopt::Std;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
5
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# ---------------------------------------------------------------------------
# Command-line handling.
#
# Recognised switches (each takes a value): -N, -I and -O; see the usage text
# below.  On success this section leaves three file-scoped variables for the
# rest of the script: $prefix (-N), $output (-O) and $input (-I).
# ---------------------------------------------------------------------------
my %opt;
# getopts() returns false when it meets an unknown switch or a switch whose
# value is missing; remember the result so such calls are rejected below.
my $opts_ok=getopts('N:I:O:',\%opt);

my @usage=qq(
Version: 2016042201
Usage: perl Converter_draft.pl [options]

Options:

-N String Input the strain nickname
-I String Input file directory
-O String Output file directory
);

# Bad switch or missing value: show the usage on STDERR and fail, so that a
# calling pipeline can see the error in the exit status.
if (!$opts_ok)
{
    print STDERR join("\n",@usage)."\n";
    exit 1;
}

# No options at all: print the usage text and stop.  This is treated as a
# request for help, so it goes to STDOUT with the default (zero) exit status.
if (!scalar(keys %opt))
{
    print join("\n",@usage)."\n";
    exit;
}

# All three switches are mandatory.  They are checked in the order N, O, I and
# the first missing one is reported.
foreach my $required ("N","O","I")
{
    next if (defined($opt{$required}));
    print STDERR "-$required could not be empty!";
    print STDERR join("\n",@usage)."\n";
    exit 1;
}

my $prefix=$opt{"N"};
my $output=$opt{"O"};
my $input=$opt{"I"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
58
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# ---------------------------------------------------------------------------
# Shared state and set-up.
#
# Reads:  $input, $output and $prefix (set by the option handling above).
# Leaves: @list (base names of the *.faa files found in $input), $input and
#         $output with a trailing "/", and the three output handles PEP, NUC
#         and FUN opened for writing.
# ---------------------------------------------------------------------------

# File-scoped scratch variables; kept declared here because later code in this
# file refers to them by name.
my $sp;
my $line;
my @row;
my @tmp;
my %hash;
my $flag;
my $file;
my $list;
my @list;
my $pttlost;
my $gi;

# Make sure the output directory exists.  A failed mkdir (for example when
# $output names an existing regular file or its parent is missing) is fatal,
# because every later write would otherwise fail without any message.
if (!(-d $output))
{
    mkdir($output) or die "Could not create the output directory ( $output ): $!\n";
}

# Both directories are used as string prefixes for file names below, so they
# must end with a slash.
$input=$input."/" if ($input!~/\/$/);
$output=$output."/" if ($output!~/\/$/);

# Collect the base names of all files in the input directory whose name ends
# in ".faa".  The dot is escaped and the pattern anchored so that only the
# real extension is matched and removed.
opendir(my $dir_handle,$input) or die "The input directory ( $input ) is not exists!\n";
@list=grep { /\.faa\z/ } readdir($dir_handle);
closedir($dir_handle);
foreach my $entry (@list)
{
    $entry=~s/\.faa\z//;
}

# Open the three result files.  The bareword handle names are kept because the
# conversion code further down prints to PEP, NUC and FUN.
open(PEP,'>',"$output$prefix.pep") or die "could not open $output$prefix.pep: $!";
open(NUC,'>',"$output$prefix.nuc") or die "could not open $output$prefix.nuc: $!";
open(FUN,'>',"$output$prefix.function") or die "could not open $output$prefix.function: $!";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
98
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# ---------------------------------------------------------------------------
# Main conversion loop.
#
# For every base name in @list the files <name>.faa, <name>.ffn and (when
# present) <name>.ptt are read from $input and appended to the outputs:
#   PEP - protein sequences; each header is reduced to its second
#         "|"-separated field
#   FUN - one tab-separated line per gene built from .ptt columns 4, 8 and 9
#   NUC - nucleotide sequences, renamed to the .ptt column-4 identifier whose
#         location matches the coordinates in the .ffn header
# When the .ptt file is missing, the genome is only accepted if its .faa file
# holds exactly one sequence; that sequence's identifier is then used for the
# FUN line and for every .ffn header.
# ---------------------------------------------------------------------------
foreach my $name (@list)
{
    my $faa_file=$input.$name.".faa";
    my $ffn_file=$input.$name.".ffn";
    my $ptt_file=$input.$name.".ptt";

    if (!(-e $faa_file))
    {
        print $faa_file." is not exists!\n$name.faa, $name.ffn and $name.ptt are skipped!\n";
        next;
    }

    if (!(-e $ffn_file))
    {
        print $ffn_file." is not exists!\n$name.faa, $name.ffn and $name.ptt are skipped!\n";
        next;
    }

    # Without a .ptt file there is no location table, so count the protein
    # headers first: only a single-sequence genome can be handled.
    my $ptt_missing=0;
    if (!(-e $ptt_file))
    {
        my $header_count=0;
        open(my $scan,'<',$faa_file) or die "could not open $faa_file: $!";
        while (my $scan_line=<$scan>)
        {
            $header_count++ if ($scan_line=~/^>/);
        }
        close($scan);

        if ($header_count>1)
        {
            print $ptt_file." is not exists!\n";
            print "There are more than 1 sequence in $name.faa and $name.ffn, So $name.faa, $name.ffn and $name.ptt are skipped!\n";
            next;
        }
        if ($header_count==0)
        {
            # No header means no identifier to use; skip instead of reusing
            # the identifier left over from a previous genome.
            print $ptt_file." is not exists!\n";
            print "There is no sequence in $name.faa, So $name.faa, $name.ffn and $name.ptt are skipped!\n";
            next;
        }
        $ptt_missing=1;
    }

    # --- proteins -----------------------------------------------------------
    my $single_id;    # identifier of the only protein; used when the .ptt is missing
    open(my $faa,'<',$faa_file) or die "could not open $faa_file: $!";
    while (my $faa_line=<$faa>)
    {
        if ($faa_line=~/^>/)
        {
            # Drop the line ending first so a two-field header cannot carry it
            # into the identifier.
            (my $header=$faa_line)=~s/\r?\n\z//;
            my @fields=split(/\|/,$header);
            my $id=$fields[1];
            if (!defined($id))
            {
                warn "No '|'-separated identifier in header '$header' of $faa_file\n";
                $id="";
            }
            print PEP ">$id\n";
            $single_id=$id if ($ptt_missing);
        }else
        {
            print PEP $faa_line;
        }
    }
    close($faa);

    # --- annotations ---------------------------------------------------------
    # Maps a location key ("start-end", or "cEND-START" for the other strand)
    # to the identifier in .ptt column 4.  The keys have the same shape as the
    # values returned by getKey() for .ffn headers.
    my %id_for_location;
    if ($ptt_missing)
    {
        print FUN "$single_id\t-\thypothetical protein\n";
    }else
    {
        open(my $ptt,'<',$ptt_file) or die "could not open $ptt_file: $!";
        # The first three lines of the .ptt file are skipped before the rows
        # are read.
        foreach my $skipped (1..3)
        {
            my $discard=<$ptt>;
        }
        while (my $ptt_line=<$ptt>)
        {
            $ptt_line=~s/\r?\n\z//;
            my @cols=split(/\t/,$ptt_line);
            # Blank or truncated rows carry no identifier; ignore them rather
            # than emitting an empty annotation line and a junk key.
            next if (@cols<4);

            # split() drops trailing empty columns, so columns 8 and 9 may be
            # absent; print them as empty strings in that case.
            my ($col8,$col9)=map { defined($_) ? $_ : "" } @cols[7,8];
            print FUN $cols[3]."\t".$col8."\t".$col9."\n";

            my @bounds=split(/\.\./,$cols[0]);
            next if (!@bounds);
            if ($cols[1] eq "+")
            {
                $id_for_location{$bounds[0]."-".$bounds[-1]}=$cols[3];
            }else
            {
                $id_for_location{"c".$bounds[-1]."-".$bounds[0]}=$cols[3];
            }
        }
        close($ptt);
    }

    # --- nucleotides ---------------------------------------------------------
    # $keep says whether the sequence lines after the current header are
    # copied.  It starts false for every file so that nothing is inherited
    # from the previous genome.
    my $keep=0;
    open(my $ffn,'<',$ffn_file) or die "could not open $ffn_file: $!";
    while (my $ffn_line=<$ffn>)
    {
        if ($ffn_line=~/^>/)
        {
            if ($ptt_missing)
            {
                print NUC ">$single_id\n";
                $keep=1;
            }else
            {
                # The & call form is kept on purpose: getKey is defined later
                # in the file, and this form works whether or not its
                # definition carries a prototype.
                my $key=&getKey($ffn_line);
                if (exists($id_for_location{$key}))
                {
                    $keep=1;
                    print NUC ">$id_for_location{$key}\n";
                }else
                {
                    $keep=0;
                }
            }
        }elsif ($keep)
        {
            print NUC $ffn_line;
        }
    }
    close($ffn);
}

# Buffered write errors (for example a full disk) only show up when the handle
# is closed, so the result of each close is checked.
close(PEP) or die "could not close $output$prefix.pep: $!";
close(NUC) or die "could not close $output$prefix.nuc: $!";
close(FUN) or die "could not close $output$prefix.function: $!";
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
216
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# getKey($header)
#
# Builds a location key from one sequence header line.
#
# Only the text before the first space is used.  Of that text, the part after
# the last ":" is taken as the coordinate field, e.g. "190-255",
# "c5020-3562" or several ranges separated by ",".  If the field contains the
# letter "c" the key is "c<largest>-<smallest>", otherwise it is
# "<smallest>-<largest>", where smallest/largest are taken over all numbers in
# the field.
#
# Parameters: $header - the header line; a trailing LF or CRLF is ignored.
# Returns:    the key string, or "" when no coordinates could be extracted.
#
# The sub is declared without a prototype, so both getKey($x) and the
# &getKey($x) form work.  It does not modify $_.
sub getKey
{
    my ($header)=@_;
    return "" if (!defined($header));

    # Remove the line ending, including the CR of a CRLF pair, so it cannot
    # end up inside the key.
    $header=~s/\r?\n\z//;

    my ($first_word)=split(/ /,$header);
    return "" if (!defined($first_word));

    my @colon_parts=split(/\:/,$first_word);
    return "" if (!@colon_parts);
    my $coords=$colon_parts[-1];

    my $strand=($coords=~/c/) ? "-" : "+";
    $coords=~s/c//g;

    my @positions=sort { $a<=>$b } split(/\,|-/,$coords);
    return "" if (!@positions);

    if ($strand eq "-")
    {
        return "c".$positions[-1]."-".$positions[0];
    }
    return $positions[0]."-".$positions[-1];
}