Mercurial > repos > dereeper > pgap
comparison PGAP-1.2.1/Converter_draft.pl @ 0:83e62a1aeeeb draft
Uploaded
author | dereeper |
---|---|
date | Thu, 24 Jun 2021 13:51:52 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:83e62a1aeeeb |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use warnings; | |
4 use Getopt::Std; | |
5 | |
# ---------------------------------------------------------------------------
# Command-line handling.
#
#   -N  strain nickname; used as the base name of the three output files
#   -I  input directory
#   -O  output directory
#
# All three options are required.  Running the script without any option
# prints the usage text and exits successfully; a missing or empty required
# option prints the error and the usage text on STDERR and exits with a
# non-zero status so that calling pipelines can detect the failure.
# ---------------------------------------------------------------------------
my %opt;
getopts('N:I:O:',\%opt);

my @usage=qq(
Version: 2016042201
Usage: perl Converter_draft.pl [options]

Options:

-N String Input the strain nickname
-I String Input file directory
-O String Output file directory
);

# No option given at all: treat it as a request for help.
if (!scalar(keys %opt))
{
    print join("\n",@usage)."\n";
    exit;
}

# Return the value of a required option, or report the problem and abort.
# An explicitly empty value (e.g. -N '') is rejected as well, which is what
# the "could not be empty" message promises.
my $require_option = sub {
    my ($name) = @_;
    my $value = $opt{$name};
    if (defined($value) and length($value))
    {
        return $value;
    }
    print STDERR "-$name could not be empty!";
    print STDERR join("\n",@usage)."\n";
    exit 1;
};

# Checked in the same order as before: -N, then -O, then -I.
my $prefix = $require_option->("N");
my $output = $require_option->("O");
my $input  = $require_option->("I");
58 | |
# ---------------------------------------------------------------------------
# Main conversion.
#
# For every <name>.faa found in the input directory, the matching
# <name>.ffn and (optionally) <name>.ptt are read and appended to three
# combined output files:
#
#   <output>/<prefix>.pep       protein sequences, headers reduced to the id
#   <output>/<prefix>.nuc       nucleotide sequences, headers reduced to the id
#   <output>/<prefix>.function  id <TAB> column 8 <TAB> column 9 of the .ptt
#
# Protein and nucleotide records are paired through their coordinates: each
# .ptt row is stored under a "start-end" key ("c" + "end-start" for the
# minus strand) and every .ffn header is turned into the same kind of key
# by getKey().
# ---------------------------------------------------------------------------

# Create the output directory when it does not exist yet.  Failing here is
# better than silently failing on every later open().
if (!(-d $output))
{
    mkdir($output) or die "could not create the output directory ( $output ): $!\n";
}

# Both directories are used as plain string prefixes below, so make sure
# they end with a slash.
if ($input!~/\/$/)
{
    $input=$input."/";
}

if ($output!~/\/$/)
{
    $output=$output."/";
}

# Collect the base names of all *.faa files.  The suffix is removed with an
# escaped, anchored pattern so that names containing "faa" somewhere in the
# middle are left intact.
opendir(my $dir,$input) or die "The input directory ( $input ) is not exists!\n";
my @list=map { /^(.+)\.faa\z/ ? $1 : () } readdir($dir);
closedir($dir);

my $pep_file=$output.$prefix.".pep";
my $nuc_file=$output.$prefix.".nuc";
my $fun_file=$output.$prefix.".function";

open(my $pep,">",$pep_file) or die "could not open $pep_file: $!\n";
open(my $nuc,">",$nuc_file) or die "could not open $nuc_file: $!\n";
open(my $fun,">",$fun_file) or die "could not open $fun_file: $!\n";

foreach my $base (@list)
{
    # All per-genome state is declared inside the loop so that nothing
    # leaks from one genome into the next one.
    my %hash;        # coordinate key -> protein id (4th .ptt column)
    my $gi;          # id of the single protein when the .ptt is missing
    my $flag=0;      # true while the current .ffn record is being copied
    my $pttlost;     # 1 when the genome has no .ptt file

    if (!(-e $input.$base.".faa"))
    {
        print $input.$base.".faa is not exists!\n$base.faa, $base.ffn and $base.ptt are skipped!\n";
        next;
    }

    if (!(-e $input.$base.".ffn"))
    {
        print $input.$base.".ffn is not exists!\n$base.faa, $base.ffn and $base.ptt are skipped!\n";
        next;
    }

    if (!(-e $input.$base.".ptt"))
    {
        # Without a .ptt the genome can only be handled when it holds
        # exactly one protein, so count the FASTA headers first.
        my $file=$input.$base.".faa";
        my $headers=0;
        open(my $count_fh,"<",$file) or die "could not open $file";
        while (my $line=<$count_fh>)
        {
            $headers++ if ($line=~/^>/);
        }
        close($count_fh);

        if ($headers>1)
        {
            print $input.$base.".ptt is not exists!\n";
            print "There are more than 1 sequence in $base.faa and $base.ffn, So $base.faa, $base.ffn and $base.ptt are skipped!\n";
            next;
        }
        if ($headers==0)
        {
            # No header means no id to write; skip instead of emitting
            # records with an undefined id.
            print $input.$base.".ptt is not exists!\n";
            print "There is no sequence in $base.faa, So $base.faa, $base.ffn and $base.ptt are skipped!\n";
            next;
        }
        $pttlost=1;
    }else
    {
        $pttlost=0;
    }

    # --- proteins ----------------------------------------------------------
    # The id is taken from the second "|"-separated field of the header.
    # NOTE(review): this presumably expects NCBI style ">gi|<id>|..." headers;
    # a header without "|" yields an empty id - confirm against the inputs.
    my $file=$input.$base.".faa";
    open(my $faa,"<",$file) or die "could not open $file";
    while (my $line=<$faa>)
    {
        if ($line=~/^>/)
        {
            my @row=split(/\|/,$line);
            print {$pep} ">$row[1]\n";
            if ($pttlost==1)
            {
                $gi=$row[1];
            }
        }else
        {
            print {$pep} $line;
        }
    }
    close($faa);

    # --- functions and coordinate index -------------------------------------
    if ($pttlost==1)
    {
        print {$fun} "$gi\t-\thypothetical protein\n";
    }else
    {
        $file=$input.$base.".ptt";
        open(my $ptt,"<",$file) or die "could not open $file";

        # The first three lines of the .ptt are skipped unconditionally.
        scalar(<$ptt>) for (1..3);

        while (my $line=<$ptt>)
        {
            chomp($line);
            next if ($line!~/\S/);    # ignore blank lines
            my @row=split(/\t/,$line);
            print {$fun} $row[3]."\t".$row[7]."\t".$row[8]."\n";

            # Column 1 holds "start..end"; build the same key getKey()
            # derives from the .ffn header.
            my @coords=split(/\.\./,$row[0]);
            if ($row[1] eq "+")
            {
                $hash{$coords[0]."-".$coords[-1]}=$row[3];
            }else
            {
                $hash{"c".$coords[-1]."-".$coords[0]}=$row[3];
            }
        }
        close($ptt);
    }

    # --- nucleotides --------------------------------------------------------
    $file=$input.$base.".ffn";
    open(my $ffn,"<",$file) or die "could not open $file";
    while (my $line=<$ffn>)
    {
        if ($line=~/^>/)
        {
            if ($pttlost==1)
            {
                print {$nuc} ">$gi\n";
                $flag=1;
            }else
            {
                # The "&" form is kept deliberately: it works no matter
                # whether getKey is declared with a prototype or not.
                my $key=&getKey($line);
                if (exists($hash{$key}))
                {
                    $flag=1;
                    print {$nuc} ">$hash{$key}\n";
                }else
                {
                    # No protein at these coordinates: drop the record.
                    $flag=0;
                }
            }
        }elsif ($flag)
        {
            print {$nuc} $line;
        }
    }
    close($ffn);
}

# Buffered write errors (e.g. a full disk) only show up at close time.
close($pep) or die "could not close $pep_file: $!\n";
close($nuc) or die "could not close $nuc_file: $!\n";
close($fun) or die "could not close $fun_file: $!\n";
216 | |
# getKey($header)
#
# Turn the header line of a nucleotide FASTA record into a coordinate key.
#
# The location is taken from the text after the last ":" of the first
# space-separated word of the header, e.g.
#
#   >gi|49175990|ref|NC_000913.2|:c5020-3362 ...   ->  "c5020-3362"
#   >ref|NC_000913.2|:190-255 ...                  ->  "190-255"
#
# A "c" in the location marks the minus strand.  All coordinates of the
# location (there may be several, separated by "," or "-") are sorted
# numerically and only the extremes are kept:
#
#   plus strand :  "<smallest>-<largest>"
#   minus strand:  "c<largest>-<smallest>"
#
# Parameter: the header line (a trailing newline is allowed).
# Returns  : the key as a string.
#
# The sub is declared without a prototype because it takes one argument,
# and it works on lexical variables only, so the caller's $_ is untouched.
sub getKey
{
    my ($line)=@_;
    chomp($line);

    my @words=split(/ /,$line);
    my @fields=split(/\:/,$words[0]);
    my $location=$fields[@fields-1];

    my $strand=($location=~/c/) ? "-" : "+";
    $location=~s/c//g;

    my @coords=sort { $a<=>$b } split(/\,|-/,$location);

    if ($strand eq "-")
    {
        return "c".$coords[@coords-1]."-".$coords[0];
    }
    return $coords[0]."-".$coords[@coords-1];
}