comparison PGAP-1.2.1/Converter_draft.pl @ 0:83e62a1aeeeb draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 13:51:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:83e62a1aeeeb
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use Getopt::Std;
5
# ---------------------------------------------------------------------------
# Command-line handling.
#   -N  strain nickname (used as the base name of the output files)
#   -I  input directory
#   -O  output directory
# All three options are mandatory and must carry a non-empty value.
# ---------------------------------------------------------------------------
my %opt;
getopts('N:I:O:',\%opt);

my @usage=qq(
Version: 2016042201
Usage: perl Converter_draft.pl [options]

Options:

-N String Input the strain nickname
-I String Input file directory
-O String Output file directory
);

# Called without any recognised option: just show the help text.
if (!scalar(keys %opt))
{
    print join("\n",@usage)."\n";
    exit;
}

# Check the mandatory options in the order N, O, I.  A value that is present
# but empty (e.g. -N '') is rejected as well, because it would otherwise
# yield output files with an empty base name.  The script leaves with a
# non-zero status so that a calling pipeline can detect the failure.
foreach my $switch ("N","O","I")
{
    next if (defined($opt{$switch}) and length($opt{$switch}));
    print "-$switch could not be empty!";
    print join("\n",@usage)."\n";
    exit(1);
}

my $prefix=$opt{"N"};
my $output=$opt{"O"};
my $input=$opt{"I"};
58
# File-scope scratch variables used by the rest of the script.
my $sp;
my $line;
my @row;
my @tmp;
my %hash;
my $flag;
my $file;
my $list;
my @list;
my $pttlost;
my $gi;

# Create the output directory unless it already exists.  A failure (for
# example when the path exists but is a regular file, or the parent is not
# writable) is fatal: continuing would only lose every output file silently.
if (!(-d $output))
{
    mkdir($output) or die "Could not create the output directory ( $output ): $!\n";
}

# Both directory names are later concatenated with file names, so make sure
# they end with a slash.
if ($input!~/\/$/)
{
    $input=$input."/";
}

if ($output!~/\/$/)
{
    $output=$output."/";
}

# Collect the base names (file name without the ".faa" extension) of every
# *.faa file in the input directory.  The dot is escaped and the pattern is
# anchored at the end of the name, so only the real extension is matched and
# removed; each name is processed on its own instead of on a joined string.
opendir(my $dir_handle,$input) or die "The input directory ( $input ) is not exists!\n";
@list=map { my $base=$_; $base=~s/\.faa$//; $base } grep(/\.faa$/,readdir($dir_handle));
closedir($dir_handle);
94
# ---------------------------------------------------------------------------
# Main conversion.
# For every base name in @list the files <name>.faa, <name>.ffn and
# (optionally) <name>.ptt are read from the input directory and appended to
# three combined output files:
#   <prefix>.pep       protein sequences, header reduced to the 2nd '|' field
#   <prefix>.nuc       nucleotide sequences, header replaced by the protein id
#   <prefix>.function  one "id <TAB> COG <TAB> product" line per protein
# ---------------------------------------------------------------------------
my $pep_path="$output$prefix.pep";
my $nuc_path="$output$prefix.nuc";
my $fun_path="$output$prefix.function";

# An output file that cannot be created is fatal; printing to an unopened
# handle would otherwise drop all results without any error.
open(my $pep_fh,'>',$pep_path) or die "could not open $pep_path: $!";
open(my $nuc_fh,'>',$nuc_path) or die "could not open $nuc_path: $!";
open(my $fun_fh,'>',$fun_path) or die "could not open $fun_path: $!";

foreach my $name (@list)
{
    my $faa_file=$input.$name.".faa";
    my $ffn_file=$input.$name.".ffn";
    my $ptt_file=$input.$name.".ptt";

    if (!(-e $faa_file))
    {
        print $faa_file." is not exists!\n$name.faa, $name.ffn and $name.ptt are skipped!\n";
        next;
    }

    if (!(-e $ffn_file))
    {
        print $ffn_file." is not exists!\n$name.faa, $name.ffn and $name.ptt are skipped!\n";
        next;
    }

    # Without a .ptt table there is nothing to link nucleotide records to
    # protein ids, so such a genome is only accepted when its .faa holds
    # exactly one sequence.
    my $ptt_missing=(-e $ptt_file) ? 0 : 1;
    if ($ptt_missing)
    {
        # Count the headers line by line instead of slurping the file.
        my $header_count=0;
        open(my $count_fh,'<',$faa_file) or die "could not open $faa_file: $!";
        while (my $entry=<$count_fh>)
        {
            $header_count++ if ($entry=~/^>/);
        }
        close($count_fh);

        if ($header_count>1)
        {
            print $ptt_file." is not exists!\n";
            print "There are more than 1 sequence in $name.faa and $name.ffn, So $name.faa, $name.ffn and $name.ptt are skipped!\n";
            next;
        }
        if ($header_count==0)
        {
            # No header means no protein id to write; skip the genome rather
            # than emit an undefined id.
            print $ptt_file." is not exists!\n";
            print "There is no sequence in $name.faa, So $name.faa, $name.ffn and $name.ptt are skipped!\n";
            next;
        }
    }

    # --- proteins: copy the .faa, keeping only the 2nd '|' field as header.
    # NOTE(review): headers are assumed to look like ">gi|<id>|..."; a header
    # without '|' has no 2nd field - confirm against the expected input.
    my $single_id;    # id of the only protein when the .ptt is missing
    open(my $faa_fh,'<',$faa_file) or die "could not open $faa_file: $!";
    while (my $entry=<$faa_fh>)
    {
        if ($entry=~/^>/)
        {
            # Strip the line ending first so that an id which is the last
            # field of the header does not carry a newline with it.
            (my $header=$entry)=~s/\s+\z//;
            my @field=split(/\|/,$header);
            print $pep_fh ">$field[1]\n";
            if ($ptt_missing)
            {
                $single_id=$field[1];
            }
        }
        else
        {
            print $pep_fh $entry;
        }
    }
    close($faa_fh);

    # --- functions: either a placeholder line or one line per .ptt row.
    # %id_for_location maps "start-end" (plus strand) or "cend-start"
    # (any other strand) to the protein id; it is rebuilt for every genome.
    my %id_for_location;
    if ($ptt_missing)
    {
        print $fun_fh "$single_id\t-\thypothetical protein\n";
    }
    else
    {
        open(my $ptt_fh,'<',$ptt_file) or die "could not open $ptt_file: $!";
        # The first three lines of a .ptt file are skipped as header lines.
        for my $skipped (1..3)
        {
            my $header_line=<$ptt_fh>;
        }
        while (my $entry=<$ptt_fh>)
        {
            chomp($entry);
            next if ($entry=~/^\s*$/);    # ignore blank rows
            # Columns used: 0 = location "start..end", 1 = strand,
            # 3 = protein id, 7 and 8 = the two annotation columns written
            # to the function file.
            my @col=split(/\t/,$entry);
            print $fun_fh $col[3]."\t".$col[7]."\t".$col[8]."\n";
            my @pos=split(/\.\./,$col[0]);
            if ($col[1] eq "+")
            {
                $id_for_location{$pos[0]."-".$pos[-1]}=$col[3];
            }
            else
            {
                $id_for_location{"c".$pos[-1]."-".$pos[0]}=$col[3];
            }
        }
        close($ptt_fh);
    }

    # --- nucleotides: keep only the records whose location is known from the
    # .ptt table (or every record when the .ptt is missing).  $keep is reset
    # for every genome so that sequence lines preceding the first header are
    # never attached to a record of the previous genome.
    my $keep=0;
    open(my $ffn_fh,'<',$ffn_file) or die "could not open $ffn_file: $!";
    while (my $entry=<$ffn_fh>)
    {
        if ($entry=~/^>/)
        {
            if ($ptt_missing)
            {
                print $nuc_fh ">$single_id\n";
                $keep=1;
            }
            else
            {
                # Called with '&' so that any prototype declared on getKey
                # is bypassed.
                my $key=&getKey($entry);
                if (exists($id_for_location{$key}))
                {
                    $keep=1;
                    print $nuc_fh ">$id_for_location{$key}\n";
                }
                else
                {
                    $keep=0;
                }
            }
        }
        elsif ($keep)
        {
            print $nuc_fh $entry;
        }
    }
    close($ffn_fh);
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($pep_fh) or die "could not close $pep_path: $!";
close($nuc_fh) or die "could not close $nuc_path: $!";
close($fun_fh) or die "could not close $fun_path: $!";
216
# getKey - derive a location key from the header line of an .ffn record.
#
# Parameter:
#   $line  the header line, e.g. ">gi|...|ref|ACC|:c5020-4567 description"
#
# Returns:
#   "start-end" when the location carries no 'c' marker, otherwise
#   "cend-start", where start/end are the smallest/largest coordinate found
#   in the location (so multi-segment locations collapse to their outer
#   bounds).
#
# The sub is declared without a prototype (it takes one argument) and works
# on lexical variables only, so the caller's $_ is left untouched.
sub getKey
{
    my ($line)=@_;
    chomp($line);

    # The location is the part after the last ':' of the first
    # space-delimited token of the header.
    my @token=split(/ /,$line);
    my @part=split(/\:/,$token[0]);
    my $location=$part[@part-1];

    # A 'c' in the location marks the complementary strand.
    my $strand=($location=~/c/) ? "-" : "+";

    $location=~s/c//g;
    $location=~s/ //g;

    # Coordinates are separated by '-' (range) or ',' (segments).
    my @coord=sort {$a<=>$b} split(/\,|-/,$location);

    if ($strand eq "-")
    {
        return "c".$coord[@coord-1]."-".$coord[0];
    }
    return $coord[0]."-".$coord[@coord-1];
}