Mercurial > repos > dereeper > pgap
diff PGAP-1.2.1/Converter_draft.pl @ 0:83e62a1aeeeb draft
Uploaded
author | dereeper |
---|---|
date | Thu, 24 Jun 2021 13:51:52 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PGAP-1.2.1/Converter_draft.pl Thu Jun 24 13:51:52 2021 +0000 @@ -0,0 +1,245 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Getopt::Std; + +my %opt; +getopts('N:I:O:',\%opt); + +my @usage=qq( +Version: 2016042201 +Usage: perl Converter_draft.pl [options] + +Options: + + -N String Input the strain nickname + -I String Input file directory + -O String Output file directory +); + +if (!scalar(keys %opt)) + { + print join("\n",@usage)."\n"; + exit; + } + +my $prefix; +if (exists($opt{"N"})) + { + $prefix=$opt{"N"} + }else + { + print "-N could not be empty!"; + print join("\n",@usage)."\n"; + exit; + } + +my $output; +if (exists($opt{"O"})) + { + $output=$opt{"O"}; + }else + { + print "-O could not be empty!"; + print join("\n",@usage)."\n"; + exit; + } + +my $input; +if (exists($opt{"I"})) + { + $input=$opt{"I"}; + }else + { + print "-I could not be empty!"; + print join("\n",@usage)."\n"; + exit; + } + +my $sp; +my $line; +my @row; +my @tmp; +my %hash; +my $flag; +my $file; +my $list; +my @list; +my $pttlost; +my $gi; + +if ((-e $output) and ((-d $output))) + { + }else + { + mkdir($output); + } + +if ($input!~/\/$/) + { + $input=$input."/"; + } + +if ($output!~/\/$/) + { + $output=$output."/"; + } + +opendir(DIR,"$input") || die "The input directory ( $input ) is not exists!\n"; +@list=grep(/faa$/,readdir(DIR)); +closedir(DIR); +$_=join("\t",@list); +s/.faa//g; +@list=split(/\t/,$_); + +open(PEP,">$output$prefix.pep"); +open(NUC,">$output$prefix.nuc"); +open(FUN,">$output$prefix.function"); + +foreach $list (@list) + { + %hash=(); + if (!(-e $input.$list.".faa")) + { + print $input.$list.".faa is not exists!\n$list.faa, $list.ffn and $list.ptt are skipped!\n"; + next; + } + + if (!(-e $input.$list.".ffn")) + { + print $input.$list.".ffn is not exists!\n$list.faa, $list.ffn and $list.ptt are skipped!\n"; + next; + } + + if (!(-e $input.$list.".ptt")) + { + open(F,$input.$list.".faa"); + @_=<F>; + close(F); + @_=grep(/^>/,@_); + if (scalar(@_)>1) + { + print $input.$list.".ptt is not exists!\n"; + print "There are more than 1 sequence in $list.faa and $list.ffn, So $list.faa, $list.ffn and $list.ptt are skipped!\n"; + next; + } + $pttlost=1; + }else + { + $pttlost=0; + } + + $file=$input.$list.".faa"; + open(F,$file) or die "could not open $file"; + while ($line=<F>) + { + if ($line=~/^>/) + { + @row=split(/\|/,$line); + print PEP ">$row[1]\n"; + if ($pttlost ==1) + { + $gi=$row[1]; + } + }else + { + print PEP $line; + } + } + close(F); + + if ($pttlost ==1) + { + print FUN "$gi\t-\thypothetical protein\n"; + }else + { + $file=$input.$list.".ptt"; + open(F,"$file") or die "could not open $file"; + $_=<F>; + $_=<F>; + $_=<F>; + while ($line=<F>) + { + chomp($line); + @row=split(/\t/,$line); + print FUN $row[3]."\t".$row[7]."\t".$row[8]."\n"; + @tmp=split(/\.\./,$row[0]); + if ($row[1] eq "+") + { + $hash{$tmp[0]."-".$tmp[@tmp-1]}=$row[3]; + }else + { + $hash{"c".$tmp[@tmp-1]."-".$tmp[0]}=$row[3]; + } + } + close(F); + } + + + + $file=$input.$list.".ffn"; + open(F,"$file") or die "could not open $file";; + while ($line=<F>) + { + if ($line=~/^>/) + { + if ($pttlost==1) + { + print NUC ">$gi\n"; + $flag=1; + }else + { + my $key=&getKey($line); + if (exists($hash{$key})) + { + $flag=1; + print NUC ">$hash{$key}\n"; + }else + { + $flag=0; + } + } + }else + { + if ($flag) + { + print NUC $line; + } + } + } + close(F); + } + +close(PEP); +close(NUC); +close(FUN); + +sub getKey() +{ + (my $line)=@_; + my @tmp; + my $strand; + chomp($line); + @tmp=split(/ /,$line); + @tmp=split(/\:/,$tmp[0]); + + if($tmp[@tmp-1]=~/c/) + { + $strand="-"; + }else + { + $strand="+"; + } + $_=$tmp[@tmp-1]; + s/c//g; + s/ //g; + @tmp=split(/\,|-/,$_); + @tmp=sort{$a<=>$b} @tmp; + if($strand eq "-") + { + return "c".$tmp[@tmp-1]."-".$tmp[0]; + }else + { + return $tmp[0]."-".$tmp[@tmp-1]; + } +} \ No newline at end of file