view PGAP-1.2.1/Converter_draft.pl @ 0:83e62a1aeeeb draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 13:51:52 +0000
parents
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;

# Command-line handling.  Three switches are recognised, each taking a value,
# and all three are required:
#   -N  strain nickname (used as the base name of the output files)
#   -I  input directory
#   -O  output directory
my %opt;
# getopts() returns false when it meets an unknown switch or a switch whose
# argument is missing; remember that so the run can be refused below.
my $opts_ok=getopts('N:I:O:',\%opt);

my @usage=qq(
Version: 2016042201
Usage:   perl Converter_draft.pl [options]

Options: 

  -N String    Input the strain nickname
  -I String    Input file directory
  -O String    Output file directory
);

# Malformed command line: show the usage text and report failure.
if (!$opts_ok)
	{
		print join("\n",@usage)."\n";
		exit(1);
	}

# No switch at all: just show the usage text.
if (!scalar(keys %opt))
	{
		print join("\n",@usage)."\n";
		exit;
	}

my $prefix;
my $output;
my $input;

# Check the required switches in the order N, O, I.  A value that is
# undefined or empty counts as missing: an empty directory name would
# otherwise turn into "/" once the trailing slash is appended later on.
foreach my $required (["N",\$prefix],["O",\$output],["I",\$input])
	{
		my ($switch,$target)=@{$required};
		if (defined($opt{$switch}) and length($opt{$switch}))
			{
				${$target}=$opt{$switch};
				next;
			}
		print "-$switch could not be empty!";
		print join("\n",@usage)."\n";
		exit(1);
	}

# File-scope scratch variables.  They are all kept because code further down
# the script refers to them ($sp is not referenced in the visible code).
my $sp;
my $line;
my @row;
my @tmp;
my %hash;
my $flag;
my $file;
my $list;
my @list;
my $pttlost;
my $gi;

# Create the output directory when it is not already a directory.  Stop right
# away if that fails (for example because a plain file of that name exists)
# instead of failing silently later when the output files are opened.
if (!(-d $output))
	{
		mkdir($output) or die "could not create output directory $output: $!\n";
	}

# Both directory names are concatenated directly with file names below, so
# make sure each of them ends with a slash.
if ($input!~/\/$/)
	{
		$input.="/";
	}

if ($output!~/\/$/)
	{
		$output.="/";
	}

# Collect the base names (file name without the ".faa" suffix) of all *.faa
# files in the input directory.  The dot is matched literally and only the
# trailing suffix is removed, so names that merely contain "faa" are left
# alone.  readdir() returns entries in no particular order; sorting makes the
# processing order reproducible.
opendir(DIR,$input) || die "The input directory ( $input ) is not exists!\n";
@list=sort(grep(/\.faa$/,readdir(DIR)));
closedir(DIR);
foreach my $name (@list)
	{
		$name=~s/\.faa$//;
	}

# Main conversion.  For every base name in @list the files <name>.faa,
# <name>.ffn and (optionally) <name>.ptt from $input are merged into three
# files in $output:
#   $prefix.pep       protein sequences, each header reduced to ">ID"
#   $prefix.nuc       nucleotide sequences, headers rewritten to the same IDs
#   $prefix.function  one tab-separated "ID, code, description" line per gene
my $pep_path=$output.$prefix.".pep";
my $nuc_path=$output.$prefix.".nuc";
my $fun_path=$output.$prefix.".function";
open(my $pep_fh,">",$pep_path) or die "could not open $pep_path: $!\n";
open(my $nuc_fh,">",$nuc_path) or die "could not open $nuc_path: $!\n";
open(my $fun_fh,">",$fun_path) or die "could not open $fun_path: $!\n";

foreach my $name (@list)
	{
		my $faa=$input.$name.".faa";
		my $ffn=$input.$name.".ffn";
		my $ptt=$input.$name.".ptt";

		if (!(-e $faa))
			{
				print "$faa is not exists!\n$name.faa, $name.ffn and $name.ptt are skipped!\n";
				next;
			}

		if (!(-e $ffn))
			{
				print "$ffn is not exists!\n$name.faa, $name.ffn and $name.ptt are skipped!\n";
				next;
			}

		# Without a .ptt table the sequences cannot be told apart, so the
		# set is only usable when the .faa holds exactly one record.
		my $ptt_missing=(-e $ptt) ? 0 : 1;
		if ($ptt_missing)
			{
				open(my $count_fh,"<",$faa) or die "could not open $faa";
				my $records=grep(/^>/,<$count_fh>);
				close($count_fh);
				if ($records>1)
					{
						print "$ptt is not exists!\n";
						print "There are more than 1 sequence in $name.faa and $name.ffn, So $name.faa, $name.ffn and $name.ptt are skipped!\n";
						next;
					}
				if ($records<1)
					{
						# No record means no ID to write; never reuse the
						# ID of a previously processed file.
						print "$ptt is not exists!\n";
						print "There is no sequence in $name.faa, So $name.faa, $name.ffn and $name.ptt are skipped!\n";
						next;
					}
			}

		# Proteins: keep the second "|"-separated field of each header as
		# the sequence ID and copy the sequence lines unchanged.
		my $single_id;
		open(my $faa_fh,"<",$faa) or die "could not open $faa";
		while (my $line=<$faa_fh>)
			{
				if ($line!~/^>/)
					{
						print {$pep_fh} $line;
						next;
					}
				# Strip the line ending first so a header with only two
				# fields does not carry its newline into the ID.
				chomp(my $header=$line);
				my $id=(split(/\|/,$header))[1];
				$id="" unless defined($id);
				print {$pep_fh} ">$id\n";
				$single_id=$id if ($ptt_missing);
			}
		close($faa_fh);

		# Functions, plus a coordinate -> ID map used for the .ffn headers.
		# Keys look like "START-END" for "+" rows and "cEND-START" otherwise,
		# the same shape getKey() derives from an .ffn header.
		my %id_at;
		if ($ptt_missing)
			{
				print {$fun_fh} "$single_id\t-\thypothetical protein\n";
			}
		else
			{
				open(my $ptt_fh,"<",$ptt) or die "could not open $ptt";
				# The first three lines are discarded (presumably the title,
				# count and column-header lines of the table - confirm).
				for (1..3)
					{
						my $skipped=<$ptt_fh>;
					}
				while (my $line=<$ptt_fh>)
					{
						chomp($line);
						my @col=split(/\t/,$line);
						# Blank or truncated rows carry no location/ID.
						next if (@col<4);
						# Columns 1, 2 and 4 are location, strand and ID;
						# columns 8 and 9 presumably COG and product.
						my $code=defined($col[7]) ? $col[7] : "";
						my $product=defined($col[8]) ? $col[8] : "";
						print {$fun_fh} $col[3]."\t".$code."\t".$product."\n";
						my @pos=split(/\.\./,$col[0]);
						if ($col[1] eq "+")
							{
								$id_at{$pos[0]."-".$pos[@pos-1]}=$col[3];
							}
						else
							{
								$id_at{"c".$pos[@pos-1]."-".$pos[0]}=$col[3];
							}
					}
				close($ptt_fh);
			}

		# Nucleotides: rewrite each header to the matching ID and drop the
		# records whose coordinates are not listed in the .ptt table.
		# $keep starts at 0 for every file so that nothing is inherited
		# from the previous one.
		my $keep=0;
		open(my $ffn_fh,"<",$ffn) or die "could not open $ffn";
		while (my $line=<$ffn_fh>)
			{
				if ($line!~/^>/)
					{
						print {$nuc_fh} $line if ($keep);
						next;
					}
				if ($ptt_missing)
					{
						print {$nuc_fh} ">$single_id\n";
						$keep=1;
						next;
					}
				# Called with "&" so that the call does not depend on any
				# prototype attached to getKey.
				my $key=&getKey($line);
				$keep=(defined($key) and exists($id_at{$key})) ? 1 : 0;
				print {$nuc_fh} ">$id_at{$key}\n" if ($keep);
			}
		close($ffn_fh);
	}

# Buffered write errors only show up at close time, so check them.
close($pep_fh) or die "could not close $pep_path: $!\n";
close($nuc_fh) or die "could not close $nuc_path: $!\n";
close($fun_fh) or die "could not close $fun_path: $!\n";

# getKey($line)
#
# Derive a coordinate key from a sequence header line.
#
# The first space-delimited word of the line is split on ":" and the last
# piece is taken as the location, e.g. "190-255" or "c5020-3562".  A location
# containing the letter "c" is treated as the reverse strand.  All numbers in
# the location (separated by "," or "-") are sorted numerically and only the
# smallest and the largest are used.
#
# Parameter:
#   $line  header line, with or without its trailing newline.
# Returns:
#   "LOW-HIGH" for the forward strand, "cHIGH-LOW" for the reverse strand.
#   For a line without any location the parts are empty, giving "-".
#
# The global $_ is left untouched.
sub getKey
{
	my ($line)=@_;
	$line="" unless defined($line);
	chomp($line);

	# First word of the header; undefined only for an empty line.
	my ($token)=split(/ /,$line);
	$token="" unless defined($token);

	my @fields=split(/\:/,$token);
	my $location=@fields ? $fields[-1] : "";

	my $strand=($location=~/c/) ? "-" : "+";

	# Drop the strand marker (and any blanks) before splitting into numbers.
	(my $coords=$location)=~tr/c //d;

	my @pos;
	{
		# Tokens are compared numerically whatever they contain; keep the
		# comparison quiet for tokens that are not plain numbers.
		no warnings 'numeric';
		@pos=sort { $a<=>$b } split(/\,|-/,$coords);
	}
	my $low=@pos ? $pos[0] : "";
	my $high=@pos ? $pos[-1] : "";

	return "c".$high."-".$low if ($strand eq "-");
	return $low."-".$high;
}