#!/usr/bin/perl

# Stage per-sample gene and protein FASTA files into a temporary directory,
# run the bundled PGAP-1.2.1 pipeline on it, and copy the resulting
# ortholog cluster table to the requested output path.
#
# For each sample name <name> (taken from --list, in order) the staging
# directory receives:
#   <name>.nuc       copy of the matching --genes file
#   <name>.pep       copy of the matching --proteins file
#   <name>.function  one "<fasta header> - unknown" line per protein record

use strict;
use warnings;
use Getopt::Long;
use Bio::SeqIO;
use File::Basename;
use File::Copy qw(copy);
my $dirname = dirname(__FILE__);
use Cwd qw(cwd);
my $dir = cwd;

my $usage = qq~Usage:$0 <args> [<opts>]

where <args> are:

    -g, --genes          <list of gene fasta files. Comma separated list>
    -p, --proteins       <list of protein fasta files. Comma separated list>
    -l, --list           <list of samples in the same order. Comma separated list>
    -o, --out            <output name>
~;
$usage .= "\n";

my ($genes, $proteins, $out, $order);

# Short aliases are declared explicitly so they match the usage text even if
# auto-abbreviation is disabled. Unknown options are still tolerated
# (GetOptions only warns), as in the original script.
GetOptions(
    "genes|g=s"    => \$genes,
    "proteins|p=s" => \$proteins,
    "out|o=s"      => \$out,
    "list|l=s"     => \$order
);

die $usage
  if ( !$proteins || !$genes || !$out || !$order );

# Tolerate a sample list that starts with a stray comma (e.g. built by
# appending ",name" repeatedly).
$order =~ s/^,//;

my @names         = split( /,/, $order );
my @gene_files    = split( /,/, $genes );
my @protein_files = split( /,/, $proteins );

# Every sample needs exactly one gene file and one protein file; otherwise
# the staged file names would be built from undefined sample names.
if ( @names != @gene_files || @names != @protein_files ) {
    die sprintf(
        "Mismatch between samples (%d), gene files (%d) and protein files (%d)\n\n%s",
        scalar @names, scalar @gene_files, scalar @protein_files, $usage );
}
if ( grep { !length } @names ) {
    die "Empty sample name in --list\n\n$usage";
}

# The process id keeps concurrent runs from sharing a staging directory.
my $workdir = "tmpdir$$";
mkdir($workdir) or die "Cannot create directory $workdir: $!\n";

for my $i ( 0 .. $#names ) {
    my $sample       = $names[$i];
    my $gene_file    = $gene_files[$i];
    my $protein_file = $protein_files[$i];

    # File::Copy avoids passing user-supplied paths through a shell.
    copy( $gene_file, "$workdir/$sample.nuc" )
      or die "Cannot copy $gene_file to $workdir/$sample.nuc: $!\n";
    copy( $protein_file, "$workdir/$sample.pep" )
      or die "Cannot copy $protein_file to $workdir/$sample.pep: $!\n";

    # Build the function table: every protein is annotated as "unknown".
    my $function_file = "$workdir/$sample.function";
    open( my $pep_fh, '<', $protein_file )
      or die "Cannot read $protein_file: $!\n";
    open( my $function_fh, '>', $function_file )
      or die "Cannot write $function_file: $!\n";
    while ( my $line = <$pep_fh> ) {

        # Header lines only; the capture stops before any CR/LF so files
        # with Windows line endings do not leak "\r" into the table.
        if ( $line =~ /^>([^\r\n]*)/ ) {
            print {$function_fh} "$1 - unknown\n";
        }
    }
    close($pep_fh);
    close($function_fh) or die "Cannot finish writing $function_file: $!\n";
}

#chdir("$dirname/PGAP-1.2.1");

# List-form system() bypasses the shell, so sample names and paths are passed
# to PGAP verbatim.
my @pgap_cmd = (
    'perl',      "$dirname/PGAP-1.2.1/PGAP.pl",
    '--input',   $workdir,
    '--output',  'outdir',
    '--cluster', '--pangenome', '--variation', '--evolution', '--function',
    '--strains', join( "+", @names ),
    '--method',  'GF',
);
my $status = system(@pgap_cmd);
if ( $status == -1 ) {
    warn "Could not launch PGAP: $!\n";
}
elsif ( $status != 0 ) {
    warn "PGAP exited with status " . ( $? >> 8 ) . "\n";
}

# Without the cluster table there is nothing to return to the caller.
copy( "outdir/1.Orthologs_Cluster.txt", $out )
  or die "Cannot copy outdir/1.Orthologs_Cluster.txt to $out: $!\n";