#!/usr/bin/perl

# Relabels the header row of a tab-separated matrix: every column after the
# first is renamed from its file name (quotes and the ".gb.filt" /
# ".gb.rmdup" suffixes stripped) to the strain name listed for that file.
# All remaining rows of the matrix are copied to the output unchanged.
#
# Usage: <script> <indir> <matrix> <out> <strain_names>
#   indir         directory scanned for files ending in "rmdup.gff"
#   matrix        tab-separated input; its first row holds the file names
#   out           output file (overwritten)
#   strain_names  tab-separated file: <file name> TAB <strain name>

use strict;
use warnings;

die "Usage: $0 <indir> <matrix> <out> <strain_names>\n" if @ARGV < 4;

my ($indir, $matrix, $out, $strain_names) = @ARGV;

# ---------------------------------------------------------------------------
# Load the file-name -> strain-name table.
# ---------------------------------------------------------------------------
my %strains_of_gb;
open(my $names_fh, '<', $strain_names)
    or die "Cannot open strain names file '$strain_names': $!\n";
while (my $line = <$names_fh>) {
    $line =~ tr/\r\n//d;        # tolerate both Unix and DOS line endings
    next if $line eq '';
    my ($gb, $strain) = split(/\t/, $line);
    $strains_of_gb{$gb} = $strain;
}
close($names_fh);

# ---------------------------------------------------------------------------
# Collect, for every CDS feature of every *rmdup.gff file in $indir, the
# mapping from its "ID" attribute to its "protein_id" attribute.
#
# NOTE(review): %corr is filled here but never read in the visible part of
# this script. It is kept in case later code relies on it; confirm and drop
# this section if it is dead. Because it does not influence the output,
# problems here only warn instead of aborting.
# ---------------------------------------------------------------------------
my %corr;
my @gff_files;
if (opendir(my $dir_handle, $indir)) {
    # Same selection as the shell pattern "*rmdup.gff": the suffix must
    # match and hidden files are excluded. No shell is involved, so unusual
    # characters in $indir cannot be interpreted as commands.
    @gff_files = map  { "$indir/$_" }
                 sort
                 grep { !/^\./ && /rmdup\.gff\z/ } readdir($dir_handle);
    closedir($dir_handle);
}
else {
    warn "Cannot read directory '$indir': $!\n";
}

GFF_FILE:
for my $gff_file (@gff_files) {
    open(my $gff_fh, '<', $gff_file) or do {
        warn "Skipping '$gff_file': $!\n";
        next GFF_FILE;
    };
    while (my $record = <$gff_fh>) {
        my @fields = split(/\t/, $record);

        # Comment and short lines have no third column.
        next unless defined $fields[2] && $fields[2] eq 'CDS';

        # protein_id may be the last attribute on the line, in which case
        # it is not followed by ';'.
        if ($record =~ /ID=([^;]*);.*protein_id=([^;\r\n]*)/) {
            my ($id, $protid) = ($1, $2);
            $corr{$id} = $protid;
        }
    }
    close($gff_fh);
}

# ---------------------------------------------------------------------------
# Rewrite the matrix header and copy the body.
# ---------------------------------------------------------------------------
# NOTE(review): $cl_num and $nb_strains are maintained as before but are not
# read in the visible part of this script.
my $cl_num     = 0;
my $nb_strains = 1;

# Open the input first so that a missing matrix does not truncate an
# existing output file.
open(my $matrix_fh, '<', $matrix)
    or die "Cannot open matrix file '$matrix': $!\n";
open(my $out_fh, '>', $out)
    or die "Cannot open output file '$out' for writing: $!\n";

my $firstline = <$matrix_fh>;
$firstline = '' unless defined $firstline;    # empty matrix: header only
$firstline =~ tr/\r\n//d;
my @infos = split(/\t/, $firstline);

# NOTE(review): "ClutserID" looks like a typo for "ClusterID"; it is kept
# byte-for-byte because downstream consumers may match on the current text.
my @header = ('ClutserID');

# Column 0 is the row-label column; the file names start at column 1.
for my $j (1 .. $#infos) {
    my $gbfile = $infos[$j];
    $gbfile =~ s/\"//g;
    $gbfile =~ s/\.gb\.filt//g;
    $gbfile =~ s/\.gb\.rmdup//g;

    my $strain = $strains_of_gb{$gbfile};
    if (!defined $strain) {
        # Keep the historical output (an empty label) but make the gap
        # visible instead of failing silently.
        warn "No strain name found for '$gbfile' (column $j of '$matrix')\n";
        $strain = '';
    }
    push @header, $strain;
    $nb_strains++;
}
print {$out_fh} join("\t", @header), "\n";

while (my $row = <$matrix_fh>) {
    print {$out_fh} $row;
}
close($matrix_fh);

# Buffered write errors (e.g. disk full) only surface when closing.
close($out_fh) or die "Error while writing '$out': $!\n";