#!/usr/bin/perl

use strict;
use warnings;

# ---------------------------------------------------------------------------
# Command-line arguments.
#
#   $indir         directory scanned for "*rmdup.gff" annotation files
#   $matrix        comma-separated gene presence/absence matrix
#   $out           main output file; also used as prefix for the
#                  ".upsetr.txt" and ".accessory_01matrix.txt" outputs
#   $strain_names  two-column, tab-separated file: <genome name> <strain name>
# ---------------------------------------------------------------------------
die "Usage: $0 <gff_dir> <matrix_csv> <output_file> <strain_names_tsv>\n"
    unless @ARGV >= 4;

my $indir        = $ARGV[0];
my $matrix       = $ARGV[1];
my $out          = $ARGV[2];
my $strain_names = $ARGV[3];

# Lookup table: genome name (first column) -> strain name (second column).
my %strains_of_gb;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and a missing file is reported instead of
# silently producing an empty table.
open(my $strain_fh, '<', $strain_names)
    or die "Cannot open strain names file '$strain_names': $!\n";

while (my $line = <$strain_fh>) {
    # Strip both Unix and Windows line terminators.
    $line =~ tr/\r\n//d;

    my ($gb, $strain) = split(/\t/, $line);

    # Ignore blank lines and lines without a strain column; they would only
    # add undefined entries to the table.
    next unless defined $gb && defined $strain;

    $strains_of_gb{$gb} = $strain;
}
close($strain_fh);

# ---------------------------------------------------------------------------
# Build %corr: CDS feature ID -> protein_id, read from every regular file in
# $indir whose name ends in "rmdup.gff".
#
# The directory is read directly rather than through a shell "ls" pipe, so
# directory names containing spaces or shell metacharacters work and no
# user-supplied text is ever handed to a shell. Hidden files are skipped, as
# a shell glob would skip them. Files are processed in sorted order; when the
# same ID occurs in several files, the last file read wins.
# ---------------------------------------------------------------------------
my %corr;

opendir(my $gff_dir, $indir)
    or die "Cannot open GFF directory '$indir': $!\n";
my @gff_files = map  { "$indir/$_" }
                sort
                grep { !/^\./ && /rmdup\.gff\z/ && -f "$indir/$_" }
                readdir($gff_dir);
closedir($gff_dir);

warn "No *rmdup.gff file found in '$indir'; gene IDs will be kept as-is\n"
    unless @gff_files;

for my $gff_file (@gff_files) {
    open(my $gff_fh, '<', $gff_file)
        or die "Cannot open GFF file '$gff_file': $!\n";

    while (my $line = <$gff_fh>) {
        # Remove line terminators so the last attribute can be captured
        # cleanly even when it is not followed by ';'.
        $line =~ tr/\r\n//d;

        # Column 3 is the feature type; only CDS rows are of interest.
        # Comment/directive lines have no third column and are skipped.
        my @infos = split(/\t/, $line);
        next unless defined $infos[2] && $infos[2] eq 'CDS';

        # \b keeps longer keys such as "orig_protein_id=" from being taken
        # for "protein_id=". The protein_id value ends at the next ';' or at
        # the end of the line, so it may be the last attribute.
        if ($line =~ /\bID=([^;]*);.*\bprotein_id=([^;]*)/) {
            my ($id, $protid) = ($1, $2);
            $corr{$id} = $protid;
        }
    }
    close($gff_fh);
}

# ---------------------------------------------------------------------------
# Convert the presence/absence matrix ($matrix) into three tab-separated
# tables, one row per matrix line (numbered from 1) and one column per genome:
#
#   $out                          protein IDs of the cluster in each strain,
#                                 comma-separated, "-" when the cell is empty
#   $out.upsetr.txt               1/0 presence flag for each strain
#   $out.accessory_01matrix.txt   the same 1/0 rows, restricted to clusters
#                                 missing from at least one strain
#
# Gene IDs are translated through %corr; IDs without an entry are written
# unchanged. Column names are translated through %strains_of_gb; names
# without an entry are written unchanged rather than as an empty header.
# ---------------------------------------------------------------------------

# Index of the first per-genome column; everything before it is skipped.
# NOTE(review): 14 presumably matches the leading metadata columns of a
# Roary-style gene_presence_absence.csv -- confirm against the producer.
my $first_genome_col = 14;

# Split one CSV line into its fields. A field enclosed in double quotes may
# contain commas (and doubled quotes); the enclosing quotes are removed.
# Trailing empty fields are kept so every row has as many cells as it has
# separators + 1.
my $split_csv_line = sub {
    my ($text) = @_;
    my @fields;
    while ($text =~ /\G(?:"((?:[^"]|"")*)"|([^,]*))(,|\z)/g) {
        push @fields, defined $1 ? $1 : $2;
        last if $3 eq '';    # field was terminated by end of line
    }
    return @fields;
};

# Open the input first so that a bad path does not leave empty output files.
open(my $matrix_fh, '<', $matrix)
    or die "Cannot open matrix file '$matrix': $!\n";
open(my $table_fh, '>', $out)
    or die "Cannot write '$out': $!\n";
open(my $upset_fh, '>', "$out.upsetr.txt")
    or die "Cannot write '$out.upsetr.txt': $!\n";
open(my $accessory_fh, '>', "$out.accessory_01matrix.txt")
    or die "Cannot write '$out.accessory_01matrix.txt': $!\n";

# --- Header row -------------------------------------------------------------
my $firstline = <$matrix_fh>;
die "Matrix file '$matrix' is empty\n" unless defined $firstline;
$firstline =~ tr/\r\n//d;
my @header   = $split_csv_line->($firstline);
my $last_col = $#header;

# The first-column labels (including the "ClutserID" spelling) are kept
# exactly as they have always been written, since downstream consumers may
# match on them.
print {$table_fh}     "ClutserID";
print {$upset_fh}     "ClutserID";
print {$accessory_fh} "Gene";

for my $col ($first_genome_col .. $last_col) {
    # Reduce the column name to the bare genome name used as lookup key.
    my $gbfile = $header[$col];
    $gbfile =~ s/\"//g;
    $gbfile =~ s/\.gb\.filt//g;
    $gbfile =~ s/\.gb\.rmdup//g;

    my $strain = $strains_of_gb{$gbfile};
    $strain = $gbfile unless defined $strain && length $strain;

    print {$table_fh}     "\t" . $strain;
    print {$upset_fh}     "\t" . $strain;
    print {$accessory_fh} "\t" . $strain;
}
print {$table_fh}     "\n";
print {$upset_fh}     "\n";
print {$accessory_fh} "\n";

# --- One output row per cluster ---------------------------------------------
my $cl_num = 0;
while (my $line = <$matrix_fh>) {
    $line =~ tr/\r\n//d;
    next unless $line =~ /\S/;    # a blank line is not a cluster

    $cl_num++;
    my @fields = $split_csv_line->($line);

    my @id_cells;    # protein IDs per strain
    my @flags;       # 1/0 presence per strain

    # Iterate over the header's columns so that every row has exactly as
    # many cells as the header, even if the input row is short.
    for my $col ($first_genome_col .. $last_col) {
        my $val = defined $fields[$col] ? $fields[$col] : '';
        $val =~ s/\"//g;

        push @flags, ($val =~ /\w/ ? 1 : 0);

        # A cell may list several genes separated by ';'.
        my @genes = split(/;/, $val);
        push @id_cells,
            @genes ? join(',', map { $corr{$_} || $_ } @genes) : '-';
    }

    print {$table_fh} join("\t", $cl_num, @id_cells), "\n";
    print {$upset_fh} join("\t", $cl_num, @flags),    "\n";

    # Accessory clusters only: absent from at least one strain.
    if (grep { !$_ } @flags) {
        print {$accessory_fh} join("\t", $cl_num, @flags), "\n";
    }
}

close($matrix_fh);
# Buffered write errors (e.g. disk full) only surface at close time.
close($table_fh)     or die "Error writing '$out': $!\n";
close($upset_fh)     or die "Error writing '$out.upsetr.txt': $!\n";
close($accessory_fh) or die "Error writing '$out.accessory_01matrix.txt': $!\n";