| 
3
 | 
     1 #!/usr/bin/perl
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 use strict;
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 my $indir = $ARGV[0];
 | 
| 
 | 
     6 my $matrix = $ARGV[1];
 | 
| 
 | 
     7 my $out = $ARGV[2];
 | 
| 
 | 
     8 my $strain_names = $ARGV[3];
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 my %strains_of_gb;
 | 
| 
 | 
    11 open(F,$strain_names);
 | 
| 
 | 
    12 while(<F>){
 | 
| 
 | 
    13 	my $line = $_;
 | 
| 
 | 
    14 	$line =~s/\n//g;$line =~s/\r//g;
 | 
| 
 | 
    15 	my ($gb,$strain) = split(/\t/,$line);
 | 
| 
 | 
    16 	$strains_of_gb{$gb} = $strain;
 | 
| 
 | 
    17 }
 | 
| 
 | 
    18 close(F);
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 my $cl_num = 0;
 | 
| 
 | 
    23 my $nb_strains = 1;
 | 
| 
 | 
    24 open(O,">$out");
 | 
| 
 | 
    25 open(U,">$out.upsetr.txt");
 | 
| 
 | 
    26 open(M,">$out.accessory_01matrix.txt");
 | 
| 
 | 
    27 open(F,$matrix);
 | 
| 
 | 
    28 my $firstline = <F>;
 | 
| 
 | 
    29 $firstline =~s/\n//g;$firstline =~s/\r//g;
 | 
| 
 | 
    30 my @infos = split(/\t/,$firstline);
 | 
| 
 | 
    31 print O "ClutserID";
 | 
| 
 | 
    32 print U "ClutserID";
 | 
| 
 | 
    33 print M "Gene";
 | 
| 
 | 
    34 for (my $j=1; $j <= $#infos; $j++){
 | 
| 
 | 
    35         my $gbfile = $infos[$j];
 | 
| 
 | 
    36         $gbfile =~s/\"//g;
 | 
| 
 | 
    37         $gbfile =~s/\.gb\.filt//g;
 | 
| 
 | 
    38 	$gbfile =~s/\.gb\.rmdup//g;
 | 
| 
 | 
    39         my $strain = $strains_of_gb{$gbfile};
 | 
| 
 | 
    40         print O "\t".$strain;
 | 
| 
 | 
    41         print U "\t".$strain;
 | 
| 
 | 
    42         print M "\t".$strain;
 | 
| 
 | 
    43         $nb_strains++;
 | 
| 
 | 
    44 }
 | 
| 
 | 
    45 print O "\n";
 | 
| 
 | 
    46 print U "\n";
 | 
| 
 | 
    47 print M "\n";
 | 
| 
 | 
    48 while(<F>){
 | 
| 
 | 
    49         $cl_num++;
 | 
| 
 | 
    50         my @infos = split(/\t/,$_);
 | 
| 
 | 
    51         print O $cl_num;
 | 
| 
 | 
    52         print U $cl_num;
 | 
| 
 | 
    53         my $concat_accessory = "";
 | 
| 
 | 
    54         for (my $i = 1; $i <= $#infos; $i++){
 | 
| 
 | 
    55                 my $val = $infos[$i];
 | 
| 
 | 
    56                 $val =~s/\"//g;
 | 
| 
 | 
    57 		$val =~s/\n//g;$val =~s/\r//g;
 | 
| 
 | 
    58                 if ($val =~/\w+/){
 | 
| 
 | 
    59                         print U "\t1";
 | 
| 
 | 
    60                         $concat_accessory .= "\t1";
 | 
| 
 | 
    61                 }
 | 
| 
 | 
    62                 else{
 | 
| 
 | 
    63                         print U "\t0";
 | 
| 
 | 
    64                         $concat_accessory .= "\t0";
 | 
| 
 | 
    65                 }
 | 
| 
 | 
    66                 my @genes = split(/, /,$val);
 | 
| 
 | 
    67                 my $concat = "";
 | 
| 
 | 
    68                 foreach my $gene(@genes){
 | 
| 
 | 
    69                         my $prot_id = $gene;
 | 
| 
 | 
    70                         $concat .= "$prot_id,"
 | 
| 
 | 
    71                 }
 | 
| 
 | 
    72                 chop($concat);
 | 
| 
 | 
    73                 if (scalar @genes == 0){
 | 
| 
 | 
    74                         $concat = "-";
 | 
| 
 | 
    75                 }
 | 
| 
 | 
    76                 print O "\t".$concat;
 | 
| 
 | 
    77         }
 | 
| 
 | 
    78         if ($concat_accessory =~/0/){
 | 
| 
 | 
    79                 print M $cl_num.$concat_accessory."\n";
 | 
| 
 | 
    80         }
 | 
| 
 | 
    81         print O "\n";
 | 
| 
 | 
    82         print U "\n";
 | 
| 
 | 
    83 }
 | 
| 
 | 
    84 close(F);
 | 
| 
 | 
    85 close(O);
 | 
| 
 | 
    86 close(U);
 | 
| 
 | 
    87 close(M);
 |