annotate sm_Tax4Fun.pl @ 11:702db6efce51 draft

Uploaded
author sarahinraauzeville
date Thu, 21 Dec 2017 09:09:22 -0500
parents bd5ba02b1f71
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
1 #Declaration de l espace de travail et du chemin de l executable R :
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
2 use strict;
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
3 use Carp;
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
4 use Statistics::R;
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
5 use Cwd qw(abs_path);
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
6 use File::Basename;
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
7 use Getopt::Long;
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
8
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
9
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
10 #Variables Globales ###########################
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Fail early when MY_GALAXY_DIR is missing.  The check has to live in a BEGIN
# block: the `use lib` / `use GalaxyPath` statements that follow are executed
# at compile time, so a plain run-time test would only be reached after they
# had already failed with a much less helpful "Can't locate ..." error.
BEGIN {
    defined $ENV{'MY_GALAXY_DIR'}
        or die "MY_GALAXY_DIR environment variable not defined";
}
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
12 use lib "$ENV{'MY_GALAXY_DIR'}";
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
13 use GalaxyPath;
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
14
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
15 my ($r_bin,$log_dir,$R,$cmd,$line, $out, $input, $reference);
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
16
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
17
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
18 #test:
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
19 #perl sm_Tax4Fun.pl --input DATA/testtax4fun---ssu---fingerprint----Total---sim_93---tax_silva---td_20.csv --out DATA/Tax4FunProfile.txt
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
20
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
21
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Command-line parameters:
#   --input      table to process, one "<count><TAB><taxonomy>" entry per line (required)
#   --out        path of the output file (required)
#   --reference  value handed unchanged to the R function as `reference`
# Option names are case-sensitive and single-letter options may be bundled.
Getopt::Long::Configure( 'no_ignorecase', 'bundling' );
GetOptions(
    'out=s'       => \$out,
    'input=s'     => \$input,
    'reference=s' => \$reference,
) or die "Usage: Error in command line arguments\n";

# Both paths are used to build file names further down; stop here with a clear
# message instead of failing later on an empty or undefined path.
for my $required ( [ input => $input ], [ out => $out ] ) {
    my ( $name, $value ) = @{$required};
    die "Usage: missing required option --$name\n"
        unless defined $value && length $value;
}
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
29
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# ---------------------------------------------------------------------------
# Global settings, all looked up through the GalaxyPath configuration object
# built from the file named by the GALAXY_CONFIG_FILE environment variable.
# ---------------------------------------------------------------------------
my $cfg            = GalaxyPath->new( -file => $ENV{"GALAXY_CONFIG_FILE"} );
my $file_path      = $cfg->my_path( 'workPath', 'MYWORKSPACE' );
$r_bin             = $cfg->my_path( 'toolsPath', 'R_BIN_PATH' );
my $SCRIPTS_R_path = $cfg->my_path( 'toolsPath', 'SCRIPTS_R_path' );

# Workspace selection.  With $debug left at 0 the configured workspace is
# used; any other value switches to the directory that contains --out.
# NOTE(review): the two messages look swapped with respect to the flag name;
# they are kept exactly as they were - confirm the intended wording.
my $debug = 0;
if ( $debug != 0 ) {
    $file_path = dirname($out);
    print STDOUT "No debug \n";
}
else {
    print STDOUT "Debug mode OK \n";
}
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
48
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
49
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
50
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Job working directory: "<workspace>/<N>", where N is the number found in an
# output path of the form ".../dataset_<N>.<ext>".
my ($nb) = ( $out =~ /dataset_(\d+)\.\S+$/ );

# When --out does not follow the dataset_<N> naming (e.g. a manual run from
# the command line), no sub-directory is created and the workspace itself is
# used, which is what the path below evaluated to before as well.
my $dirresults = $file_path . "/" . ( defined $nb ? $nb : '' );

if ( defined $nb ) {
    # Built-in mkdir/chmod instead of a shell pipeline: nothing is passed to a
    # shell unquoted, and a failure is reported at once instead of surfacing
    # later as an unrelated "cannot open file" error.
    unless ( -d $dirresults ) {
        mkdir $dirresults
            or die "Cannot create job directory '$dirresults': $!\n";
    }

    # The directory is deliberately left world-accessible, as before.  A
    # failure here is not fatal and is reported on STDOUT only.
    chmod( 0777, $dirresults )
        or print STDOUT "Could not change permissions of $dirresults : $! \n";
}

print STDOUT "Job working directory : $dirresults \n";
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
60
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
61
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
62
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Build "$dirresults/INPUT.csv" from the --input table.
#
# Every input line must look like "<count><TAB><taxonomy>".  For each line:
#   * the one-letter rank prefixes ("k__", "p__", ...) are removed so that the
#     taxonomy coming from FROGS matches the one used by Tax4Fun;
#   * the taxonomy is made to end with a ';' and every run of ';' is reduced
#     to a single one;
#   * counts of lines that end up with the same taxonomy are added together.
# The result is written sorted by taxonomy, after a one-line header.
my $input_csv = "$dirresults/INPUT.csv";

my %Taxo = ();
open( my $in_fh, '<', $input )
    or die "Cannot read input file '$input': $!\n";
while ( my $row = <$in_fh> ) {
    $row =~ s/\r?\n\z//;        # drop the line ending (Unix or DOS); also
                                # makes a last line without newline behave
                                # like every other line
    next if $row =~ /^\s*$/;    # a blank line carries no data
    $row =~ s/[a-z]__//g;       # strip rank prefixes
    $row .= ';';                # taxonomy always ends with ';'
    $row =~ s/;{2,}/;/g;        # collapse ';;', ';;;', ... whatever the length
    $row =~ /^(\d+)\t(.+)/ or die "bad line: $row\n";
    $Taxo{$2} += $1;
}
close($in_fh);

open( my $out_fh, '>', $input_csv )
    or die "Cannot write '$input_csv': $!\n";
print {$out_fh} "abondance_sum;\n";
foreach my $taxo ( sort keys %Taxo ) {
    printf {$out_fh} "%d\t%s\n", $Taxo{$taxo}, $taxo;
}
# Buffered write errors only show up when the handle is closed.
close($out_fh) or die "Cannot finish writing '$input_csv': $!\n";
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
90
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
91
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# The job directory is handed to Statistics::R as its log directory.
$log_dir = $dirresults;

# Create the $R bridge object, then open the bridge.
my @bridge_options = (
    "r_bin"   => $r_bin,
    "log_dir" => $log_dir,
);
$R = Statistics::R->new(@bridge_options) or die "Problem with R : $!\n";
$R->startR;

print STDOUT "Ouverture du pont R \n";
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
105
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Read the whole R script "$SCRIPTS_R_path/sm_Tax4Fun.R" into $cmd; its text
# is sent to the R session further down.
# On the original server the script was made available with:
#   ln -s /galaxydata/galaxy-preprod/my_tools/sm_Tax4Fun/sm_Tax4Fun.R /galaxydata/galaxy-preprod/my_bin/scripts_R/.
$cmd = do {
    my $r_script = "$SCRIPTS_R_path/sm_Tax4Fun.R";

    # Three-argument open with a lexical handle: the path is taken literally
    # (no mode characters or surrounding blanks are interpreted) and the
    # handle is not a package-wide bareword.
    open( my $r_fh, '<', $r_script )
        or die "Unable to read R script : $!\n";
    local $/;    # slurp mode: read the file in one go
    my $content = <$r_fh>;
    close($r_fh);

    defined $content ? $content : '';    # an empty file yields an empty script
};

# The R code is now available for declaration in the R session.
print STDOUT "Declaration du script R \n";
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
118
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Assemble the R program: move to the job directory, declare the code read
# from the R script, then call GalaxyFrogsTax on the normalised table
# ($dirresults/INPUT.csv rather than the raw --input file).
my $r_program = join '',
    "setwd('$dirresults')\n",
    "$cmd\n",
    "GalaxyFrogsTax( ",
    "input.file = '$dirresults/INPUT.csv', ",
    "reference = '$reference', ",
    "output.file = '$out')\n";
$R->send($r_program);

print STDOUT "Envoi du script R ";

# Close the bridge.
$R->stopR();

print STDOUT "Fermeture du pont R \n";
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
137
bd5ba02b1f71 Uploaded
sarahinraauzeville
parents:
diff changeset
# Hand the result back to Galaxy: when the R run left a Tax4FunProfile.txt in
# the job directory, move it onto --out.
my $outTax = "$dirresults/Tax4FunProfile.txt";
if ( -e $outTax ) {
    # The test is made on the produced file (it used to be made on $out, so a
    # missing profile was never detected as long as $out existed).
    # List form of system(): no shell is involved, so paths containing blanks
    # or shell metacharacters are safe, and the non-POSIX ">&" redirection -
    # rejected by dash-style /bin/sh - is no longer needed.
    system( 'mv', '--', $outTax, $out ) == 0
        or print STDERR "Unable to move $outTax to $out \n";
}
elsif ( !-e $out ) {
    print STDERR "Pas de fichier Tax4FunProfile.txt produit \n";
}
# Otherwise $out already exists and there is nothing to move.  This case was
# silent before and stays silent, since R is also given $out as output.file.
# NOTE(review): confirm whether the R function writes to $out directly.