Mercurial > repos > sarahinraauzeville > tax4fun
comparison sm_Tax4Fun.pl @ 9:bd5ba02b1f71 draft
Uploaded
author | sarahinraauzeville |
---|---|
date | Thu, 21 Dec 2017 09:08:53 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:a6a0c9a8ad51 | 9:bd5ba02b1f71 |
---|---|
1 #Declaration de l espace de travail et du chemin de l executable R : | |
2 use strict; | |
3 use Carp; | |
4 use Statistics::R; | |
5 use Cwd qw(abs_path); | |
6 use File::Basename; | |
7 use Getopt::Long; | |
8 | |
9 | |
#Global variables ###########################
# `use lib` and `use GalaxyPath` are executed at compile time, so the check on
# MY_GALAXY_DIR must run at compile time as well (BEGIN block). As a plain
# runtime statement it would only be reached after GalaxyPath had already
# failed to load, and its message would never be displayed.
BEGIN {
    defined $ENV{'MY_GALAXY_DIR'}
        || die "MY_GALAXY_DIR environment variable not defined";
}
use lib "$ENV{'MY_GALAXY_DIR'}";
use GalaxyPath;
14 | |
# File-wide working variables: R binary path, R log directory, R bridge
# object, R script text, line buffer, and the three command-line values.
my ($r_bin,$log_dir,$R,$cmd,$line, $out, $input, $reference);


#test:
#perl sm_Tax4Fun.pl --input DATA/testtax4fun---ssu---fingerprint----Total---sim_93---tax_silva---td_20.csv --out DATA/Tax4FunProfile.txt


# Input parameters
Getopt::Long::Configure( 'no_ignorecase', 'bundling' );
GetOptions (
    'out=s'       => \$out,
    'input=s'     => \$input,
    'reference=s' => \$reference
) or die "Usage: Error in command line arguments\n";

# --out and --input are used unconditionally further down (job directory
# name, input copy), so refuse to start without them instead of failing later
# with an unrelated message. --reference is left optional.
defined $out && length $out
    or die "Usage: --out <file> is required\n";
defined $input && length $input
    or die "Usage: --input <file> is required\n";
29 | |
# SETTING GLOBAL VARIABLES ####################################################
# Paths are looked up through GalaxyPath from the file named by
# GALAXY_CONFIG_FILE: the workspace root, the R binary and the R scripts dir.
my $cfg = GalaxyPath->new( -file => $ENV{"GALAXY_CONFIG_FILE"} );
my $file_path = $cfg->my_path( 'workPath', 'MYWORKSPACE' );
$r_bin = $cfg->my_path( 'toolsPath', 'R_BIN_PATH' );
my $SCRIPTS_R_path = $cfg->my_path( 'toolsPath', 'SCRIPTS_R_path' );
###############################################################################

# Workspace selection.
# NOTE(review): with $debug == 0 the configured workspace is kept and
# "Debug mode OK" is printed; any other value switches the workspace to the
# directory of --out and prints "No debug". The messages look swapped relative
# to the flag name - confirm the intent before changing either of them.
my $debug = 0;
if ($debug == 0)
{
    print STDOUT "Debug mode OK \n";
}
else
{
    $file_path = dirname($out);
    print STDOUT "No debug \n";
}

# The job directory is named after the numeric id found in the output file
# name (dataset_<id>.<ext>). Without that id the directory name would be empty
# and every file would be written straight into the workspace root, so stop.
my ($nb) = ($out=~/dataset_(\d+)\.\S+$/);
defined $nb
    or die "Cannot derive a job directory from output file '$out' (expected dataset_<number>.<ext>)\n";
#For debugging in command line on vm-galaxy-preprod
#my ($nb) = (1);

# Create the output directory (an already existing one is reused).
my $dirresults= $file_path."/".$nb;
if (! -d $dirresults) {
    mkdir $dirresults
        or die "Cannot create job directory $dirresults: $!\n";
}
# Best effort, as before: open the permissions recursively. The list form of
# system() does not go through a shell, so the path is passed verbatim.
system('chmod', '-R', '777', $dirresults);


print STDOUT "Job working directory : $dirresults \n";
60 | |
61 | |
62 | |
# Rewrite the input table inside the job directory: strip the "[a-z]__" rank
# prefixes from the taxonomy, make every taxonomy end with exactly one ';',
# and sum the abundances of rows that end up with the same taxonomy.

# Work on a copy so the original dataset is never modified. The list form of
# system() bypasses the shell, so paths are passed verbatim.
system('cp', '-a', $input, "$dirresults/INPUT.csv") == 0
    or die "Cannot copy $input to $dirresults/INPUT.csv\n";

my %Taxo=();
open(my $in_fh, '<', "$dirresults/INPUT.csv")
    or die "File not found: $dirresults/INPUT.csv: $!\n";
while (my $row = <$in_fh>) {
    # Drop the line ending (LF or CRLF) so that the last line of a file
    # without a trailing newline is treated like every other line.
    $row =~ s/\r?\n\z//;
    # Align the FROGS taxonomy with the one used by Tax4Fun.
    $row =~ s/[a-z]__//g;
    # Terminate the taxonomy with a semicolon ...
    $row .= ';';
    # ... and collapse any run of semicolons, whatever its length, into one.
    $row =~ s/;{2,}/;/g;
    # Expected layout: <abundance> TAB <taxonomy>
    $row =~ /^(\d+)\t(.+)/ or die "bad line: $row\n";
    $Taxo{$2} += $1;
}
close($in_fh);

open(my $out_fh, '>', "$dirresults/INPUT.csv")
    or die "Cannot write $dirresults/INPUT.csv: $!\n";
#print FILE "abondance_sum\tTaxonomy\n";
print {$out_fh} "abondance_sum;\n";
foreach my $taxo (sort {$a cmp $b} keys %Taxo) {
    printf {$out_fh} "%d\t%s\n", $Taxo{$taxo}, $taxo;
}
# Buffered write errors only surface at close time.
close($out_fh)
    or die "Cannot write $dirresults/INPUT.csv: $!\n";
90 | |
91 | |
# The job directory is handed to Statistics::R as its log_dir option.
$log_dir = $dirresults;

# Build the $R bridge object from a named option set, then open the bridge.
my %bridge_options = (
    "r_bin"   => $r_bin,
    "log_dir" => $log_dir,
);
$R = Statistics::R->new(%bridge_options)
    or die "Problem with R : $!\n";
$R->startR;


print STDOUT "Ouverture du pont R \n";
105 | |
#Read R script
#[galaxy-preprod@vm-galaxy-preprod sm_Tax4Fun]$ ln -s /galaxydata/galaxy-preprod/my_tools/sm_Tax4Fun/sm_Tax4Fun.R /galaxydata/galaxy-preprod/my_bin/scripts_R/.
# Three-argument open with a lexical handle: the path is taken literally
# (no mode or whitespace interpretation) and the handle is not a global.
my $r_script_file = "$SCRIPTS_R_path/sm_Tax4Fun.R";
open(my $script_fh, '<', $r_script_file)
    or die "Unable to read R script : $!\n";
# Slurp the whole script into $cmd in one read.
$cmd = do { local $/; <$script_fh> };
$cmd = "" unless defined $cmd;
close($script_fh);
115 | |
#Declare R function
print STDOUT "Declaration du script R \n";

# Every path is embedded in a single-quoted R string literal. Escape
# backslashes and single quotes so that a path containing one of them cannot
# terminate the literal early and break (or alter) the R command.
my $r_quote = sub {
    my ($text) = @_;
    $text = '' unless defined $text;    # an omitted --reference stays ''
    $text =~ s/([\\'])/\\$1/g;
    return "'" . $text . "'";
};

# Send, in one go: the working directory, the R script text (which is expected
# to define GalaxyFrogsTax), and the call that runs it on the rewritten input.
#$dirresults/INPUT.csv #"input.file = '$input', ".
$R->send(
    "setwd(" . $r_quote->($dirresults) . ")\n".
    "$cmd\n".
    "GalaxyFrogsTax( ".
    "input.file = " . $r_quote->("$dirresults/INPUT.csv") . ", ".
    "reference = " . $r_quote->($reference) . ", ".
    "output.file = " . $r_quote->($out) . ")\n");

# Trailing newline added so this message no longer runs into the next one.
print STDOUT "Envoi du script R \n";




#Close the bridge
$R->stopR();

print STDOUT "Fermeture du pont R \n";
137 | |
#Retrieve the outputs into Galaxy
# The profile may have been written to Tax4FunProfile.txt in the job
# directory; when it is there, move it onto the Galaxy output file. The
# existence test is made on the file that is actually moved, and the list form
# of system() avoids both the shell and its non-portable ">&" redirection.
my $outTax= "$dirresults/Tax4FunProfile.txt";
if (-e $outTax) {
    system('mv', $outTax, $out) == 0
        or print STDERR "Unable to move $outTax to $out \n";
}
elsif (! -e $out) {
    # Neither the intermediate profile nor the final output exists.
    print STDERR "Pas de fichier Tax4FunProfile.txt produit \n";
}