annotate sm_STAR2_V2.pl @ 4:f7dbe77bbed5 draft

Uploaded
author sarahinraauzeville
date Tue, 12 Dec 2017 10:16:11 -0500
parents 80e19490ec6a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
1 #!/usr/bin/perl -w
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
2
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
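3 # usage : perl sm_STAR2_V2.pl --runThreadN <int> --genomeDir <dir> --readsselector single --readsinputread <reads.fastq[.gz]> --outputfile <dataset_N.ext> ... (named options only; paired reads use --readFilesIn1/--readFilesIn2; the full list is in the GetOptions call below)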
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
4 # 10/02/2014 - Wrapper du traitement des données RNAseq
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
5 # Sarah Maman
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
6 # Copyright (C) 2014 INRA
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
7 # This program is free software: you can redistribute it and/or modify
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
8 # it under the terms of the GNU General Public License as published by
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
10 # (at your option) any later version.
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
11 #
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
15 # GNU General Public License for more details.
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
16 #
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
19 #
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
20 use strict;
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
21 use File::Basename;
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
22 use Getopt::Long;
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
23 use lib "$ENV{'MY_GALAXY_DIR'}";
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
24 use GalaxyPath;
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
25
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Load the platform configuration; the file to read is named by the
# GALAXY_CONFIG_FILE environment variable.
my $galaxy_config_file = $ENV{GALAXY_CONFIG_FILE};
my $cfg = GalaxyPath->new( -file => $galaxy_config_file );

# $PATH is the parent of the per-job working directory created further down;
# $STAR is the command used to invoke STAR.
my $PATH = $cfg->my_path( "workPath",  "MYWORKSPACE" );
my $STAR = $cfg->my_path( "toolsPath", "STAR_PATH" );
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
29
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
30
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
31
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Command-line parameters. Each variable is filled in by the GetOptions call
# below; anything not given on the command line stays undefined.
my (
    $Nthreads,                 # --runThreadN
    $genome_path,              # --genomeDir
    $reads_selector,           # --readsselector
    $input_read,               # --readsinputread
    $Read1fastqgz,             # --readFilesIn1
    $Read2fastqgz,             # --readFilesIn2
    $alignIntronMin,           # --alignIntronMin
    $alignIntronMax,           # --alignIntronMax
    $outFilterMismatchNmax,    # --outFilterMismatchNmax
    $orientation,              # --orientation
    $refownfastaref,           # --refownfastaref
    $refselector,              # --refselector
    $refowngtf,                # --refowngtf
    $compress,                 # --compress
    $cufflinks,                # --cufflinks
    $outputfile,               # --outputfile
    $outputfileT,              # --outputfileT
    $outputlogSJ,              # --outputlogSJ
    $outputlogfinal,           # --outputlogfinal
);

# Option names are case-sensitive and single-dash options may be bundled,
# so every long option has to be written with a double dash.
Getopt::Long::Configure( qw(no_ignorecase bundling) );

# Option specification => destination variable ("=i" integer, "=s" string).
my %option_spec = (
    'runThreadN=i'            => \$Nthreads,
    'genomeDir=s'             => \$genome_path,
    'refselector=s'           => \$refselector,
    'refownfastaref=s'        => \$refownfastaref,
    'refowngtf=s'             => \$refowngtf,
    'compress=s'              => \$compress,
    'cufflinks=s'             => \$cufflinks,
    'readsselector=s'         => \$reads_selector,
    'readFilesIn1=s'          => \$Read1fastqgz,
    'readFilesIn2=s'          => \$Read2fastqgz,
    'readsinputread=s'        => \$input_read,
    'alignIntronMin=i'        => \$alignIntronMin,
    'alignIntronMax=i'        => \$alignIntronMax,
    'outFilterMismatchNmax=i' => \$outFilterMismatchNmax,
    'orientation=s'           => \$orientation,
    'outputfile=s'            => \$outputfile,
    'outputfileT=s'           => \$outputfileT,
    'outputlogfinal=s'        => \$outputlogfinal,
    'outputlogSJ=s'           => \$outputlogSJ,
);

GetOptions(%option_spec)
    or die "Usage: Error in command line arguments\n";
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
75
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Placeholders for the four STAR command lines (single/paired reads x
# unoriented/oriented); all start out as empty strings.
my ( $cmd1, $cmd2, $cmd3, $cmd4 ) = ('') x 4;
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
78
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
79 #STAR --runThreadN 4 --runMode genomeGenerate --genomeDir /work/smaman/TP_RNAseq/INDEX/ --genomeFastaFiles ITAG2.3_genomic_Ch6.fasta --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --sjdbOverhang 100
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
80
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
81 #smaman@node001 /work/smaman/TP_RNAseq $ ls -ltrah INDEX
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
82 #-rw-r--r-- 1 smaman BIOINFO 331 17 juil. 11:55 genomeParameters.txt
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
83 #-rw-r--r-- 1 smaman BIOINFO 387K 17 juil. 11:55 exonGeTrInfo.tab
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
84 #-rw-r--r-- 1 smaman BIOINFO 53K 17 juil. 11:55 geneInfo.tab
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
85 #-rw-r--r-- 1 smaman BIOINFO 151K 17 juil. 11:55 transcriptInfo.tab
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
86 #-rw-r--r-- 1 smaman BIOINFO 171K 17 juil. 11:55 exonInfo.tab
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
87 #-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.fromGTF.out.tab
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
88 #-rw-r--r-- 1 smaman BIOINFO 272K 17 juil. 11:55 sjdbInfo.txt
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
89 #-rw-r--r-- 1 smaman BIOINFO 325K 17 juil. 11:55 sjdbList.out.tab
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
90 #-rw-r--r-- 1 smaman BIOINFO 11 17 juil. 11:55 chrName.txt
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
91 #-rw-r--r-- 1 smaman BIOINFO 9 17 juil. 11:55 chrLength.txt
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
92 #-rw-r--r-- 1 smaman BIOINFO 11 17 juil. 11:55 chrStart.txt
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
93 #-rw-r--r-- 1 smaman BIOINFO 20 17 juil. 11:55 chrNameLength.txt
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
94 #-rw-r--r-- 1 smaman BIOINFO 47M 17 juil. 11:55 Genome
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
95 #-rw-r--r-- 1 smaman BIOINFO 360M 17 juil. 11:55 SA
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
96 #-rw-r--r-- 1 smaman BIOINFO 1,5G 17 juil. 11:55 SAindex
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
97
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
98
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
99 #STAR --readFilesIn WTr1.fastq WTr2.fastq --genomeDir /work/smaman/TP_RNAseq/INDEX/ --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --outSAMtype BAM SortedByCoordinate --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNamePrefix galaxyName --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
100
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
101 #-rw-r--r-- 1 smaman BIOINFO 45M 26 mars 2015 ITAG2.3_genomic_Ch6.fasta
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
102 #-rw-r--r-- 1 smaman BIOINFO 1,6M 26 mars 2015 ITAG_pre2.3_gene_models_Ch6.gtf
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
103 #-rw-r--r-- 1 smaman BIOINFO 29 26 mars 2015 ITAG2.3_genomic_Ch6.fasta.fai
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
104 #-rw-r--r-- 1 smaman BIOINFO 614 17 juil. 10:20 WTr1.fastq
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
105 #-rw-r--r-- 1 smaman BIOINFO 589 17 juil. 10:20 WTr2.fastq
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
106 #-rw-r--r-- 1 smaman BIOINFO 14K 17 juil. 11:55 Log.out
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
107 #-rw-r--r-- 1 smaman BIOINFO 35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
108 #-rw-r--r-- 1 smaman BIOINFO 637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
109 #-rw-r--r-- 1 smaman BIOINFO 0 17 juil. 12:03 galaxyNameSJ.out.tab ++++++++++++++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
110 #-rw-r--r-- 1 smaman BIOINFO 246 17 juil. 12:03 galaxyNameLog.progress.out
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
111 #-rw-r--r-- 1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out +++++++++++++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
112 #-rw-r--r-- 1 smaman BIOINFO 16K 17 juil. 12:03 galaxyNameLog.out
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
113
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
114
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
115
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Workspace selection.
# $debug is hard-coded to 0, so the first branch below is never taken and the
# work path read from the configuration is kept as-is.
my $debug = 0;
if ( $debug != 0 ) {
    # Non-zero flag: work in the directory that holds the main output file.
    $PATH = dirname($outputfile);
    print STDOUT "No debug \n";
}
else {
    print STDOUT "Debug mode OK \n";
}
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
127
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
128
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Extract the (unique) dataset number from the main output path. It names the
# per-job working directory and is reused as STAR's output file prefix.
my ($nb) = ( defined $outputfile ? $outputfile : '' ) =~ /dataset_(\d+)\.\S+$/;

# Refuse to continue without that number: with an empty $nb the directory
# commands that used to run here expanded to "mkdir /; chmod -R 777 /", and
# every path built from $nb would collapse onto "$PATH/".
defined $nb
    or die "Cannot extract a dataset number from --outputfile ("
    . ( defined $outputfile ? $outputfile : 'undefined' )
    . "): expected a file name like dataset_<N>.<ext>\n";

# Per-job working directory, created by this script. It is created without
# going through a shell so that the path is never re-parsed.
my $dirresults = "$PATH/" . $nb;
if ( !-d $dirresults ) {
    mkdir $dirresults
        or die "Cannot create job working directory $dirresults: $!\n";
}

# Keep the historical permissions (recursive, world-writable). The list form
# of system() bypasses the shell; chmod prints its own diagnostics.
system( 'chmod', '-R', '777', $dirresults ) == 0
    or print STDERR "chmod -R 777 $dirresults failed \n";

print STDOUT "Job working directory : $dirresults \n";
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
137
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
138
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# When the user supplies their own reference (--refselector ownfasta), build
# a STAR index from the given FASTA and GTF inside the job directory and
# align against it instead of --genomeDir.
if ( defined $refselector && $refselector eq "ownfasta" ) {

    # Output is captured with the POSIX form "> file 2>&1". system() runs the
    # string through /bin/sh, and the former ">& file" form is a bash/csh
    # extension that stricter shells (e.g. dash) reject before STAR starts.
    # The log path is relative to this script's current directory, because
    # the redirection sits outside the "( cd ...; ... )" subshell.
    my $cmdSTARindex = "(cd $dirresults/; mkdir INDEX/; chmod 777 INDEX/; $STAR --runThreadN $Nthreads --runMode genomeGenerate --genomeDir $dirresults/INDEX --genomeFastaFiles $refownfastaref --sjdbGTFfile $refowngtf --sjdbOverhang 100) > ./out_Starindex.log 2>&1";
    my $index_status = system $cmdSTARindex;

    # Information for the biologists: show the exact command that was run.
    print STDOUT "STAR Genome Generate : \n\n $cmdSTARindex \n\n ";

    # Without an index the alignment cannot succeed, so stop with a clear
    # message instead of letting the next step fail obscurely.
    if ( $index_status != 0 ) {
        die "STAR genomeGenerate failed (wait status $index_status); see out_Starindex.log\n";
    }

    $genome_path = "$dirresults/INDEX/";
}
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
146
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Extra STAR arguments appended to the alignment command when --cufflinks is
# "cuff"; an empty string for any other value.
my $addcuff
    = ( $cufflinks eq "cuff" )
    ? "--outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM "
    : "";
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
153
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
154
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Stage the input reads in the job directory and run the STAR alignment.
#
# --readsselector "single" uses --readsinputread; any other value uses the
# --readFilesIn1 / --readFilesIn2 pair. --compress "compress" marks the
# inputs as gzipped, in which case STAR is told to read them through zcat.
# The STAR command line is the same whatever --orientation is; only the
# message printed afterwards differs.
my $cat;
{
    my $single_end = ( $reads_selector eq "single" );
    my $gzipped    = ( $compress eq "compress" );
    my $unoriented = ( $orientation eq "No" );

    my $extension = $gzipped ? "fastq.gz" : "fastq";
    $cat = $gzipped ? "--readFilesCommand zcat" : "";

    # The datasets are symlinked into the job directory under names that
    # carry the right extension: [ original dataset, link name ].
    my @staged
        = $single_end
        ? ( [ $input_read, "$dirresults/input_read.$extension" ] )
        : (
            [ $Read1fastqgz, "$dirresults/Read1.$extension" ],
            [ $Read2fastqgz, "$dirresults/Read2.$extension" ],
        );
    for my $pair (@staged) {
        my ( $dataset, $link ) = @{$pair};

        # symlink() instead of a shell "ln -s": no word splitting on the
        # paths, and a failure is reported with its reason.
        symlink( $dataset, $link )
            or print STDERR "Cannot link $dataset to $link: $!\n";
    }
    my $reads_in = join ' ', map { $_->[1] } @staged;

    # Output is captured with the POSIX form "> file 2>&1"; the former
    # ">& file" form is a bash/csh extension that stricter /bin/sh
    # implementations reject. The log path is relative to this script's
    # current directory (the redirection is outside the subshell).
    my $star_cmd = "(cd $dirresults; $STAR --runThreadN $Nthreads --genomeDir $genome_path --readFilesIn $reads_in --outSAMtype BAM SortedByCoordinate --alignIntronMin $alignIntronMin --alignIntronMax $alignIntronMax --outFilterMismatchNmax $outFilterMismatchNmax $cat --outFileNamePrefix $nb $addcuff) > ./out_Star.log 2>&1";

    # Keep filling the file-level $cmd1..$cmd4 slot that matches this run.
    if ($single_end) {
        if   ($unoriented) { $cmd1 = $star_cmd; }
        else               { $cmd2 = $star_cmd; }
    }
    else {
        if   ($unoriented) { $cmd3 = $star_cmd; }
        else               { $cmd4 = $star_cmd; }
    }

    my $star_status = system $star_cmd;

    # Information for the biologists: show the exact command that was run.
    if ($unoriented) {
        print STDOUT "STAR command run on cluster without oriented reads : \n\n $star_cmd \n\n ";
    }
    else {
        # The Cufflinks option is spelled with two dashes (--library-type).
        print STDOUT "STAR command run on cluster with oriented reads : \n\n $star_cmd \n\n\n"
            . "Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <…> --library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\n\n";
    }

    # Report a failed run but keep going: the collection step that follows
    # reports each expected output file that is missing.
    if ( $star_status != 0 ) {
        print STDERR "STAR alignment exited with wait status $star_status; see out_Star.log \n";
    }
}
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
218
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
219 #Si besoin :
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
220 #TEST 1 : command ligne on vm-galaxy
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
221 #TEST 2 perl Galaxy file : perl script.pl path/to/tests/files/used/for/galaxy/perl/script out1
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
222
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
223 #Recuperation des fichiers par Galaxy
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
224 #-rw-r--r-- 1 smaman BIOINFO 35K 17 juil. 12:03 galaxyNameAligned.toTranscriptome.out.bam +++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
225 #-rw-r--r-- 1 smaman BIOINFO 637 17 juil. 12:03 galaxyNameAligned.sortedByCoord.out.bam +++++++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
226 #-rw-r--r-- 1 smaman BIOINFO 0 17 juil. 12:03 galaxyNameSJ.out.tab ++++++++++++++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
227 #-rw-r--r-- 1 smaman BIOINFO 1,7K 17 juil. 12:03 galaxyNameLog.final.out +++++++++++++++
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
# Hand STAR's result files back to the output paths given on the command
# line. Each pattern is expected to match at most one file in the job
# directory.
#
# glob() is called in list context on purpose: it then returns every match
# at once (an empty list, hence undef here, when nothing matches), whereas in
# scalar context it acts as an iterator that keeps state between calls.
my ($bam)      = glob("$dirresults/*$nb*Aligned.sortedByCoord.out.bam");
my ($bamT)     = glob("$dirresults/*$nb*Aligned.toTranscriptome.out.bam");
my ($logSJ)    = glob("$dirresults/$nb*SJ.out.tab");
my ($logfinal) = glob("$dirresults/$nb*Log.final.out");

# [ file found, destination, label used in the "not found" message ]
for my $result (
    [ $bam,      $outputfile,     "Aligned.sortedByCoord.out.bam file" ],
    [ $bamT,     $outputfileT,    "Aligned.toTranscriptome.out.bam file" ],
    [ $logSJ,    $outputlogSJ,    "SJ.out.tab log file" ],
    [ $logfinal, $outputlogfinal, "Log.final.out log file" ],
    )
{
    my ( $found, $destination, $label ) = @{$result};

    if ( !defined $found || !-e $found ) {
        print STDERR "$label not found. \n";
        next;
    }
    if ( !defined $destination ) {
        print STDERR "No destination given for $label; not copied. \n";
        next;
    }

    # List-form system(): the paths are passed to cp untouched by a shell,
    # and a failed copy is reported instead of being silently ignored.
    system( 'cp', '-a', $found, $destination ) == 0
        or print STDERR "Could not copy $found to $destination \n";
}
80e19490ec6a Uploaded
sarahinraauzeville
parents:
diff changeset
236