comparison ballgown.xml @ 17:05977e96375b draft default tip

Uploaded
author theo.collard
date Tue, 03 Oct 2017 09:25:51 -0400
parents fa62657e9b57
children
comparison
equal deleted inserted replaced
16:4290f0f3d908 17:05977e96375b
1 <tool id="ballgown" name="Ballgown" version="0.5.0" workflow_compatible="true"> 1 <tool id="ballgown" name="Ballgown" version="2.2.0" workflow_compatible="true">
2 <description>Flexible, isoform-level differential expression analysis</description> 2 <description>Flexible, isoform-level differential expression analysis</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.2.0">bioconductor-ballgown</requirement> 4 <requirement type="package" version="2.2.0">bioconductor-ballgown</requirement>
5 <requirement type="package" version="0.5.0">r-dplyr</requirement> 5 <requirement type="package" version="0.5.0">r-dplyr</requirement>
6 <requirement type="package" version="1.3.2">r-optparse</requirement> 6 <requirement type="package" version="1.3.2">r-optparse</requirement>
7 7 </requirements>
8 </requirements> 8 <command detect_errors="aggressive"><![CDATA[
9 <command interpreter="Rscript" detect_errors="aggressive"> 9 ##------------------------------------------------------------------------------------
10 ##------------------------------------------------------------------------------------ 10 ## This function reads the input file with the mapping between samples and files
11 ## This function reads the input file with the mapping between samples and files 11 ## E.g. of result:
12 ## E.g. of result: 12 ## mapping = {
13 ## mapping = { 13 ## "e2t.ctab" : "sample1",
14 ## "e2t.ctab" : "sample1", 14 ## "other.ctab" : "sample2",
15 ## "other.ctab" : "sample2", 15 ## "i2t.ctab" : "sample1",
16 ## "i2t.ctab" : "sample1", 16 ## "t_data.ctab": "sample1"
17 ## "t_data.ctab": "sample1" 17 ## ...
18 ## ... 18 ## }
19 ## } 19 ##------------------------------------------------------------------------------------
20 ##------------------------------------------------------------------------------------ 20 #def read_sample_mapping_file(sample_mapping_file):
21 #def read_sample_mapping_file(sample_mapping_file): 21 #try
22 #try 22 #set mapping = {}
23 #set mapping = {} 23 #set file = open($sample_mapping_file.dataset.dataset.get_file_name(),'r')
24 #set file = open($sample_mapping_file.dataset.dataset.get_file_name(),'r') 24 #for $line in $file:
25 #for $line in $file: 25 #set content= $line.strip().split('\t')
26 #set content= $line.strip().split('\t') 26 #for $map in $content:
27 #for $map in $content: 27 #set mapping[$map]= $content[0]
28 #set mapping[$map]= $content[0] 28 #end for
29 #end for 29 #end for
30 #end for 30 #return $mapping
31 #return $mapping 31 #except
32 #except 32 #return None
33 #return None 33 #end try
34 #end try 34 #end def
35 #end def 35
36 36 ##------------------------------------------------------------------------------------
37 ##------------------------------------------------------------------------------------ 37 ## This function returns the name of the sample associated to a given file
38 ## This function returns the name of the sample associated to a given file 38 ##------------------------------------------------------------------------------------
39 ##------------------------------------------------------------------------------------ 39 #def get_sample_name($dataset, $sample_mapping):
40 #def get_sample_name($dataset, $sample_mapping): 40 ##If the file with samples mapping was provided
41 ##If the file with samples mapping was provided 41 #if $sample_mapping != None:
42 #if $sample_mapping != None: 42 #return $sample_mapping.get($dataset.name, None)
43 #return $sample_mapping.get($dataset.name, None) 43 ##Otherwise we extract the sample name from the filename
44 ##Otherwise we extract the sample name from the filename 44 #else:
45 #else: 45 #return str($dataset.element_identifier)
46 #return str($dataset.element_identifier) 46 #end if
47 #end if 47 #end def
48 #end def 48
49 49 ##------------------------------------------------------------------------------------
50 ##------------------------------------------------------------------------------------ 50 ## This function reads a dataset or list of datasets and sets the corresponding value
51 ## This function reads a dataset or list of datasets and sets the corresponding value 51 ## in the $result variable
52 ## in the $result variable 52 ## e.g. of result
53 ## e.g. of result 53 ##'sample1' : {
54 ##'sample1' : { 54 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat'
55 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat' 55 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat',
56 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat', 56 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat',
57 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat', 57 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat',
58 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat', 58 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat'
59 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat' 59 ## },
60 ## }, 60 ##------------------------------------------------------------------------------------
61 ##------------------------------------------------------------------------------------ 61 #def read_input_files($param_name, $param_value, $result, $sample_mapping, $create_if_empty):
62 #def read_input_files($param_name, $param_value, $result, $sample_mapping, $create_if_empty): 62 ## If input is a data collection
63 ## If input is a data collection 63 #if isinstance($param_value, list):
64 #if isinstance($param_value, list): 64 ## For each dataset
65 ## For each dataset 65 #for $dataset in $param_value:
66 #for $dataset in $param_value: 66 ## Get the sample name
67 ## Get the sample name 67 #set sample_name = $get_sample_name($dataset, $sample_mapping)
68 #set sample_name = $get_sample_name($dataset, $sample_mapping) 68 ## Check if sample is already registered
69 ## Check if sample is already registered 69 #if not($result.has_key($sample_name)):
70 #if not($result.has_key($sample_name)): 70 #if ($create_if_empty == True):
71 #if ($create_if_empty == True): 71 #set result[$sample_name] = {}
72 #set result[$sample_name] = {} 72 #else:
73 #else: 73 #raise ValueError("Error in input. Please check that input contains all the required files for sample " + $sample_name)
74 #raise ValueError("Error in input. Please check that input contains all the required files for sample " + $sample_name) 74 #end if
75 #end if 75 #end if
76 #end if 76 ## Register the file to the sample
77 ## Register the file to the sample 77 #set result[$sample_name][$param_name] = str($dataset.dataset.dataset.get_file_name())
78 #set result[$sample_name][$param_name] = str($dataset.dataset.dataset.get_file_name()) 78 #end for
79 #end for 79 #else:
80 #else: 80 #if not($result.has_key("sample_1")):
81 #if not($result.has_key("sample_1")): 81 #set result["sample_1"] = {}
82 #set result["sample_1"] = {} 82 #end if
83 #end if 83 #set result["sample_1"][$param_name] = str($param_name.dataset.dataset.get_file_name())
84 #set result["sample_1"][$param_name] = str($param_name.dataset.dataset.get_file_name()) 84 #end if
85 #end if 85 #return $result
86 #return $result 86 #end def
87 #end def 87
88 88 ##------------------------------------------------------------------------------------
89 ##------------------------------------------------------------------------------------ 89 ## Main body of the tool
90 ## Main body of the tool 90 ##------------------------------------------------------------------------------------
91 ##------------------------------------------------------------------------------------ 91 ## Set the params for the next R script
92 ## Set the params for the next R script 92 #set result={}
93 #set result={} 93 #set sample_mapping=None
94 #set sample_mapping=None 94
95 95 ## If the samples mapping file was provided, parse the content
96 ## If the samples mapping file was provided, parse the content 96 #if $samples_names != None and not(isinstance($samples_names, list) and (None in $samples_names)):
97 #if $samples_names != None and not(isinstance($samples_names, list) and (None in $samples_names)): 97 #set sample_mapping = $read_sample_mapping_file($samples_names)
98 #set sample_mapping = $read_sample_mapping_file($samples_names) 98 #end if
99 #end if 99
100 100 ## READ THE CONTENT FOR e_data AND STORE THE FILES
101 ## READ THE CONTENT FOR e_data AND STORE THE FILES 101 ## INDEXED BY THEIR SAMPLE NAME
102 ## INDEXED BY THEIR SAMPLE NAME 102 ## e.g. 'HBR_Rep1' : {
103 ## e.g. 'HBR_Rep1' : { 103 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat'
104 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat' 104 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat',
105 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat', 105 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat',
106 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat', 106 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat',
107 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat', 107 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat'
108 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat' 108 ## },
109 ## }, 109 ## 'HBR_Rep2' : {...}
110 ## 'HBR_Rep2' : {...} 110 #set $result = $read_input_files("e_data.ctab", $e_data, $result, $sample_mapping, True)
111 #set $result = $read_input_files("e_data.ctab", $e_data, $result, $sample_mapping, True) 111 #set $result = $read_input_files("i_data.ctab", $i_data, $result, $sample_mapping, False)
112 #set $result = $read_input_files("i_data.ctab", $i_data, $result, $sample_mapping, False) 112 #set $result = $read_input_files("t_data.ctab", $t_data, $result, $sample_mapping, False)
113 #set $result = $read_input_files("t_data.ctab", $t_data, $result, $sample_mapping, False) 113 #set $result = $read_input_files("e2t.ctab", $e2t, $result, $sample_mapping, False)
114 #set $result = $read_input_files("e2t.ctab", $e2t, $result, $sample_mapping, False) 114 #set $result = $read_input_files("i2t.ctab", $i2t, $result, $sample_mapping, False)
115 #set $result = $read_input_files("i2t.ctab", $i2t, $result, $sample_mapping, False) 115
116 116 ## For each input sample, create a directory and link the input files for ballgown
117 ## For each input sample, create a directory and link the input files for ballgown 117 #import os
118 #import os 118 #set n_sample = 1
119 #set n_sample = 1 119 #for $key, $value in $result.iteritems():
120 #for $key, $value in $result.iteritems(): 120 #if str($file_format.format) == 'tsv':
121 #set dir_name = str($output.files_path) + "/" + $key + "/" 121 #set dir_name = str($toutput.files_path) + '/' + $key + '/'
122 $os.makedirs($dir_name) 122 #else:
123 #for $file_name, $file_path in $value.iteritems(): 123 #set dir_name = str($output.files_path) + '/' + $key + '/'
124 $os.symlink($file_path, $dir_name + $file_name) 124 #end if
125 #end for 125 $os.makedirs($dir_name)
126 #set n_sample = $n_sample + 1 126 #for $file_name, $file_path in $value.iteritems():
127 #end for 127 $os.symlink($file_path, $dir_name + $file_name)
128 128 #end for
129 ## Run the R script with the location of the linked files and the name for the output file 129 #set n_sample = $n_sample + 1
130 ballgown.R --directory $output.files_path --outputtranscript $output --outputgenes $outputgn --texpression $trexpression --phendat $phendata --bgout $bgo 130 #end for
131 </command> 131
132 <inputs> 132 ## Run the R script with the location of the linked files and the name for the output file
133 <param name="e_data" type="data" multiple="true" format="tabular" label="Exon-level expression measurements" help="One row per exon. See below for more details."/> 133
134 <param name="i_data" type="data" multiple="true" format="tabular" label="Intron- (i.e., junction-) level expression measurements" help="One row per intron. See below for more details."/> 134 Rscript '$__tool_directory__/ballgown.R' --texpression $trexpression --phendat '$phendata' --bgout '$bgo' -f '$file_format.format'
135 <param name="t_data" type="data" multiple="true" format="tabular" label="Transcript-level expression measurements" help="One row per transcript. See below for more details."/> 135 #if str($file_format.format) == 'tsv':
136 <param name="e2t" type="data" multiple="true" format="tabular" label="Exons-transcripts mapping" help="Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. See below for more details."/> 136 --tsvoutputtranscript $toutputtranscript
137 <param name="i2t" type="data" multiple="true" format="tabular" label="Introns-transcripts mapping" help="Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. See below for more details."/> 137 --tsvoutputgenes $toutput
138 <param name="samples_names" type="data" optional="true" multiple="false" format="tabular" label="File names for samples" help="Optional. Use in case that the names for the analysed samples cannot be extracted from the filenames."/> 138 --directory $toutput.files_path
139 <param argument="--phendat" name="phendata" type="data" format="csv" label="phenotype data" /> 139 #else:
140 <param argument="--texpression" name="trexpression" type="float" value="0.5" label="minimal transcript expression to appear in the results"/> 140 --outputtranscript $output
141 </inputs> 141 --outputgenes $outputgn
142 <outputs> 142 --directory $output.files_path
143 <data name="bgo" format="rda" file="ballgown_object.rda" label="${tool.name} on ${on_string}: ballgown object (R data file)"/> 143 #end if
144 <data name="output" format="csv" file="output_transcript.csv" label="${tool.name} on ${on_string}: transcripts expression (tabular)"/> 144 ]]></command>
145 <data name="outputgn" format="csv" file="output_genes.csv" label="${tool.name} on ${on_string}: genes expression (tabular)"/> 145 <inputs>
146 </outputs> 146 <param name="e_data" type="data_collection" collection_type="list" format="tabular" label="Exon-level expression measurements"
147 <tests> 147 help="One row per exon. See below for more details."/>
148 </tests> 148 <param name="i_data" type="data_collection" collection_type="list" format="tabular"
149 <help> 149 label="Intron- (i.e., junction-) level expression measurements"
150 150 help="One row per intron. See below for more details."/>
151 <param name="t_data" type="data_collection" collection_type="list" format="tabular"
152 label="Transcript-level expression measurements" help="One row per transcript. See below for more details."/>
153 <param name="e2t" type="data_collection" collection_type="list" format="tabular"
154 label="Exons-transcripts mapping"
155 help="Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. See below for more details."/>
156 <param name="i2t" type="data_collection" collection_type="list" format="tabular"
157 label="Introns-transcripts mapping"
158 help="Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. See below for more details."/>
159 <param name="samples_names" type="data" optional="true" multiple="false" format="tabular"
160 label="File names for samples"
161 help="Optional. Use in case that the names for the analysed samples cannot be extracted from the filenames."/>
162 <param argument="--phendat" name="phendata" type="data" format="csv" label="phenotype data" />
163 <param argument="--texpression" name="trexpression" type="float" value="0.5" label="minimal transcript expression to appear in the results"/>
164 <conditional name="file_format">
165 <param argument='--format' type="select" label="Output format">
166 <option value="tsv" selected="true">tsv</option>
167 <option value="csv">csv</option>
168 </param>
169 <when value="tsv"/>
170 <when value="csv"/>
171 </conditional>
172 </inputs>
173 <outputs>
174 <data name="bgo" format="rdata" from_work_dir="ballgown_object.rda" label="${tool.name} on ${on_string}: ballgown_object_R_data_file"/>
175 <data name="output" format="csv" from_work_dir="output_transcript.csv" label="${tool.name} on ${on_string}: transcripts_expression_tabular">
176 <filter>file_format['format']=="csv"</filter>
177 </data>
178 <data name="outputgn" format="csv" from_work_dir="output_genes.csv" label="${tool.name} on ${on_string}: genes_expression_tabular">
179 <filter>file_format['format']=="csv"</filter>
180 </data>
181 <data name="toutputtranscript" format="tabular" from_work_dir="output_transcript.tsv" label="${tool.name} on ${on_string}: transcripts_expression_tabular">
182 <filter>file_format['format']=="tsv"</filter>
183 </data>
184 <data name="toutput" format="tabular" from_work_dir="output_genes.tsv" label="${tool.name} on ${on_string}: genes_expression_tabular">
185 <filter>file_format['format']=="tsv"</filter>
186 </data>
187 </outputs>
188 <tests>
189 <test>
190 <param name="e_data">
191 <collection type="list">
192 <element name="HBR_Rep1" value="HBR_Rep1/e_data.ctab"/>
193 <element name="HBR_Rep2" value="HBR_Rep2/e_data.ctab"/>
194 <element name="HBR_Rep3" value="HBR_Rep3/e_data.ctab"/>
195 <element name="UHR_Rep1" value="UHR_Rep1/e_data.ctab"/>
196 <element name="UHR_Rep2" value="UHR_Rep2/e_data.ctab"/>
197 <element name="UHR_Rep3" value="UHR_Rep3/e_data.ctab"/>
198 </collection>
199 </param>
200 <param name="i_data">
201 <collection type="list">
202 <element name="HBR_Rep1" value="HBR_Rep1/i_data.ctab"/>
203 <element name="HBR_Rep2" value="HBR_Rep2/i_data.ctab"/>
204 <element name="HBR_Rep3" value="HBR_Rep3/i_data.ctab"/>
205 <element name="UHR_Rep1" value="UHR_Rep1/i_data.ctab"/>
206 <element name="UHR_Rep2" value="UHR_Rep2/i_data.ctab"/>
207 <element name="UHR_Rep3" value="UHR_Rep3/i_data.ctab"/>
208 </collection>
209 </param>
210 <param name="t_data">
211 <collection type="list">
212 <element name="HBR_Rep1" value="HBR_Rep1/t_data.ctab"/>
213 <element name="HBR_Rep2" value="HBR_Rep2/t_data.ctab"/>
214 <element name="HBR_Rep3" value="HBR_Rep3/t_data.ctab"/>
215 <element name="UHR_Rep1" value="UHR_Rep1/t_data.ctab"/>
216 <element name="UHR_Rep2" value="UHR_Rep2/t_data.ctab"/>
217 <element name="UHR_Rep3" value="UHR_Rep3/t_data.ctab"/>
218 </collection>
219 </param>
220 <param name="e2t">
221 <collection type="list">
222 <element name="HBR_Rep1" value="HBR_Rep1/e2t.ctab"/>
223 <element name="HBR_Rep2" value="HBR_Rep2/e2t.ctab"/>
224 <element name="HBR_Rep3" value="HBR_Rep3/e2t.ctab"/>
225 <element name="UHR_Rep1" value="UHR_Rep1/e2t.ctab"/>
226 <element name="UHR_Rep2" value="UHR_Rep2/e2t.ctab"/>
227 <element name="UHR_Rep3" value="UHR_Rep3/e2t.ctab"/>
228 </collection>
229 </param>
230 <param name="i2t">
231 <collection type="list">
232 <element name="HBR_Rep1" value="HBR_Rep1/i2t.ctab"/>
233 <element name="HBR_Rep2" value="HBR_Rep2/i2t.ctab"/>
234 <element name="HBR_Rep3" value="HBR_Rep3/i2t.ctab"/>
235 <element name="UHR_Rep1" value="UHR_Rep1/i2t.ctab"/>
236 <element name="UHR_Rep2" value="UHR_Rep2/i2t.ctab"/>
237 <element name="UHR_Rep3" value="UHR_Rep3/i2t.ctab"/>
238 </collection>
239 </param>
240 <param name="phendata" value="phendata.csv"/>
241 <output name="outputgn" file="genes_expression_tabular.csv"/>
242 <output name="output" file="transcripts_expression_tabular.csv"/>
243 <output name="bgo" file="ballgown_object_R_data_file.rda"/>
244 </test>
245 </tests>
246 <help><![CDATA[
151 ======================= 247 =======================
152 Ballgown 248 Ballgown
153 ======================= 249 =======================
154 ----------------------- 250 -----------------------
155 **What it does** 251 **What it does**
191 - **i2t**: Tab file or collection of tab files. Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. These ids match the ids in the i_data and t_data tables. 287 - **i2t**: Tab file or collection of tab files. Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. These ids match the ids in the i_data and t_data tables.
192 - samples_names: (optional) Tab file. Table with five columns, one row per sample. Defines which files from the input belong to each sample in the experiment. 288 - samples_names: (optional) Tab file. Table with five columns, one row per sample. Defines which files from the input belong to each sample in the experiment.
193 289
194 .. class:: infomark 290 .. class:: infomark
195 291
196 '''TIP''' *Note* Here's an example of a good phenotype data file for your expirement. 292 '''TIP''' *Note* Here's an example of a good phenotype data file for your experiment.
197 293
198 +--------------+-------------------------+-------------------------+---+ 294 +--------------+-------------------------+-------------------------+---+
199 |ids |experimental variable 1 |experimental variable 2 |...| 295 |ids |experimental variable 1 |experimental variable 2 |...|
200 +==============+=========================+=========================+===+ 296 +==============+=========================+=========================+===+
201 |sample 1 |value 1 |value 2 |...| 297 |sample 1 |value 1 |value 2 |...|
226 - **Ballgown object** : this is the ballgown object created during the process. This file can be re-used later for further analysis in an R console. 322 - **Ballgown object** : this is the ballgown object created during the process. This file can be re-used later for further analysis in an R console.
227 323
228 ---- 324 ----
229 325
230 **Authors**: Théo Collard [SLU Global Bioinformatics Centre], Rafael Hernández de Diego [SLU Global Bioinformatics Centre], and Tomas Klingström [SLU Global Bioinformatics Centre] 326 **Authors**: Théo Collard [SLU Global Bioinformatics Centre], Rafael Hernández de Diego [SLU Global Bioinformatics Centre], and Tomas Klingström [SLU Global Bioinformatics Centre]
231 327 ]]></help>
232 Sources are available at https://github.com/CollardT/Ballgown-Wrapper 328 <citations>
233 329 <citation type="doi">doi:10.1038/nprot.2016.095</citation>
234 </help> 330 </citations>
235 </tool> 331 </tool>