Mercurial > repos > theo.collard > ballgown_wrapper
comparison ballgown.xml @ 17:05977e96375b draft default tip
Uploaded
author | theo.collard |
---|---|
date | Tue, 03 Oct 2017 09:25:51 -0400 |
parents | fa62657e9b57 |
children |
comparison
equal
deleted
inserted
replaced
16:4290f0f3d908 | 17:05977e96375b |
---|---|
1 <tool id="ballgown" name="Ballgown" version="0.5.0" workflow_compatible="true"> | 1 <tool id="ballgown" name="Ballgown" version="2.2.0" workflow_compatible="true"> |
2 <description>Flexible, isoform-level differential expression analysis</description> | 2 <description>Flexible, isoform-level differential expression analysis</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.2.0">bioconductor-ballgown</requirement> | 4 <requirement type="package" version="2.2.0">bioconductor-ballgown</requirement> |
5 <requirement type="package" version="0.5.0">r-dplyr</requirement> | 5 <requirement type="package" version="0.5.0">r-dplyr</requirement> |
6 <requirement type="package" version="1.3.2">r-optparse</requirement> | 6 <requirement type="package" version="1.3.2">r-optparse</requirement> |
7 | 7 </requirements> |
8 </requirements> | 8 <command detect_errors="aggressive"><![CDATA[ |
9 <command interpreter="Rscript" detect_errors="aggressive"> | 9 ##------------------------------------------------------------------------------------ |
10 ##------------------------------------------------------------------------------------ | 10 ## This function reads the input file with the mapping between samples and files |
11 ## This function reads the input file with the mapping between samples and files | 11 ## E.g. of result: |
12 ## E.g. of result: | 12 ## mapping = { |
13 ## mapping = { | 13 ## "e2t.ctab" : "sample1", |
14 ## "e2t.ctab" : "sample1", | 14 ## "other.ctab" : "sample2", |
15 ## "other.ctab" : "sample2", | 15 ## "i2t.ctab" : "sample1", |
16 ## "i2t.ctab" : "sample1", | 16 ## "t_data.ctab": "sample1" |
17 ## "t_data.ctab": "sample1" | 17 ## ... |
18 ## ... | 18 ## } |
19 ## } | 19 ##------------------------------------------------------------------------------------ |
20 ##------------------------------------------------------------------------------------ | 20 #def read_sample_mapping_file(sample_mapping_file): |
21 #def read_sample_mapping_file(sample_mapping_file): | 21 #try |
22 #try | 22 #set mapping = {} |
23 #set mapping = {} | 23 #set file = open($sample_mapping_file.dataset.dataset.get_file_name(),'r') |
24 #set file = open($sample_mapping_file.dataset.dataset.get_file_name(),'r') | 24 #for $line in $file: |
25 #for $line in $file: | 25 #set content= $line.strip().split('\t') |
26 #set content= $line.strip().split('\t') | 26 #for $map in $content: |
27 #for $map in $content: | 27 #set mapping[$map]= $content[0] |
28 #set mapping[$map]= $content[0] | 28 #end for |
29 #end for | 29 #end for |
30 #end for | 30 #return $mapping |
31 #return $mapping | 31 #except |
32 #except | 32 #return None |
33 #return None | 33 #end try |
34 #end try | 34 #end def |
35 #end def | 35 |
36 | 36 ##------------------------------------------------------------------------------------ |
37 ##------------------------------------------------------------------------------------ | 37 ## This function returns the name of the sample associated to a given file |
38 ## This function returns the name of the sample associated to a given file | 38 ##------------------------------------------------------------------------------------ |
39 ##------------------------------------------------------------------------------------ | 39 #def get_sample_name($dataset, $sample_mapping): |
40 #def get_sample_name($dataset, $sample_mapping): | 40 ##If the file with samples mapping was provided |
41 ##If the file with samples mapping was provided | 41 #if $sample_mapping != None: |
42 #if $sample_mapping != None: | 42 #return $sample_mapping.get($dataset.name, None) |
42 #return $sample_mapping.get($dataset.name, None) | 43 ##Otherwise we extract the sample name from the filename |
43 ##Otherwise we extract the sample name from the filename | 44 #else: |
45 #else: | 45 #return str($dataset.element_identifier) |
46 #return str($dataset.element_identifier) | 46 #end if |
47 #end if | 47 #end def |
48 #end def | 48 |
49 | 49 ##------------------------------------------------------------------------------------ |
50 ##------------------------------------------------------------------------------------ | 50 ## This function reads a dataset or list of datasets and sets the corresponding value |
51 ## This function reads a dataset or list of datasets and sets the corresponding value | 51 ## in the $result variable |
52 ## in the $result variable | 52 ## e.g. of result |
53 ## e.g. of result | 53 ##'sample1' : { |
54 ##'sample1' : { | 54 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat' |
55 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat' | 55 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat', |
56 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat', | 56 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat', |
57 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat', | 57 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat', |
58 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat', | 58 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat' |
59 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat' | 59 ## }, |
60 ## }, | 60 ##------------------------------------------------------------------------------------ |
61 ##------------------------------------------------------------------------------------ | 61 #def read_input_files($param_name, $param_value, $result, $sample_mapping, $create_if_empty): |
62 #def read_input_files($param_name, $param_value, $result, $sample_mapping, $create_if_empty): | 62 ## If input is a data collection |
63 ## If input is a data collection | 63 #if isinstance($param_value, list): |
64 #if isinstance($param_value, list): | 64 ## For each dataset |
65 ## For each dataset | 65 #for $dataset in $param_value: |
66 #for $dataset in $param_value: | 66 ## Get the sample name |
67 ## Get the sample name | 67 #set sample_name = $get_sample_name($dataset, $sample_mapping) |
68 #set sample_name = $get_sample_name($dataset, $sample_mapping) | 68 ## Check if sample is already registered |
69 ## Check if sample is already registered | 69 #if not($result.has_key($sample_name)): |
70 #if not($result.has_key($sample_name)): | 70 #if ($create_if_empty == True): |
71 #if ($create_if_empty == True): | 71 #set result[$sample_name] = {} |
72 #set result[$sample_name] = {} | 72 #else: |
73 #else: | 73 #raise ValueError("Error in input. Please check that input contains all the required files for sample " + $sample_name) |
74 #raise ValueError("Error in input. Please check that input contains all the required files for sample " + $sample_name) | 74 #end if |
75 #end if | 75 #end if |
76 #end if | 76 ## Register the file to the sample |
77 ## Register the file to the sample | 77 #set result[$sample_name][$param_name] = str($dataset.dataset.dataset.get_file_name()) |
78 #set result[$sample_name][$param_name] = str($dataset.dataset.dataset.get_file_name()) | 78 #end for |
79 #end for | 79 #else: |
80 #else: | 80 #if not($result.has_key("sample_1")): |
81 #if not($result.has_key("sample_1")): | 81 #set result["sample_1"] = {} |
82 #set result["sample_1"] = {} | 82 #end if |
83 #end if | 83 #set result["sample_1"][$param_name] = str($param_name.dataset.dataset.get_file_name()) |
84 #set result["sample_1"][$param_name] = str($param_name.dataset.dataset.get_file_name()) | 84 #end if |
85 #end if | 85 #return $result |
86 #return $result | 86 #end def |
87 #end def | 87 |
88 | 88 ##------------------------------------------------------------------------------------ |
89 ##------------------------------------------------------------------------------------ | 89 ## Main body of the tool |
90 ## Main body of the tool | 90 ##------------------------------------------------------------------------------------ |
91 ##------------------------------------------------------------------------------------ | 91 ## Set the params for the next R script |
92 ## Set the params for the next R script | 92 #set result={} |
93 #set result={} | 93 #set sample_mapping=None |
94 #set sample_mapping=None | 94 |
95 | 95 ## If the samples mapping file was provided, parse the content |
96 ## If the samples mapping file was provided, parse the content | 96 #if $samples_names != None and not(isinstance($samples_names, list) and (None in $samples_names)): |
97 #if $samples_names != None and not(isinstance($samples_names, list) and (None in $samples_names)): | 97 #set sample_mapping = $read_sample_mapping_file($samples_names) |
98 #set sample_mapping = $read_sample_mapping_file($samples_names) | 98 #end if |
99 #end if | 99 |
100 | 100 ## READ THE CONTENT FOR e_data AND STORE THE FILES |
101 ## READ THE CONTENT FOR e_data AND STORE THE FILES | 101 ## INDEXED BY THEIR SAMPLE NAME |
102 ## INDEXED BY THEIR SAMPLE NAME | 102 ## e.g. 'HBR_Rep1' : { |
103 ## e.g. 'HBR_Rep1' : { | 103 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat' |
104 ## 'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat' | 104 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat', |
105 ## 'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat', | 105 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat', |
106 ## 't_data': '/export/galaxy-central/database/files/000/dataset_12.dat', | 106 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat', |
107 ## 'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat', | 107 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat' |
108 ## 'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat' | 108 ## }, |
109 ## }, | 109 ## 'HBR_Rep2' : {...} |
110 ## 'HBR_Rep2' : {...} | 110 #set $result = $read_input_files("e_data.ctab", $e_data, $result, $sample_mapping, True) |
111 #set $result = $read_input_files("e_data.ctab", $e_data, $result, $sample_mapping, True) | 111 #set $result = $read_input_files("i_data.ctab", $i_data, $result, $sample_mapping, False) |
112 #set $result = $read_input_files("i_data.ctab", $i_data, $result, $sample_mapping, False) | 112 #set $result = $read_input_files("t_data.ctab", $t_data, $result, $sample_mapping, False) |
113 #set $result = $read_input_files("t_data.ctab", $t_data, $result, $sample_mapping, False) | 113 #set $result = $read_input_files("e2t.ctab", $e2t, $result, $sample_mapping, False) |
114 #set $result = $read_input_files("e2t.ctab", $e2t, $result, $sample_mapping, False) | 114 #set $result = $read_input_files("i2t.ctab", $i2t, $result, $sample_mapping, False) |
115 #set $result = $read_input_files("i2t.ctab", $i2t, $result, $sample_mapping, False) | 115 |
116 | 116 ## For each input sample, create a directory and link the input files for ballgown |
117 ## For each input sample, create a directory and link the input files for ballgown | 117 #import os |
118 #import os | 118 #set n_sample = 1 |
119 #set n_sample = 1 | 119 #for $key, $value in $result.iteritems(): |
120 #for $key, $value in $result.iteritems(): | 120 #if str($file_format.format) == 'tsv': |
121 #set dir_name = str($output.files_path) + "/" + $key + "/" | 121 #set dir_name = str($toutput.files_path) + '/' + $key + '/' |
122 $os.makedirs($dir_name) | 122 #else: |
123 #for $file_name, $file_path in $value.iteritems(): | 123 #set dir_name = str($output.files_path) + '/' + $key + '/' |
124 $os.symlink($file_path, $dir_name + $file_name) | 124 #end if |
125 #end for | 125 $os.makedirs($dir_name) |
126 #set n_sample = $n_sample + 1 | 126 #for $file_name, $file_path in $value.iteritems(): |
127 #end for | 127 $os.symlink($file_path, $dir_name + $file_name) |
128 | 128 #end for |
129 ## Run the R script with the location of the linked files and the name for the output file | 129 #set n_sample = $n_sample + 1 |
130 ballgown.R --directory $output.files_path --outputtranscript $output --outputgenes $outputgn --texpression $trexpression --phendat $phendata --bgout $bgo | 130 #end for |
131 </command> | 131 |
132 <inputs> | 132 ## Run the R script with the location of the linked files and the name for the output file |
133 <param name="e_data" type="data" multiple="true" format="tabular" label="Exon-level expression measurements" help="One row per exon. See below for more details."/> | 133 |
134 <param name="i_data" type="data" multiple="true" format="tabular" label="Intron- (i.e., junction-) level expression measurements" help="One row per intron. See below for more details."/> | 134 Rscript '$__tool_directory__/ballgown.R' --texpression $trexpression --phendat '$phendata' --bgout '$bgo' -f '$file_format.format' |
135 <param name="t_data" type="data" multiple="true" format="tabular" label="Transcript-level expression measurements" help="One row per transcript. See below for more details."/> | 135 #if str($file_format.format) == 'tsv': |
136 <param name="e2t" type="data" multiple="true" format="tabular" label="Exons-transcripts mapping" help="Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. See below for more details."/> | 136 --tsvoutputtranscript $toutputtranscript |
137 <param name="i2t" type="data" multiple="true" format="tabular" label="Introns-transcripts mapping" help="Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. See below for more details."/> | 137 --tsvoutputgenes $toutput |
138 <param name="samples_names" type="data" optional="true" multiple="false" format="tabular" label="File names for samples" help="Optional. Use in case that the names for the analysed samples cannot be extracted from the filenames."/> | 138 --directory $toutput.files_path |
139 <param argument="--phendat" name="phendata" type="data" format="csv" label="phenotype data" /> | 139 #else: |
140 <param argument="--texpression" name="trexpression" type="float" value="0.5" label="minimal transcript expression to appear in the results"/> | 140 --outputtranscript $output |
141 </inputs> | 141 --outputgenes $outputgn |
142 <outputs> | 142 --directory $output.files_path |
143 <data name="bgo" format="rda" file="ballgown_object.rda" label="${tool.name} on ${on_string}: ballgown object (R data file)"/> | 143 #end if |
144 <data name="output" format="csv" file="output_transcript.csv" label="${tool.name} on ${on_string}: transcripts expression (tabular)"/> | 144 ]]></command> |
145 <data name="outputgn" format="csv" file="output_genes.csv" label="${tool.name} on ${on_string}: genes expression (tabular)"/> | 145 <inputs> |
146 </outputs> | 146 <param name="e_data" type="data_collection" collection_type="list" format="tabular" label="Exon-level expression measurements" |
147 <tests> | 147 help="One row per exon. See below for more details."/> |
148 </tests> | 148 <param name="i_data" type="data_collection" collection_type="list" format="tabular" |
149 <help> | 149 label="Intron- (i.e., junction-) level expression measurements" |
150 | 150 help="One row per intron. See below for more details."/> |
151 <param name="t_data" type="data_collection" collection_type="list" format="tabular" | |
152 label="Transcript-level expression measurements" help="One row per transcript. See below for more details."/> | |
153 <param name="e2t" type="data_collection" collection_type="list" format="tabular" | |
154 label="Exons-transcripts mapping" | |
155 help="Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. See below for more details."/> | |
156 <param name="i2t" type="data_collection" collection_type="list" format="tabular" | |
157 label="Introns-transcripts mapping" | |
158 help="Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. See below for more details."/> | |
159 <param name="samples_names" type="data" optional="true" multiple="false" format="tabular" | |
160 label="File names for samples" | |
161 help="Optional. Use in case that the names for the analysed samples cannot be extracted from the filenames."/> | |
162 <param argument="--phendat" name="phendata" type="data" format="csv" label="phenotype data" /> | |
163 <param argument="--texpression" name="trexpression" type="float" value="0.5" label="minimal transcript expression to appear in the results"/> | |
164 <conditional name="file_format"> | |
165 <param argument='--format' type="select" label="Output format"> | |
166 <option value="tsv" selected="true">tsv</option> | |
167 <option value="csv">csv</option> | |
168 </param> | |
169 <when value="tsv"/> | |
170 <when value="csv"/> | |
171 </conditional> | |
172 </inputs> | |
173 <outputs> | |
174 <data name="bgo" format="rdata" from_work_dir="ballgown_object.rda" label="${tool.name} on ${on_string}: ballgown_object_R_data_file"/> | |
175 <data name="output" format="csv" from_work_dir="output_transcript.csv" label="${tool.name} on ${on_string}: transcripts_expression_tabular"> | |
176 <filter>file_format['format']=="csv"</filter> | |
177 </data> | |
178 <data name="outputgn" format="csv" from_work_dir="output_genes.csv" label="${tool.name} on ${on_string}: genes_expression_tabular"> | |
179 <filter>file_format['format']=="csv"</filter> | |
180 </data> | |
181 <data name="toutputtranscript" format="tabular" from_work_dir="output_transcript.tsv" label="${tool.name} on ${on_string}: transcripts_expression_tabular"> | |
182 <filter>file_format['format']=="tsv"</filter> | |
183 </data> | |
184 <data name="toutput" format="tabular" from_work_dir="output_genes.tsv" label="${tool.name} on ${on_string}: genes_expression_tabular"> | |
185 <filter>file_format['format']=="tsv"</filter> | |
186 </data> | |
187 </outputs> | |
188 <tests> | |
189 <test> | |
190 <param name="e_data"> | |
191 <collection type="list"> | |
192 <element name="HBR_Rep1" value="HBR_Rep1/e_data.ctab"/> | |
193 <element name="HBR_Rep2" value="HBR_Rep2/e_data.ctab"/> | |
194 <element name="HBR_Rep3" value="HBR_Rep3/e_data.ctab"/> | |
195 <element name="UHR_Rep1" value="UHR_Rep1/e_data.ctab"/> | |
196 <element name="UHR_Rep2" value="UHR_Rep2/e_data.ctab"/> | |
197 <element name="UHR_Rep3" value="UHR_Rep3/e_data.ctab"/> | |
198 </collection> | |
199 </param> | |
200 <param name="i_data"> | |
201 <collection type="list"> | |
202 <element name="HBR_Rep1" value="HBR_Rep1/i_data.ctab"/> | |
203 <element name="HBR_Rep2" value="HBR_Rep2/i_data.ctab"/> | |
204 <element name="HBR_Rep3" value="HBR_Rep3/i_data.ctab"/> | |
205 <element name="UHR_Rep1" value="UHR_Rep1/i_data.ctab"/> | |
206 <element name="UHR_Rep2" value="UHR_Rep2/i_data.ctab"/> | |
207 <element name="UHR_Rep3" value="UHR_Rep3/i_data.ctab"/> | |
208 </collection> | |
209 </param> | |
210 <param name="t_data"> | |
211 <collection type="list"> | |
212 <element name="HBR_Rep1" value="HBR_Rep1/t_data.ctab"/> | |
213 <element name="HBR_Rep2" value="HBR_Rep2/t_data.ctab"/> | |
214 <element name="HBR_Rep3" value="HBR_Rep3/t_data.ctab"/> | |
215 <element name="UHR_Rep1" value="UHR_Rep1/t_data.ctab"/> | |
216 <element name="UHR_Rep2" value="UHR_Rep2/t_data.ctab"/> | |
217 <element name="UHR_Rep3" value="UHR_Rep3/t_data.ctab"/> | |
218 </collection> | |
219 </param> | |
220 <param name="e2t"> | |
221 <collection type="list"> | |
222 <element name="HBR_Rep1" value="HBR_Rep1/e2t.ctab"/> | |
223 <element name="HBR_Rep2" value="HBR_Rep2/e2t.ctab"/> | |
224 <element name="HBR_Rep3" value="HBR_Rep3/e2t.ctab"/> | |
225 <element name="UHR_Rep1" value="UHR_Rep1/e2t.ctab"/> | |
226 <element name="UHR_Rep2" value="UHR_Rep2/e2t.ctab"/> | |
227 <element name="UHR_Rep3" value="UHR_Rep3/e2t.ctab"/> | |
228 </collection> | |
229 </param> | |
230 <param name="i2t"> | |
231 <collection type="list"> | |
232 <element name="HBR_Rep1" value="HBR_Rep1/i2t.ctab"/> | |
233 <element name="HBR_Rep2" value="HBR_Rep2/i2t.ctab"/> | |
234 <element name="HBR_Rep3" value="HBR_Rep3/i2t.ctab"/> | |
235 <element name="UHR_Rep1" value="UHR_Rep1/i2t.ctab"/> | |
236 <element name="UHR_Rep2" value="UHR_Rep2/i2t.ctab"/> | |
237 <element name="UHR_Rep3" value="UHR_Rep3/i2t.ctab"/> | |
238 </collection> | |
239 </param> | |
240 <param name="phendata" value="phendata.csv"/> | |
241 <output name="outputgn" file="genes_expression_tabular.csv"/> | |
242 <output name="output" file="transcripts_expression_tabular.csv"/> | |
243 <output name="bgo" file="ballgown_object_R_data_file.rda"/> | |
244 </test> | |
245 </tests> | |
246 <help><![CDATA[ | |
151 ======================= | 247 ======================= |
152 Ballgown | 248 Ballgown |
153 ======================= | 249 ======================= |
154 ----------------------- | 250 ----------------------- |
155 **What it does** | 251 **What it does** |
191 - **i2t**: Tab file or collection of tab files. Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. These ids match the ids in the i_data and t_data tables. | 287 - **i2t**: Tab file or collection of tab files. Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. These ids match the ids in the i_data and t_data tables. |
192 - **samples_names**: (optional) Tab file. Table with five columns, one row per sample. Defines which files from the input belong to each sample in the experiment. | 288 - **samples_names**: (optional) Tab file. Table with five columns, one row per sample. Defines which files from the input belong to each sample in the experiment. |
193 | 289 |
194 .. class:: infomark | 290 .. class:: infomark |
195 | 291 |
196 '''TIP''' *Note* Here's an example of a good phenotype data file for your expirement. | 292 '''TIP''' *Note* Here's an example of a good phenotype data file for your experiment. |
197 | 293 |
198 +--------------+-------------------------+-------------------------+---+ | 294 +--------------+-------------------------+-------------------------+---+ |
199 |ids |experimental variable 1 |experimental variable 2 |...| | 295 |ids |experimental variable 1 |experimental variable 2 |...| |
200 +==============+=========================+=========================+===+ | 296 +==============+=========================+=========================+===+ |
201 |sample 1 |value 1 |value 2 |...| | 297 |sample 1 |value 1 |value 2 |...| |
226 - **Ballgown object** : this is the ballgown object created during the process. This file can be re-used later for further analysis in an R console. | 322 - **Ballgown object** : this is the ballgown object created during the process. This file can be re-used later for further analysis in an R console. |
227 | 323 |
228 ---- | 324 ---- |
229 | 325 |
230 **Authors**: Théo Collard [SLU Global Bioinformatics Centre], Rafael Hernández de Diego [SLU Global Bioinformatics Centre], and Tomas Klingström [SLU Global Bioinformatics Centre] | 326 **Authors**: Théo Collard [SLU Global Bioinformatics Centre], Rafael Hernández de Diego [SLU Global Bioinformatics Centre], and Tomas Klingström [SLU Global Bioinformatics Centre] |
231 | 327 ]]></help> |
232 Sources are available at https://github.com/CollardT/Ballgown-Wrapper | 328 <citations> |
233 | 329 <citation type="doi">doi:10.1038/nprot.2016.095</citation> |
234 </help> | 330 </citations> |
235 </tool> | 331 </tool> |