annotate ballgown.xml @ 17:05977e96375b draft default tip

Uploaded
author theo.collard
date Tue, 03 Oct 2017 09:25:51 -0400
parents fa62657e9b57
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
1 <tool id="ballgown" name="Ballgown" version="2.2.0" workflow_compatible="true">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
2 <description>Flexible, isoform-level differential expression analysis</description>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
3 <requirements>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
4 <requirement type="package" version="2.2.0">bioconductor-ballgown</requirement>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
5 <requirement type="package" version="0.5.0">r-dplyr</requirement>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
6 <requirement type="package" version="1.3.2">r-optparse</requirement>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
7 </requirements>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
8 <command detect_errors="aggressive"><![CDATA[
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
##------------------------------------------------------------------------------------
## Reads the optional tab-separated file that maps input file names to samples.
## Every non-blank row is split on tabs; the first field is used as the sample name
## and each field of the row (the first one included) becomes a key pointing to it.
## E.g. of result:
## mapping = {
##     "e2t.ctab"   : "sample1",
##     "other.ctab" : "sample2",
##     "i2t.ctab"   : "sample1",
##     "t_data.ctab": "sample1"
##     ...
## }
## Returns None when the file cannot be opened or parsed. This is a deliberate
## best effort: the caller treats None as "no mapping available".
##------------------------------------------------------------------------------------
#def read_sample_mapping_file(sample_mapping_file):
    #try
        #set mapping = {}
        #set handle = open($sample_mapping_file.dataset.dataset.get_file_name(), 'r')
        ## Read everything up front so the handle can be released before parsing
        #set rows = $handle.readlines()
        #silent $handle.close()
        #for $line in $rows:
            ## Blank lines would otherwise register a bogus '' -> '' entry
            #if $line.strip():
                #set content = $line.strip().split('\t')
                #for $map in $content:
                    #set mapping[$map] = $content[0]
                #end for
            #end if
        #end for
        #return $mapping
    #except
        #return None
    #end try
#end def
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
35
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
##------------------------------------------------------------------------------------
## Returns the name of the sample associated to a given dataset.
##   $dataset        : one dataset of an input collection
##   $sample_mapping : dict built by read_sample_mapping_file, or None
## When a mapping is available the dataset name is looked up in it. If there is no
## mapping, or the mapping has no entry for this dataset, the element identifier of
## the dataset is used instead, so the result is never None.
##------------------------------------------------------------------------------------
#def get_sample_name($dataset, $sample_mapping):
    ## If the file with samples mapping was provided
    #if $sample_mapping is not None:
        #set mapped_name = $sample_mapping.get($dataset.name, None)
        #if $mapped_name is not None:
            #return $mapped_name
        #end if
    #end if
    ## Otherwise we extract the sample name from the element identifier
    #return str($dataset.element_identifier)
#end def
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
48
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
##------------------------------------------------------------------------------------
## Registers the file path(s) of one input parameter in $result, indexed by sample.
##   $param_name      : file name the dataset must be linked as (e.g. "e_data.ctab")
##   $param_value     : the input itself (list of datasets or a single dataset)
##   $result          : dict of dicts filled in place and also returned
##   $sample_mapping  : dict from read_sample_mapping_file, or None
##   $create_if_empty : when True, unknown samples are added to $result; when False
##                      an unknown sample raises ValueError
## e.g. of result
## 'sample1' : {
##     'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat'
##     'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat',
##     't_data': '/export/galaxy-central/database/files/000/dataset_12.dat',
##     'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat',
##     'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat'
## },
##------------------------------------------------------------------------------------
#def read_input_files($param_name, $param_value, $result, $sample_mapping, $create_if_empty):
    ## If input is a data collection
    ## NOTE(review): assumes Galaxy hands collection inputs over as a list (sub)class;
    ## confirm against the Galaxy release this tool targets
    #if isinstance($param_value, list):
        ## For each dataset
        #for $dataset in $param_value:
            ## Get the sample name
            #set sample_name = $get_sample_name($dataset, $sample_mapping)
            ## Check if sample is already registered
            #if $sample_name not in $result:
                #if ($create_if_empty == True):
                    #set result[$sample_name] = {}
                #else:
                    ## str() keeps the message buildable even for a non-string sample name
                    #raise ValueError("Error in input. Please check that input contains all the required files for sample " + str($sample_name))
                #end if
            #end if
            ## Register the file to the sample
            #set result[$sample_name][$param_name] = str($dataset.dataset.dataset.get_file_name())
        #end for
    #else:
        ## A single dataset is always filed under the fixed sample name "sample_1"
        #if "sample_1" not in $result:
            #set result["sample_1"] = {}
        #end if
        ## The path comes from the dataset ($param_value), not from the name string
        #set result["sample_1"][$param_name] = str($param_value.dataset.dataset.get_file_name())
    #end if
    #return $result
#end def
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
87
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
##------------------------------------------------------------------------------------
## Main body of the tool
## 1. Collect the five kinds of input files, grouped by sample name.
## 2. Create one directory per sample under the output's files_path and symlink the
##    input files into it under the names registered for them (e_data.ctab, ...).
##    This happens while the template is rendered, not in the generated command.
## 3. Emit the Rscript command line that runs ballgown.R on those directories.
##------------------------------------------------------------------------------------
## Set the params for the next R script
#set result = {}
#set sample_mapping = None

## If the samples mapping file was provided, parse the content
#if $samples_names != None and not(isinstance($samples_names, list) and (None in $samples_names)):
    #set sample_mapping = $read_sample_mapping_file($samples_names)
#end if

## READ THE CONTENT FOR e_data AND STORE THE FILES
## INDEXED BY THEIR SAMPLE NAME
## e.g. 'HBR_Rep1' : {
##          'e_data': '/export/galaxy-central/database/files/000/dataset_13.dat'
##          'i_data': '/export/galaxy-central/database/files/000/dataset_10.dat',
##          't_data': '/export/galaxy-central/database/files/000/dataset_12.dat',
##          'e2t': '/export/galaxy-central/database/files/000/dataset_9.dat',
##          'i2t': '/export/galaxy-central/database/files/000/dataset_11.dat'
##      },
##      'HBR_Rep2' : {...}
## Only e_data may introduce new samples; the other inputs must match them.
#set $result = $read_input_files("e_data.ctab", $e_data, $result, $sample_mapping, True)
#set $result = $read_input_files("i_data.ctab", $i_data, $result, $sample_mapping, False)
#set $result = $read_input_files("t_data.ctab", $t_data, $result, $sample_mapping, False)
#set $result = $read_input_files("e2t.ctab", $e2t, $result, $sample_mapping, False)
#set $result = $read_input_files("i2t.ctab", $i2t, $result, $sample_mapping, False)

## For each input sample, create a directory and link the input files for ballgown
#import os
## items() works on both Python 2 and Python 3 dicts (iteritems() is Python 2 only)
#for $key, $value in $result.items():
    #if str($file_format.format) == 'tsv':
        #set dir_name = str($toutput.files_path) + '/' + str($key) + '/'
    #else:
        #set dir_name = str($output.files_path) + '/' + str($key) + '/'
    #end if
    ## #silent evaluates the call without writing its return value into the command
    #if not $os.path.isdir($dir_name):
        #silent $os.makedirs($dir_name)
    #end if
    #for $file_name, $file_path in $value.items():
        #silent $os.symlink($file_path, $dir_name + $file_name)
    #end for
#end for

## Run the R script with the location of the linked files and the name for output file

Rscript '$__tool_directory__/ballgown.R' --texpression '$trexpression' --phendat '$phendata' --bgout '$bgo' -f '$file_format.format'
#if str($file_format.format) == 'tsv':
    --tsvoutputtranscript '$toutputtranscript'
    --tsvoutputgenes '$toutput'
    --directory '$toutput.files_path'
#else:
    --outputtranscript '$output'
    --outputgenes '$outputgn'
    --directory '$output.files_path'
#end if
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
144 ]]></command>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
145 <inputs>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
146 <param name="e_data" type="data_collection" collection_type="list" format="tabular" label="Exon-level expression measurements"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
147 help="One row per exon. See below for more details."/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
148 <param name="i_data" type="data_collection" collection_type="list" format="tabular"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
149 label="Intron- (i.e., junction-) level expression measurements"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
150 help="One row per intron. See below for more details."/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
151 <param name="t_data" type="data_collection" collection_type="list" format="tabular"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
152 label="Transcript-level expression measurements" help="One row per transcript. See below for more details."/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
153 <param name="e2t" type="data_collection" collection_type="list" format="tabular"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
154 label="Exons-transcripts mapping"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
155 help="Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. See below for more details."/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
156 <param name="i2t" type="data_collection" collection_type="list" format="tabular"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
157 label="Introns-transcripts mapping"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
158 help="Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. See below for more details."/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
159 <param name="samples_names" type="data" optional="true" multiple="false" format="tabular"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
160 label="File names for samples"
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
161 help="Optional. Use in case that the names for the analysed samples cannot be extracted from the filenames."/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
162 <param argument="--phendat" name="phendata" type="data" format="csv" label="phenotype data" />
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
163 <param argument="--texpression" name="trexpression" type="float" value="0.5" label="minimal transcript expression to appear in the results"/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
164 <conditional name="file_format">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
165 <param argument='--format' type="select" label="Output format">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
166 <option value="tsv" selected="true">tsv</option>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
167 <option value="csv">csv</option>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
168 </param>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
169 <when value="tsv"/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
170 <when value="csv"/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
171 </conditional>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
172 </inputs>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
173 <outputs>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
174 <data name="bgo" format="rdata" from_work_dir="ballgown_object.rda" label="${tool.name} on ${on_string}: ballgown_object_R_data_file"/>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
175 <data name="output" format="csv" from_work_dir="output_transcript.csv" label="${tool.name} on ${on_string}: transcripts_expression_tabular">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
176 <filter>file_format['format']=="csv"</filter>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
177 </data>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
178 <data name="outputgn" format="csv" from_work_dir="output_genes.csv" label="${tool.name} on ${on_string}: genes_expression_tabular">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
179 <filter>file_format['format']=="csv"</filter>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
180 </data>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
181 <data name="toutputtranscript" format="tabular" from_work_dir="output_transcript.tsv" label="${tool.name} on ${on_string}: transcripts_expression_tabular">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
182 <filter>file_format['format']=="tsv"</filter>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
183 </data>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
184 <data name="toutput" format="tabular" from_work_dir="output_genes.tsv" label="${tool.name} on ${on_string}: genes_expression_tabular">
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
185 <filter>file_format['format']=="tsv"</filter>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
186 </data>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
187 </outputs>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
<tests>
    <!-- Six samples (3 x HBR, 3 x UHR); every input collection uses the same element
         names so that the five files of a sample are grouped together. -->
    <test>
        <param name="e_data">
            <collection type="list">
                <element name="HBR_Rep1" value="HBR_Rep1/e_data.ctab"/>
                <element name="HBR_Rep2" value="HBR_Rep2/e_data.ctab"/>
                <element name="HBR_Rep3" value="HBR_Rep3/e_data.ctab"/>
                <element name="UHR_Rep1" value="UHR_Rep1/e_data.ctab"/>
                <element name="UHR_Rep2" value="UHR_Rep2/e_data.ctab"/>
                <element name="UHR_Rep3" value="UHR_Rep3/e_data.ctab"/>
            </collection>
        </param>
        <param name="i_data">
            <collection type="list">
                <element name="HBR_Rep1" value="HBR_Rep1/i_data.ctab"/>
                <element name="HBR_Rep2" value="HBR_Rep2/i_data.ctab"/>
                <element name="HBR_Rep3" value="HBR_Rep3/i_data.ctab"/>
                <element name="UHR_Rep1" value="UHR_Rep1/i_data.ctab"/>
                <element name="UHR_Rep2" value="UHR_Rep2/i_data.ctab"/>
                <element name="UHR_Rep3" value="UHR_Rep3/i_data.ctab"/>
            </collection>
        </param>
        <param name="t_data">
            <collection type="list">
                <element name="HBR_Rep1" value="HBR_Rep1/t_data.ctab"/>
                <element name="HBR_Rep2" value="HBR_Rep2/t_data.ctab"/>
                <element name="HBR_Rep3" value="HBR_Rep3/t_data.ctab"/>
                <element name="UHR_Rep1" value="UHR_Rep1/t_data.ctab"/>
                <element name="UHR_Rep2" value="UHR_Rep2/t_data.ctab"/>
                <element name="UHR_Rep3" value="UHR_Rep3/t_data.ctab"/>
            </collection>
        </param>
        <param name="e2t">
            <collection type="list">
                <element name="HBR_Rep1" value="HBR_Rep1/e2t.ctab"/>
                <element name="HBR_Rep2" value="HBR_Rep2/e2t.ctab"/>
                <element name="HBR_Rep3" value="HBR_Rep3/e2t.ctab"/>
                <element name="UHR_Rep1" value="UHR_Rep1/e2t.ctab"/>
                <element name="UHR_Rep2" value="UHR_Rep2/e2t.ctab"/>
                <element name="UHR_Rep3" value="UHR_Rep3/e2t.ctab"/>
            </collection>
        </param>
        <param name="i2t">
            <collection type="list">
                <element name="HBR_Rep1" value="HBR_Rep1/i2t.ctab"/>
                <element name="HBR_Rep2" value="HBR_Rep2/i2t.ctab"/>
                <element name="HBR_Rep3" value="HBR_Rep3/i2t.ctab"/>
                <element name="UHR_Rep1" value="UHR_Rep1/i2t.ctab"/>
                <element name="UHR_Rep2" value="UHR_Rep2/i2t.ctab"/>
                <element name="UHR_Rep3" value="UHR_Rep3/i2t.ctab"/>
            </collection>
        </param>
        <param name="phendata" value="phendata.csv"/>
        <!-- The "output" and "outputgn" datasets only exist when csv is selected;
             the default format is tsv, so it has to be chosen explicitly here. -->
        <conditional name="file_format">
            <param name="format" value="csv"/>
        </conditional>
        <output name="outputgn" file="genes_expression_tabular.csv"/>
        <output name="output" file="transcripts_expression_tabular.csv"/>
        <output name="bgo" file="ballgown_object_R_data_file.rda"/>
    </test>
</tests>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
246 <help><![CDATA[
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
247 =======================
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
248 Ballgown
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
249 =======================
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
250 -----------------------
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
251 **What it does**
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
252 -----------------------
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
253
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
254 Ballgown is a software package designed to facilitate flexible differential expression analysis of RNA-seq data.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
255 The Ballgown package provides functions to organize, visualize, and analyze the expression measurements for your transcriptome assembly.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
256
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
257 ----
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
258
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
259 -----------------------
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
260 **How to use**
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
261 -----------------------
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
262 The input for this tool consists of 5 files for each sample in your experiment:
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
263
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
264 - **e_data**: exon-level expression measurements. Tab file or collection of tab files. One row per exon. Columns are e_id (numeric exon id), chr, strand, start, end (genomic location of the exon), and the following expression measurements for each sample:
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
265 * rcount: reads overlapping the exon
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
266 * ucount: uniquely mapped reads overlapping the exon
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
267 * mrcount: multi-map-corrected number of reads overlapping the exon
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
268 * cov: average per-base read coverage
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
269 * cov_sd: standard deviation of per-base read coverage
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
270 * mcov: multi-map-corrected average per-base read coverage
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
271 * mcov_sd: standard deviation of multi-map-corrected per-base coverage
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
272 - **i_data**: intron- (i.e., junction-) level expression measurements. Tab file or collection of tab files. One row per intron. Columns are i_id (numeric intron id), chr, strand, start, end (genomic location of the intron), and the following expression measurements for each sample:
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
273 * rcount: number of reads supporting the intron
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
274 * ucount: number of uniquely mapped reads supporting the intron
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
275 * mrcount: multi-map-corrected number of reads supporting the intron
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
276 - **t_data**: transcript-level expression measurements. Tab file or collection of tab files. One row per transcript. Columns are:
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
277 * t_id: numeric transcript id
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
278 * chr, strand, start, end: genomic location of the transcript
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
279 * t_name: Cufflinks-generated transcript id
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
280 * num_exons: number of exons comprising the transcript
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
281 * length: transcript length, including both exons and introns
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
282 * gene_id: gene the transcript belongs to
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
283 * gene_name: HUGO gene name for the transcript, if known
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
284 * cov: per-base coverage for the transcript (available for each sample)
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
285 * FPKM: Cufflinks-estimated FPKM for the transcript (available for each sample)
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
286 - **e2t**: Tab file or collection of tab files. Table with two columns, e_id and t_id, denoting which exons belong to which transcripts. These ids match the ids in the e_data and t_data tables.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
287 - **i2t**: Tab file or collection of tab files. Table with two columns, i_id and t_id, denoting which introns belong to which transcripts. These ids match the ids in the i_data and t_data tables.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
288 - **samples_names**: (optional) Tab file. Table with five columns, one row per sample. Defines which files from the input belong to each sample in the experiment.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
289
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
290 .. class:: infomark
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
291
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
292 **TIP** *Note* Here's an example of a good phenotype data file for your experiment.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
293
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
294 +--------------+-------------------------+-------------------------+---+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
295 |ids           |experimental variable 1  |experimental variable 2  |...|
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
296 +==============+=========================+=========================+===+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
297 |sample 1      |value 1                  |value 2                  |...|
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
298 +--------------+-------------------------+-------------------------+---+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
299 |sample 2      |value 2                  |value 1                  |...|
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
300 +--------------+-------------------------+-------------------------+---+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
301 |sample 3      |value 1                  |value 2                  |...|
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
302 +--------------+-------------------------+-------------------------+---+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
303 |sample 4      |value 2                  |value 1                  |...|
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
304 +--------------+-------------------------+-------------------------+---+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
305 |...           |value 1                  |value 2                  |...|
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
306 +--------------+-------------------------+-------------------------+---+
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
307
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
308
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
309 .. class:: infomark
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
310
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
311 *Note* The minimal transcript expression is a number used to filter out the transcripts that
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
312 are weakly expressed or not expressed at all in the samples when compared to the genome.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
313
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
314 -----------------------
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
315 **Outputs**
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
316 -----------------------
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
317
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
318 This tool has 3 outputs:
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
319
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
320 - **transcripts expression** : this is a csv file containing all the transcripts that are expressed above the transcripts expression value
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
321 - **genes expression** : this is a csv file containing all the genes that are expressed above the transcripts expression value
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
322 - **Ballgown object** : this is the ballgown object created during the process. This file can be re-used later for further analysis in an R console.
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
323
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
324 ----
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
325
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
326 **Authors**: Théo Collard [SLU Global Bioinformatics Centre], Rafael Hernández de Diego [SLU Global Bioinformatics Centre], and Tomas Klingström [SLU Global Bioinformatics Centre]
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
327 ]]></help>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
328 <citations>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
329 <citation type="doi">doi:10.1038/nprot.2016.095</citation>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
330 </citations>
05977e96375b Uploaded
theo.collard
parents: 1
diff changeset
331 </tool>