comparison htseq-count.xml @ 23:6e5c95760ab1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq_count commit ee302cfd1ae3f7fcb3def4359e372341a7272790
author iuc
date Wed, 21 Sep 2016 10:59:41 -0400
parents 55ed198f2c1c
children 620d5603d1a8
comparison
equal deleted inserted replaced
22:55ed198f2c1c 23:6e5c95760ab1
26 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for htseq-count" >&2 && 26 samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for htseq-count" >&2 &&
27 #else: 27 #else:
28 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) 28 #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path )
29 #end if 29 #end if
30 #end if 30 #end if
31
31 #if $force_sort == "True": 32 #if $force_sort == "True":
32 #if $samfile.extension == 'bam': 33 #if $samfile.extension == 'bam':
33 samtools sort -n "$samfile" "name_sorted_alignment" && 34 samtools sort -n "$samfile" "name_sorted_alignment" &&
34 #else 35 #else
35 samtools view -Su -t "${reference_fasta_filename}.fai" "$samfile" | samtools sort -n - "name_sorted_alignment" && 36 samtools view -Su -t "${reference_fasta_filename}.fai" "$samfile" | samtools sort -n - "name_sorted_alignment" &&
36 #end if 37 #end if
37 #end if 38 #end if
39
38 htseq-count 40 htseq-count
39 --mode=$mode 41 --mode=$mode
40 --stranded=$stranded 42 --stranded=$stranded
41 --minaqual=$minaqual 43 --minaqual=$minaqual
42 --type="$featuretype" 44 --type="$featuretype"
43 --idattr="$idattr" 45 --idattr="$idattr"
44 #if $samout_conditional.samout == "Yes": 46 #if $samout_conditional.samout == "Yes":
45 --samout=$__new_file_path__/${samoutfile.id}_tmp 47 --samout='$__new_file_path__/${samoutfile.id}_tmp'
46 #end if 48 #end if
47 #if $force_sort == "True": 49 #if $force_sort == "True":
48 --order=name 50 --order=name
49 --format=bam 51 --format=bam
50 name_sorted_alignment.bam 52 name_sorted_alignment.bam
51 #else 53 #else
52 --order=pos 54 --order=pos
53 --format=$samfile.extension 55 --format=$samfile.extension
54 $samfile 56 '$samfile'
55 #end if 57 #end if
56 "$gfffile" 58
57 | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts 59 "$gfffile" | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}'
60 > '$counts'
61 2> '$othercounts'
62
58 #if $samout_conditional.samout == "Yes": 63 #if $samout_conditional.samout == "Yes":
59 && samtools view -Su -t "${reference_fasta_filename}.fai" "$__new_file_path__/${samoutfile.id}_tmp" | samtools sort -o - sorted > "$samoutfile" 64 && samtools view -Su
65 -t "${reference_fasta_filename}.fai"
66 "$__new_file_path__/${samoutfile.id}_tmp"
67 | samtools sort -o - name_sorted_alignment > "$samoutfile"
60 #end if 68 #end if
61 ]]> 69 ]]>
62 </command> 70 </command>
63 71
64 <inputs> 72 <inputs>
103 <filter type="data_meta" key="dbkey" ref="samfile" column="1"/> 111 <filter type="data_meta" key="dbkey" ref="samfile" column="1"/>
104 </options> 112 </options>
105 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> 113 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
106 </param> 114 </param>
107 </when> 115 </when>
108 <when value="history"> <!-- FIX ME!!!! --> 116 <when value="history">
109 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> 117 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
110 </when> 118 </when>
111 </conditional> 119 </conditional>
112 </when> 120 </when>
113 <when value="No"> 121 <when value="No">
118 <help>This option can be used for paired-end data that has many unmapped mates. Use this if you get the warning about paired end data missing or not being properly sorted.</help> 126 <help>This option can be used for paired-end data that has many unmapped mates. Use this if you get the warning about paired end data missing or not being properly sorted.</help>
119 </param> 127 </param>
120 </inputs> 128 </inputs>
121 129
122 <outputs> 130 <outputs>
123 <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/> 131 <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}">
132 <actions>
133 <action name="column_names" type="metadata" default="Geneid,${samfile.name}" />
134 </actions>
135 </data>
124 <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/> 136 <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/>
125 <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)"> 137 <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)">
126 <filter>samout_conditional['samout'] == "Yes"</filter> 138 <filter>samout_conditional['samout'] == "Yes"</filter>
127 </data> 139 </data>
128 </outputs> 140 </outputs>
165 <param name="samout" value="No" /> 177 <param name="samout" value="No" />
166 <param name="force_sort" value="True" /> 178 <param name="force_sort" value="True" />
167 <output name="counts" file="htseq-test-paired_counts.tsv" /> 179 <output name="counts" file="htseq-test-paired_counts.tsv" />
168 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" /> 180 <output name="othercounts" file="htseq-test-paired_othercounts.tsv" />
169 </test> 181 </test>
170 182
171 <!-- Seems to be an issue setting the $reference_fasta_filename variable during test
172 <test> 183 <test>
173 <param name="samfile" value="htseq-test.sam" /> 184 <param name="samfile" value="htseq-test.sam" />
174 <param name="gfffile" value="htseq-test.gff" /> 185 <param name="gfffile" value="htseq-test.gff" />
175 <param name="samout" value="True" /> 186 <param name="samout" value="Yes" />
176 <param name="reference_source_selector" value="history" /> 187 <param name="reference_source_selector" value="history" />
177 <param name="ref_file" value="htseq-test_reference.fasta" /> 188 <param name="ref_file" value="htseq-test_reference.fasta" />
178 <output name="counts" file="htseq-test_counts.tsv" /> 189 <output name="counts" file="htseq-test_counts.tsv" />
179 <output name="othercounts" file="htseq-test_othercounts.tsv" /> 190 <output name="othercounts" file="htseq-test_othercounts.tsv" />
180 <output name="samoutfile" file="htseq-test_samout.bam" /> 191 <output name="samoutfile" file="htseq-test_samout.bam" />
181 </test> 192 </test>
182 -->
183 </tests> 193 </tests>
184 194
185 <help> 195 <help>
186 <![CDATA[ 196 <![CDATA[
187 Overview 197 Overview
275 Public License v3. Part of the 'HTSeq' framework. 285 Public License v3. Part of the 'HTSeq' framework.
276 ]]> 286 ]]>
277 </help> 287 </help>
278 288
279 <citations> 289 <citations>
280 <citation type="bibtex"> 290 <citation type="doi">10.1093/bioinformatics/btu638</citation>
281 @article{anders_htseqpython_2015,
282 title = {{HTSeq}—a {Python} framework to work with high-throughput sequencing data},
283 volume = {31},
284 issn = {1367-4803, 1460-2059},
285 url = {http://bioinformatics.oxfordjournals.org/content/31/2/166},
286 doi = {10.1093/bioinformatics/btu638},
287 abstract = {Motivation: A large choice of tools exists for many standard tasks in the analysis of high-throughput sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed.
288 Results: We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data, such as genomic coordinates, sequences, sequencing reads, alignments, gene model information and variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes.
289 Availability and implementation: HTSeq is released as an open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index at https://pypi.python.org/pypi/HTSeq.
290 Contact: sanders\{at\}fs.tum.de},
291 language = {en},
292 number = {2},
293 urldate = {2015-04-21},
294 journal = {Bioinformatics},
295 author = {Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
296 month = jan,
297 year = {2015},
298 pmid = {25260700},
299 pages = {166--169},
300 }
301 }
302 </citation>
303 </citations> 291 </citations>
304 </tool> 292 </tool>