Mercurial > repos > lparsons > htseq_count
changeset 23:6e5c95760ab1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq_count commit ee302cfd1ae3f7fcb3def4359e372341a7272790
field | value |
---|---|
author | iuc |
date | Wed, 21 Sep 2016 10:59:41 -0400 |
parents | 55ed198f2c1c |
children | 620d5603d1a8 |
files | htseq-count.xml test-data/htseq-test_reference.fasta test-data/htseq-test_samout.bam |
diffstat | 3 files changed, 63 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/htseq-count.xml Tue Sep 20 17:51:06 2016 -0400 +++ b/htseq-count.xml Wed Sep 21 10:59:41 2016 -0400 @@ -28,6 +28,7 @@ #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) #end if #end if + #if $force_sort == "True": #if $samfile.extension == 'bam': samtools sort -n "$samfile" "name_sorted_alignment" && @@ -35,14 +36,15 @@ samtools view -Su -t "${reference_fasta_filename}.fai" "$samfile" | samtools sort -n - "name_sorted_alignment" && #end if #end if + htseq-count - --mode=$mode - --stranded=$stranded - --minaqual=$minaqual - --type="$featuretype" - --idattr="$idattr" + --mode=$mode + --stranded=$stranded + --minaqual=$minaqual + --type="$featuretype" + --idattr="$idattr" #if $samout_conditional.samout == "Yes": - --samout=$__new_file_path__/${samoutfile.id}_tmp + --samout='$__new_file_path__/${samoutfile.id}_tmp' #end if #if $force_sort == "True": --order=name @@ -51,12 +53,18 @@ #else --order=pos --format=$samfile.extension - $samfile + '$samfile' #end if - "$gfffile" - | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts + + "$gfffile" | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' + > '$counts' + 2> '$othercounts' + #if $samout_conditional.samout == "Yes": - && samtools view -Su -t "${reference_fasta_filename}.fai" "$__new_file_path__/${samoutfile.id}_tmp" | samtools sort -o - sorted > "$samoutfile" + && samtools view -Su + -t "${reference_fasta_filename}.fai" + "$__new_file_path__/${samoutfile.id}_tmp" + | samtools sort -o - name_sorted_alignment > "$samoutfile" #end if ]]> </command> @@ -105,7 +113,7 @@ <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> </param> </when> - <when value="history"> <!-- FIX ME!!!! 
--> + <when value="history"> <param name="ref_file" type="data" format="fasta" label="Using reference file" /> </when> </conditional> @@ -120,7 +128,11 @@ </inputs> <outputs> - <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/> + <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"> + <actions> + <action name="column_names" type="metadata" default="Geneid,${samfile.name}" /> + </actions> + </data> <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/> <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)"> <filter>samout_conditional['samout'] == "Yes"</filter> @@ -167,19 +179,17 @@ <output name="counts" file="htseq-test-paired_counts.tsv" /> <output name="othercounts" file="htseq-test-paired_othercounts.tsv" /> </test> - - <!-- Seems to be an issue setting the $reference_fasta_filename variable during test + <test> <param name="samfile" value="htseq-test.sam" /> <param name="gfffile" value="htseq-test.gff" /> - <param name="samout" value="True" /> + <param name="samout" value="Yes" /> <param name="reference_source_selector" value="history" /> <param name="ref_file" value="htseq-test_reference.fasta" /> <output name="counts" file="htseq-test_counts.tsv" /> <output name="othercounts" file="htseq-test_othercounts.tsv" /> <output name="samoutfile" file="htseq-test_samout.bam" /> </test> - --> </tests> <help> @@ -277,28 +287,6 @@ </help> <citations> - <citation type="bibtex"> -@article{anders_htseqpython_2015, - title = {{HTSeq}—a {Python} framework to work with high-throughput sequencing data}, - volume = {31}, - issn = {1367-4803, 1460-2059}, - url = {http://bioinformatics.oxfordjournals.org/content/31/2/166}, - doi = {10.1093/bioinformatics/btu638}, - abstract = {Motivation: A large choice of tools exists for many standard tasks in the analysis of 
high-throughput sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed. -Results: We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data, such as genomic coordinates, sequences, sequencing reads, alignments, gene model information and variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes. -Availability and implementation: HTSeq is released as an open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index at https://pypi.python.org/pypi/HTSeq. -Contact: sanders\{at\}fs.tum.de}, - language = {en}, - number = {2}, - urldate = {2015-04-21}, - journal = {Bioinformatics}, - author = {Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang}, - month = jan, - year = {2015}, - pmid = {25260700}, - pages = {166--169}, - } -} - </citation> + <citation type="doi">10.1093/bioinformatics/btu638</citation> </citations> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/htseq-test_reference.fasta Wed Sep 21 10:59:41 2016 -0400 @@ -0,0 +1,36 @@ +>2-micron +actg +>MT +actg +>I +actg +>VI +actg +>III +actg +>IX +actg +>VIII +actg +>V +actg +>XI +actg +>X +actg +>XIV +actg +>II +actg +>XIII +actg +>XVI +actg +>XII +actg +>VII +actg +>XV +actg +>IV +actg