Mercurial > repos > lparsons > htseq_count
diff htseq-count.xml @ 10:5d969cb56112
Version 0.3 - paired-end sorting is now built-in (uses Picard tools)
author | lparsons |
---|---|
date | Fri, 07 Dec 2012 14:35:44 -0500 |
parents | 971e20519fb8 |
children | f320093f1e8e |
line wrap: on
line diff
--- a/htseq-count.xml Fri Oct 26 15:57:08 2012 -0400 +++ b/htseq-count.xml Fri Dec 07 14:35:44 2012 -0500 @@ -1,10 +1,11 @@ -<tool id="htseq_count" name="htseq-count" version="0.2.1"> +<tool id="htseq_count" name="htseq-count" version="0.3"> <description> - Count aligned reads in a BAM file that overlap features in a GFF file</description> <version_command>htseq-count -h | grep version | sed 's/^\(.*\)*\(version .*\)\./\2/'</version_command> <requirements> <requirement type="package" version="1.6.2">numpy</requirement> <requirement type="package" version="0.5.3p9">htseq</requirement> <requirement type="package" version="0.1.18">samtools</requirement> + <requirement type="package" version="1.56.0">picard</requirement> </requirements> <command> ##set up input files @@ -17,9 +18,16 @@ #set $reference_fasta_filename = str( $samout_conditional.reference_source.ref_file.fields.path ) #end if #end if - - #if $samfile.extension == "bam": - samtools view $samfile | + #if str($singlepaired) == "paired": + ln -s $samfile local_input.sam && + java -Xmx2G -jar "\$JAVA_JAR_PATH/SortSam.jar" VALIDATION_STRINGENCY=LENIENT SORT_ORDER=queryname O=prepared_input.sam I=local_input.sam TMP_DIR="${__new_file_path__}" + || echo "Error running Picard MergeSamFiles" >&2 && + #else: + #if $samfile.extension == "bam": + samtools view $samfile | + #else + ln -s $samfile prepared_input.sam && + #end if #end if htseq-count --mode=$mode @@ -30,19 +38,26 @@ #if $samout_conditional.samout: --samout=$__new_file_path__/${samoutfile.id}_tmp #end if - #if $samfile.extension == "bam": - - - #else - $samfile - #end if + #if str($singlepaired) == "paired": + prepared_input.sam + #else: + #if $samfile.extension == "bam": + - + #else: + prepared_input.sam + #end if + #end if $gfffile | awk '{if ($1 ~ "no_feature|ambiguous|too_low_aQual|not_aligned|alignment_not_unique") print $0 | "cat 1>&2"; else print $0}' > $counts 2>$othercounts #if $samout_conditional.samout: && samtools view -Su -t 
${reference_fasta_filename}.fai $__new_file_path__/${samoutfile.id}_tmp | samtools sort -o - sorted > $samoutfile #end if</command> <inputs> - <param format="sam, bam" name="samfile" type="data" label="Aligned SAM/BAM File"> - <help>Paired-End data MUST be sorted by QUERY NAME, use "NGS: Picard - Paired Read Mate Fixer" to sort by QUERY NAME and output to SAM (not BAM) before using this tool on paired data.</help> + <param format="sam, bam" name="samfile" type="data" label="Aligned SAM/BAM File"/> + <param name="singlepaired" type="select" label="Is this library mate-paired?"> + <help>Paired libraries will be sorted by read name prior to counting.</help> + <option value="single" selected="true">single-end</option> + <option value="paired">paired-end</option> </param> <param format="gff" name="gfffile" type="data" label="GFF File"/> <param name="mode" type="select" label="Mode"> @@ -93,9 +108,9 @@ </inputs> <outputs> - <data format="tabular" name="counts" label="${tool.name} on ${on_string}"/> - <data format="tabular" name="othercounts" label="${tool.name} on ${on_string} (no feature)"/> - <data format="bam" name="samoutfile" label="${tool.name} on ${on_string} (BAM)"> + <data format="tabular" name="counts" metadata_source="samfile" label="${tool.name} on ${on_string}"/> + <data format="tabular" name="othercounts" metadata_source="samfile" label="${tool.name} on ${on_string} (no feature)"/> + <data format="bam" name="samoutfile" metadata_source="samfile" label="${tool.name} on ${on_string} (BAM)"> <filter>samout_conditional['samout']</filter> </data> </outputs> @@ -107,6 +122,7 @@ <regex match="Error: Feature (.+) does not contain a '(.+)' attribute" source="both" level="fatal" description="Error parsing the GFF file, at least one feature of the specified 'Feature type' does not have a value for the specified 'ID Attribute'" /> <regex match="Error occured in line (\d+) of file" source="stderr" level="fatal" description="Unknown error parsing the GFF file" /> <regex 
match="Error" source="stderr" level="fatal" description="Unknown error occured" /> + <regex match="Warning: Read (.+) claims to have an aligned mate which could not be found. \(Is the SAM file properly sorted\?\)" source="stderr" level="warning" description="PAIRED DATA MISSING OR NOT PROPERLY SORTED. Try reruning and selecting the paired-end option. See stderr output of this dataset for more information." /> </stdio> <tests> @@ -124,6 +140,14 @@ <output name="counts" file="htseq-test_counts.tsv" /> <output name="othercounts" file="htseq-test_othercounts.tsv" /> </test> + <test> + <param name="samfile" value="htseq-test-paired.bam" /> + <param name="singlepaired" value="paired" /> + <param name="gfffile" value="htseq-test.gff" /> + <param name="samout" value="False" /> + <output name="counts" file="htseq-test-paired_counts.tsv" /> + <output name="othercounts" file="htseq-test-paired_othercounts.tsv" /> + </test> <!-- Seems to be an issue setting the $reference_fasta_filename variable during test <test> <param name="samfile" value="htseq-test.sam" />