view assembly_stats_txt.xml @ 1:7556309ffbaf draft default tip

"planemo upload for repository https://github.com/phac-nml/galaxy_tools commit fb4c29f720748f46ff501140f2cd306bab6614f9"
author nml
date Fri, 29 May 2020 13:51:50 -0400
parents ad2b274663f8
children
line wrap: on
line source

<tool id="assemblystats" name="assemblystats" version="1.1.0">
	<!-- One-line summary of what the tool does. -->
	<description>Summarise an assembly (e.g. N50 metrics)</description>
	<!-- Packages resolved for the job environment. The command section of this
	     tool invokes python3 on assembly_stats_txt.py.
	     NOTE(review): gnuplot and perl-bioperl are presumably used by that
	     script; it is not visible from this file, so confirm both are still
	     required. -->
	<requirements>
		<requirement type="package" version="1.7.2">perl-bioperl</requirement>
		<requirement type="package" version="5.2.7">gnuplot</requirement>
		<requirement type="package" version="3.7.6">python</requirement>
	</requirements>
	<!-- Runs the bundled assembly_stats_txt.py script. Every output path is
	     always passed on the command line; which datasets are kept is decided
	     by the filters declared in the outputs section. The script path is
	     quoted so that a tool directory containing spaces or shell
	     metacharacters is still passed as a single argument. The -b flag is
	     only emitted when the "bucket" checkbox is ticked. -->
	<command detect_errors="exit_code"><![CDATA[
		python3 '$__tool_directory__/assembly_stats_txt.py'
		-d '$stats.extra_files_path'
		-t '$type'
		-i '$input'
		-s '$stats'
		-sc '$sortedcontigs'
		-hpng '$histogrampng'
		-spng '$summedcontigspng'
		-hd '$histogramdata'
		-scd '$summedcontigdata'
		#if $bucket
			-b
		#end if
	]]></command>
		<!-- User-facing parameters. "type" is passed to the script as -t,
		     "bucket" toggles the -b flag, "input" is the FASTA file passed as -i,
		     and "all_outputs" is not passed to the script at all: it is only read
		     by the output filters to decide which datasets are returned. -->
		<inputs>
			<param name="type" type="select" label="Type of read" help="Is this from a genomic assembly (contig), a transcriptomic assembly (isotig), or are these raw reads (read)?">
				<option value="contig" selected="true">Contig (if from genomic assembly)</option>
				<option value="isotig">Isotig (if from transcriptomic assembly)</option>
				<option value="read">Raw reads from sequencer in FASTA format (useful for 454 data)</option>
			</param>
			<param name="bucket" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Output histogram with bin sizes=1" help="Use this to specify whether or not bin sizes of 1 should be used when plotting histograms"/>
			<param name="input" type="data" format="fasta" label="Source file in FASTA format"/>
			<param name="all_outputs" type="boolean" checked="false" label="Return all output files" help="If checked, all output files will be displayed. If not checked, only the file 'Assembly Statistics' will be provided."/>
		</inputs>
		<!-- "stats" is always returned. The remaining five datasets are only
		     returned when the "all_outputs" checkbox is ticked. -->
		<outputs>
			<data name="stats" format="tabular" label="Assembly statistics - $input.display_name"/>
			<data name="sortedcontigs" format="fasta" label="Sorted contigs - $input.display_name">
				<filter>all_outputs is True</filter>
			</data>
			<data name="histogrampng" format="png" label="Histogram of contig sizes - $input.display_name">
				<filter>all_outputs is True</filter>
			</data>
			<data name="summedcontigspng" format="png" label="Cumulative sum of contig sizes - $input.display_name">
				<filter>all_outputs is True</filter>
			</data>
			<data name="histogramdata" format="tabular" label="Histogram data - $input.display_name">
				<filter>all_outputs is True</filter>
			</data>
			<data name="summedcontigdata" format="tabular" label="Cumulative sum of contig size data - $input.display_name">
				<filter>all_outputs is True</filter>
			</data>
		</outputs>
		<!-- Because five of the six outputs are filtered, each test pins the
		     number of datasets it expects so that a broken filter is detected.
		     The second test exercises the all_outputs=true branch; "all_outputs"
		     is never passed to the script, so the statistics file is expected to
		     match the same reference. -->
		<tests>
			<test expect_num_outputs="1">
				<param name="input" value="SRR1002850_SMALL.fasta"/>
				<output name="stats" value="Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular"/>
			</test>
			<test expect_num_outputs="6">
				<param name="input" value="SRR1002850_SMALL.fasta"/>
				<param name="all_outputs" value="true"/>
				<output name="stats" value="Assembly_statistics_-_SRR1002850_SMALL.fasta.tabular"/>
			</test>
		</tests>
		<!-- Help is reStructuredText. The text is kept at column 0 because uniform
		     leading indentation is interpreted as a block quote, and the list of
		     reported values is a real bullet list so each item renders on its
		     own line. -->
		<help><![CDATA[
**Summarise assembly overview**

This script is used to give summary statistics of an assembly or set of reads. Typically this is run after an assembly to evaluate gross features.

**Gives back**

- N50
- Number of contigs > 1 kb
- Number of contigs
- Read or contig histogram and graphs
- Summed contig length (by number of contigs, in sorted order)
		]]></help>
		<!-- BibTeX entry for the tool. Braces must balance (one opening brace for
		     the entry, one closing brace), and multiple authors are separated by
		     the keyword "and" so they are parsed as individual names. -->
		<citations>
			<citation type="bibtex">@ARTICLE{a1,
				title = {Summarise an assembly (e.g. N50 metrics)},
				author = {Konrad Paszkiewicz and Sujai Kumar and Mariam Iskander},
				url = {https://github.com/phac-nml/galaxy_tools/}
				}</citation>
		</citations>
</tool>