comparison size_histogram.xml @ 0:234b83159ea8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
author artbio
date Tue, 11 Jul 2017 11:44:36 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:234b83159ea8
1 <tool id="artbio_size_histogram" name="Generate read size histograms" version="1.0.0">
2 <description>from alignment files</description>
3 <requirements> <!-- Pinned package dependencies; the "version=build" strings look like conda build pins (TODO confirm) -->
4 <requirement type="package" version="1.2.0=py27_0">bowtie</requirement>
5 <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
6 <requirement type="package" version="1.9.3">numpy</requirement>
7 <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
8 <requirement type="package" version="0.6_28=r3.3.2_0">r-latticeextra</requirement>
9 <requirement type="package" version="2.2.1=r3.3.2_0">r-gridextra</requirement>
10 </requirements>
11 <command detect_errors="exit_code"><![CDATA[
12 python '$__tool_directory__'/size_histogram.py ## step 1: build the size distribution table
13 #if $refGenomeSource.genomeSource == "history":
14 --reference_fasta ## reference given as a fasta dataset from the history
15 '$refGenomeSource.ownFile' ## index source
16 #else:
17 #silent reference= [ x for x in $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() if str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ) ][0][-1]
18 --reference_bowtie_index ## last column of the bowtie_indexes row whose first column equals the dbkey of the first alignment
19 '$reference'
20 #end if
21 --output_size_distribution
22 '$size_distribution_dataframe'
23 --minquery
24 $minquery
25 --maxquery
26 $maxquery
27 --input
28 #for $i in $refGenomeSource.series
29 '$i.input'
30 #end for
31 --ext
32 #for $i in $refGenomeSource.series
33 '$i.input.ext'
34 #end for
35 --label ## single-quoted so the shell does not expand characters in dataset names
36 #for $i in $refGenomeSource.series
37 '$i.input.element_identifier'
38 #end for
39 #if $gff:
40 --gff '$gff'
41 #end if
42 #if $global.value == 'yes':
43 --global_size
44 #end if
45 #if $collapsestrands.value == 'yes':
46 --collapse
47 #end if
48 --normalization_factor
49 #for $i in $refGenomeSource.series
50 $i.norm
51 #end for
52 && ## step 2: plot the table written by step 1
53 Rscript '$__tool_directory__'/size_histogram.r
54 --global '$global'
55 --size_distribution_tab '$size_distribution_dataframe'
56 --size_distribution_pdf '$size_PDF'
57 --title '$title'
58 --ylabel '$ylabel'
59 --yrange '$yrange'
60 --rows_per_page '$rows_per_page'
61 ]]></command>
62 <inputs> <!-- Reference source, alignment series, optional GFF, and plot options -->
63 <conditional name="refGenomeSource">
64 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
65 <option value="indexed">Use a built-in index</option>
66 <option value="history">Use one from the history</option>
67 </param>
68 <when value="indexed">
69 <repeat name="series" title="Add alignment files">
70 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">
71 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
72 </param>
73 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
74 </repeat>
75 </when>
76 <when value="history">
77 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
78 <repeat name="series" title="Add alignment files">
79 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>
80 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
81 </repeat>
82 </when>
83 </conditional>
84 <param name="gff" type="data" format="gff,gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
85 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
86 <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">
87 <option value="no">for each item</option>
88 <option value="yes">global</option>
89 </param>
90 <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">
91 <option value="no">Do not collapse</option>
92 <option value="yes">Collapse + and - reads</option>
93 </param>
94 <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/>
95 <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/>
96 <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>
97 <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/> <!-- NOTE(review): $xlabel is not referenced in the command section of this tool -->
98 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
99 <param name="yrange" type="integer" size="3" value="0" label="y axis range for size distributions. 0 means auto-scaling."/>
100 <param name="rows_per_page" type="integer" size="9" value="8" label="How many items to display per page?"> <!-- integer so that the numeric in_range validator below applies to a number, not free text -->
101 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
102 </param>
103 </inputs>
104
105 <outputs> <!-- Tabular table written by size_histogram.py and the PDF written by size_histogram.r -->
106 <data name="size_distribution_dataframe" format="tabular" label="Size_distribution_dataframe.tab"/>
107 <data name="size_PDF" format="pdf" label="Size_distribution.pdf"/>
108 </outputs>
109
110 <help>
111
112 **What it does**
113
114 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,
115 where by default for each "chromosome" a histogram of read sizes is drawn.
116 Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).
117
118
119 .. class:: warningmark
120
121 **TIP** The input data can be produced using the sRbowtie tool.
122
123 ----
124
125 **Example**
126
127 Query sequence::
128 For a SAM file as the following:
129
130 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
131
132 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
133
134 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
135
136 produce a plot like this:
137
138 ----
139
140 .. image:: static/images/size_histogram.png
141 :height: 800
142 :width: 500
143
144 </help>
145 <tests>
146 <test> <!-- history fasta reference, three tabular alignments, one histogram per item, strands not collapsed -->
147 <param name="genomeSource" value="history"/>
148 <param name="ownFile" value="transposons.fasta" ftype="fasta"/>
149 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>
150 <param name="series_0|norm" value="1"/>
151 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>
152 <param name="series_1|norm" value="1"/>
153 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>
154 <param name="series_2|norm" value="1"/>
155 <param name="global" value="no"/>
156 <param name="collapsestrands" value="no"/>
157 <param name="minquery" value="18"/>
158 <param name="maxquery" value="30"/>
159 <param name="title" value="Size distribution"/>
160 <param name="xlabel" value="Size in nucleotides"/>
161 <param name="ylabel" value="Number of reads"/>
162 <param name="rows_per_page" value="10"/>
163 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab"/>
164 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf"/>
165 </test>
166 </tests>
167 </tool>
168