csem: csem.xml annotate

annotate csem.xml @ 11:b0290425de13 default tip

Uploaded

author	dongjun
date	Thu, 03 Nov 2011 21:20:23 -0400
parents	5fd51ab70dad
children

rev	line source
11 b0290425de13 Uploaded dongjun parents: 9 diff changeset	1 <tool id="csem" name="CSEM: Multi-read Allocation for ChIP-seq" version="1.0.1">
9 5fd51ab70dad Uploaded dongjun parents: diff changeset	2
5fd51ab70dad Uploaded dongjun parents: diff changeset	3 <description></description>
5fd51ab70dad Uploaded dongjun parents: diff changeset	4
5fd51ab70dad Uploaded dongjun parents: diff changeset	5 <parallelism method="basic"></parallelism>
5fd51ab70dad Uploaded dongjun parents: diff changeset	6
5fd51ab70dad Uploaded dongjun parents: diff changeset	7 <requirements>
5fd51ab70dad Uploaded dongjun parents: diff changeset	8 <requirement type="binary">csem</requirement>
5fd51ab70dad Uploaded dongjun parents: diff changeset	9 <requirement type="package">bowtie</requirement>
5fd51ab70dad Uploaded dongjun parents: diff changeset	10 </requirements>
5fd51ab70dad Uploaded dongjun parents: diff changeset	11
5fd51ab70dad Uploaded dongjun parents: diff changeset	12 <command interpreter="perl">
5fd51ab70dad Uploaded dongjun parents: diff changeset	13 csem_wrapper.pl
5fd51ab70dad Uploaded dongjun parents: diff changeset	14 ## Input file name
5fd51ab70dad Uploaded dongjun parents: diff changeset	15 $InputParams.Input
5fd51ab70dad Uploaded dongjun parents: diff changeset	16 ## Input file format (FASTA or FASTQ)
5fd51ab70dad Uploaded dongjun parents: diff changeset	17 $InputParams.InfileFormat
5fd51ab70dad Uploaded dongjun parents: diff changeset	18 ## Output file name
5fd51ab70dad Uploaded dongjun parents: diff changeset	19 $out_csem
5fd51ab70dad Uploaded dongjun parents: diff changeset	20 ## Output file format
5fd51ab70dad Uploaded dongjun parents: diff changeset	21 $OutfileFormat
5fd51ab70dad Uploaded dongjun parents: diff changeset	22 ## Reference genome index for Bowtie
5fd51ab70dad Uploaded dongjun parents: diff changeset	23 $index.fields.path
5fd51ab70dad Uploaded dongjun parents: diff changeset	24 ## Generate pseudo-tags?
5fd51ab70dad Uploaded dongjun parents: diff changeset	25 $pseudoTag
5fd51ab70dad Uploaded dongjun parents: diff changeset	26 ## Bowtie settings (Max num of mismatches, Max num of aligned positions)
5fd51ab70dad Uploaded dongjun parents: diff changeset	27 #if $bowtieParams.bSettingsType == "preSet"
5fd51ab70dad Uploaded dongjun parents: diff changeset	28 2
5fd51ab70dad Uploaded dongjun parents: diff changeset	29 99
5fd51ab70dad Uploaded dongjun parents: diff changeset	30 #else
5fd51ab70dad Uploaded dongjun parents: diff changeset	31 $bowtieParams.Mismatch
5fd51ab70dad Uploaded dongjun parents: diff changeset	32 $bowtieParams.SuppressAlign
5fd51ab70dad Uploaded dongjun parents: diff changeset	33 #end if
5fd51ab70dad Uploaded dongjun parents: diff changeset	34 ## CSEM settings (window size, number of iterations)
5fd51ab70dad Uploaded dongjun parents: diff changeset	35 #if $csemParams.cSettingsType == "preSet"
5fd51ab70dad Uploaded dongjun parents: diff changeset	36 201
5fd51ab70dad Uploaded dongjun parents: diff changeset	37 200
5fd51ab70dad Uploaded dongjun parents: diff changeset	38 #else
5fd51ab70dad Uploaded dongjun parents: diff changeset	39 $csemParams.windowSize
5fd51ab70dad Uploaded dongjun parents: diff changeset	40 $csemParams.nIteration
5fd51ab70dad Uploaded dongjun parents: diff changeset	41 #end if
5fd51ab70dad Uploaded dongjun parents: diff changeset	42 ## Number of cores to use
5fd51ab70dad Uploaded dongjun parents: diff changeset	43 8
5fd51ab70dad Uploaded dongjun parents: diff changeset	44 </command>
5fd51ab70dad Uploaded dongjun parents: diff changeset	45
5fd51ab70dad Uploaded dongjun parents: diff changeset	46 <inputs>
5fd51ab70dad Uploaded dongjun parents: diff changeset	47 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed - contact Galaxy team.">
5fd51ab70dad Uploaded dongjun parents: diff changeset	48 <options from_data_table="bowtie_indexes">
5fd51ab70dad Uploaded dongjun parents: diff changeset	49 <filter type="sort_by" column="2" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	50 <validator type="no_options" message="No indexes are available" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	51 </options>
5fd51ab70dad Uploaded dongjun parents: diff changeset	52 </param>
5fd51ab70dad Uploaded dongjun parents: diff changeset	53 <conditional name="InputParams">
5fd51ab70dad Uploaded dongjun parents: diff changeset	54 <param name="InfileFormat" type="select" label="Select file format to process" help="Bowtie accepts FASTA or FASTQ file formats.">
5fd51ab70dad Uploaded dongjun parents: diff changeset	55 <option value="fasta">FASTA</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	56 <option value="fastq">FASTQ</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	57 </param>
5fd51ab70dad Uploaded dongjun parents: diff changeset	58 <when value="fasta">
5fd51ab70dad Uploaded dongjun parents: diff changeset	59 <param name="Input" type="data" format="fasta" label="FASTA file"/>
5fd51ab70dad Uploaded dongjun parents: diff changeset	60 </when>
5fd51ab70dad Uploaded dongjun parents: diff changeset	61 <when value="fastq">
5fd51ab70dad Uploaded dongjun parents: diff changeset	62 <param name="Input" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="FASTQ file"/>
5fd51ab70dad Uploaded dongjun parents: diff changeset	63 </when>
5fd51ab70dad Uploaded dongjun parents: diff changeset	64 </conditional> <!-- InputParams -->
5fd51ab70dad Uploaded dongjun parents: diff changeset	65 <param name="OutfileFormat" type="select" label="Select file format to export" help="Multi-read allocator can export results into BED or GFF file formats, or as a table.">
5fd51ab70dad Uploaded dongjun parents: diff changeset	66 <option value="bed">BED</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	67 <option value="gff">GFF</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	68 <option value="table">table</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	69 </param>
5fd51ab70dad Uploaded dongjun parents: diff changeset	70 <param name="pseudoTag" type="select" label="Generate pseudo-tags?" help="See section 'Pseudo-tags' in the help below for more details.">
5fd51ab70dad Uploaded dongjun parents: diff changeset	71 <option value="N">NO</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	72 <option value="Y">YES</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	73 </param>
5fd51ab70dad Uploaded dongjun parents: diff changeset	74 <conditional name="bowtieParams">
5fd51ab70dad Uploaded dongjun parents: diff changeset	75 <param name="bSettingsType" type="select" label="Bowtie settings to use" help="For most mapping applications, use the 'Commonly used' settings. If you want full control, use 'Full parameter list'.">
5fd51ab70dad Uploaded dongjun parents: diff changeset	76 <option value="preSet">Commonly used</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	77 <option value="full">Full parameter list</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	78 </param>
5fd51ab70dad Uploaded dongjun parents: diff changeset	79 <when value="preSet" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	80 <when value="full">
5fd51ab70dad Uploaded dongjun parents: diff changeset	81 <param name="Mismatch" type="integer" value="2" label="Maximum number of mismatches permitted (-v)" help="May be 0, 1, 2, or 3." />
5fd51ab70dad Uploaded dongjun parents: diff changeset	82 <param name="SuppressAlign" type="integer" value="99" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="99 is appropriate for most cases. Use -1 for no limit." />
5fd51ab70dad Uploaded dongjun parents: diff changeset	83 </when> <!-- full -->
5fd51ab70dad Uploaded dongjun parents: diff changeset	84 </conditional> <!-- bowtieParams -->
5fd51ab70dad Uploaded dongjun parents: diff changeset	85 <conditional name="csemParams">
5fd51ab70dad Uploaded dongjun parents: diff changeset	86 <param name="cSettingsType" type="select" label="CSEM settings to use" help="For most multi-read allocation applications, use the 'Commonly used' settings. If you want full control, use 'Full parameter list'.">
5fd51ab70dad Uploaded dongjun parents: diff changeset	87 <option value="preSet">Commonly used</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	88 <option value="full">Full parameter list</option>
5fd51ab70dad Uploaded dongjun parents: diff changeset	89 </param>
5fd51ab70dad Uploaded dongjun parents: diff changeset	90 <when value="preSet" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	91 <when value="full">
5fd51ab70dad Uploaded dongjun parents: diff changeset	92 <param name="windowSize" type="integer" value="201" label="Window size for the multi-read allocator" help="Set window size to some odd number close to the average fragment length." />
5fd51ab70dad Uploaded dongjun parents: diff changeset	93 <param name="nIteration" type="integer" value="200" label="Number of iterations for the multi-read allocator" help="200 is appropriate for most cases." />
5fd51ab70dad Uploaded dongjun parents: diff changeset	94 </when> <!-- full -->
5fd51ab70dad Uploaded dongjun parents: diff changeset	95 </conditional> <!-- csemParams -->
5fd51ab70dad Uploaded dongjun parents: diff changeset	96 </inputs>
5fd51ab70dad Uploaded dongjun parents: diff changeset	97
5fd51ab70dad Uploaded dongjun parents: diff changeset	98 <outputs>
5fd51ab70dad Uploaded dongjun parents: diff changeset	99 <data format="tabular" name="out_csem">
5fd51ab70dad Uploaded dongjun parents: diff changeset	100 <change_format>
5fd51ab70dad Uploaded dongjun parents: diff changeset	101 <when input="OutfileFormat" value="bed" format="bed" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	102 <when input="OutfileFormat" value="gff" format="gff" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	103 </change_format>
5fd51ab70dad Uploaded dongjun parents: diff changeset	104 </data>
5fd51ab70dad Uploaded dongjun parents: diff changeset	105 </outputs>
5fd51ab70dad Uploaded dongjun parents: diff changeset	106
5fd51ab70dad Uploaded dongjun parents: diff changeset	107 <tests>
5fd51ab70dad Uploaded dongjun parents: diff changeset	108 <test>
5fd51ab70dad Uploaded dongjun parents: diff changeset	109 <param name="index" value="eschColi_K12" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	110 <param name="InfileFormat" value="fasta" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	111 <param name="Input" ftype="fasta" value="csem_test1_in.fa" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	112 <param name="OutfileFormat" value="bed" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	113 <param name="pseudoTag" value="N" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	114 <param name="bSettingsType" value="preSet" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	115 <param name="cSettingsType" value="preSet" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	116 <output name="out_csem" ftype="bed" file="csem_test1_out_original_sorted.bed" sort="True" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	117 </test>
5fd51ab70dad Uploaded dongjun parents: diff changeset	118 <test>
5fd51ab70dad Uploaded dongjun parents: diff changeset	119 <param name="index" value="eschColi_K12" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	120 <param name="InfileFormat" value="fastq" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	121 <param name="Input" ftype="fastq" value="csem_test1_in.fq" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	122 <param name="OutfileFormat" value="bed" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	123 <param name="pseudoTag" value="Y" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	124 <param name="bSettingsType" value="preSet" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	125 <param name="cSettingsType" value="preSet" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	126 <output name="out_csem" ftype="bed" file="csem_test1_out_pseudo_sorted.bed" sort="True" />
5fd51ab70dad Uploaded dongjun parents: diff changeset	127 </test>
5fd51ab70dad Uploaded dongjun parents: diff changeset	128 </tests>
5fd51ab70dad Uploaded dongjun parents: diff changeset	129
5fd51ab70dad Uploaded dongjun parents: diff changeset	130 <help>
5fd51ab70dad Uploaded dongjun parents: diff changeset	131
5fd51ab70dad Uploaded dongjun parents: diff changeset	132 What it does
5fd51ab70dad Uploaded dongjun parents: diff changeset	133
5fd51ab70dad Uploaded dongjun parents: diff changeset	134 CSEM (ChIP-Seq multi-read allocation using E-M algorithm) is a multi-read allocation algorithm. Multi-reads are the reads that map to multiple locations on the reference genome. The most common analysis of ChIP-seq data relies on using only reads that map uniquely to the relevant reference genome (uni-reads). This can lead to the omission of up to 30% of alignable reads. Chung et al. (2011) illustrated that incorporation of multi-reads significantly increases sequencing depths, leads to detection of novel peaks that are not otherwise identifiable with uni-reads, and improves detection of peaks in low mappable regions. The computational and experimental results established that multi-reads can be of critical importance for studying DNA-protein interactions in highly repetitive regions of genomes with ChIP-seq experiments. Output from CSEM can be used with other peak callers such as MOSAiCS and MACS to identify peaks that are in both high and low mappable regions of genomes.
5fd51ab70dad Uploaded dongjun parents: diff changeset	135
5fd51ab70dad Uploaded dongjun parents: diff changeset	136 Please cite: Chung D, Kuan PF, Li B, SanalKumar R, Liang K, Bresnick E, Dewey C, and Keles S (2011),
5fd51ab70dad Uploaded dongjun parents: diff changeset	137 "Discovering transcription factor binding sites in highly repetitive regions of genomes
5fd51ab70dad Uploaded dongjun parents: diff changeset	138 with multi-read analysis of ChIP-Seq data," PLoS Computational Biology, 7(7): e1002111.
5fd51ab70dad Uploaded dongjun parents: diff changeset	139
5fd51ab70dad Uploaded dongjun parents: diff changeset	140 ------
5fd51ab70dad Uploaded dongjun parents: diff changeset	141
5fd51ab70dad Uploaded dongjun parents: diff changeset	142 Input formats
5fd51ab70dad Uploaded dongjun parents: diff changeset	143
5fd51ab70dad Uploaded dongjun parents: diff changeset	144 CSEM accepts short reads aligned using bowtie as input. Bowtie accepts single-end reads, in FASTA or FASTQ format, as input. Quality scores of reads are ignored.
5fd51ab70dad Uploaded dongjun parents: diff changeset	145
5fd51ab70dad Uploaded dongjun parents: diff changeset	146 ------
5fd51ab70dad Uploaded dongjun parents: diff changeset	147
5fd51ab70dad Uploaded dongjun parents: diff changeset	148 Pseudo-tags
5fd51ab70dad Uploaded dongjun parents: diff changeset	149
5fd51ab70dad Uploaded dongjun parents: diff changeset	150 For each read in the alignment file, CSEM estimates the fraction of the read allocated to each of its alignments. This fraction reflects the degree of confidence in each particular alignment. Currently, only the peak caller MOSAiCS can accept fractions of reads as input. However, you can incorporate multi-reads into ChIP-seq analysis with your favorite peak-caller by utilizing this pseudo-tag functionality. Pseudo-tags are generated by assigning each multi-read to the location it maps to with the largest weight and filtering out multi-reads with weights less than 0.5. Although summarizing CSEM output as pseudo-tags decreases the number of utilized multi-reads, it still leads to a significant increase in the sequencing depth compared to using uni-reads alone and facilitates identification of peaks in repetitive regions.
5fd51ab70dad Uploaded dongjun parents: diff changeset	151
5fd51ab70dad Uploaded dongjun parents: diff changeset	152 ------
5fd51ab70dad Uploaded dongjun parents: diff changeset	153
5fd51ab70dad Uploaded dongjun parents: diff changeset	154 Outputs
5fd51ab70dad Uploaded dongjun parents: diff changeset	155
5fd51ab70dad Uploaded dongjun parents: diff changeset	156 Currently, results from CSEM can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single alignment. The lines of the output file are ordered such that all of the unique read alignments appear first. If pseudo-tags are generated, FRAC equals 1 for all reads if the output is a table, and score is set to 1000 for all reads in the BED and GFF formats.
5fd51ab70dad Uploaded dongjun parents: diff changeset	157
5fd51ab70dad Uploaded dongjun parents: diff changeset	158 If the output is a table, it has the following columns::
5fd51ab70dad Uploaded dongjun parents: diff changeset	159
5fd51ab70dad Uploaded dongjun parents: diff changeset	160 Column Description
5fd51ab70dad Uploaded dongjun parents: diff changeset	161 -------- --------------------------------------------------------
5fd51ab70dad Uploaded dongjun parents: diff changeset	162 1 RID ID of a read
5fd51ab70dad Uploaded dongjun parents: diff changeset	163 2 CID Chromosome of the alignment
5fd51ab70dad Uploaded dongjun parents: diff changeset	164 3 DIR Strand of the alignment (+ or -)
5fd51ab70dad Uploaded dongjun parents: diff changeset	165 4 POS Left-most position of the aligned read (the first base in a chromosome is numbered 1)
5fd51ab70dad Uploaded dongjun parents: diff changeset	166 5 FRAC Fraction of the read allocated to the alignment (which is 1 for uni-reads)
5fd51ab70dad Uploaded dongjun parents: diff changeset	167
5fd51ab70dad Uploaded dongjun parents: diff changeset	168 If the output is in BED format, it has the following columns::
5fd51ab70dad Uploaded dongjun parents: diff changeset	169
5fd51ab70dad Uploaded dongjun parents: diff changeset	170 Column Description
5fd51ab70dad Uploaded dongjun parents: diff changeset	171 ------------ --------------------------------------------------------
5fd51ab70dad Uploaded dongjun parents: diff changeset	172 1 chrom Chromosome of the alignment
5fd51ab70dad Uploaded dongjun parents: diff changeset	173 2 chromStart Start position of the aligned read (the first base in a chromosome is numbered 0)
5fd51ab70dad Uploaded dongjun parents: diff changeset	174 3 chromEnd End position of the aligned read (the first base in a chromosome is numbered 0)
5fd51ab70dad Uploaded dongjun parents: diff changeset	175 4 name ID of a read
5fd51ab70dad Uploaded dongjun parents: diff changeset	176 5 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)
5fd51ab70dad Uploaded dongjun parents: diff changeset	177 6 strand Strand of the alignment (+ or -)
5fd51ab70dad Uploaded dongjun parents: diff changeset	178
5fd51ab70dad Uploaded dongjun parents: diff changeset	179 If the output is in GFF format, it has the following columns::
5fd51ab70dad Uploaded dongjun parents: diff changeset	180
5fd51ab70dad Uploaded dongjun parents: diff changeset	181 Column Description
5fd51ab70dad Uploaded dongjun parents: diff changeset	182 --------- --------------------------------------------------------
5fd51ab70dad Uploaded dongjun parents: diff changeset	183 1 seqname Chromosome of the alignment
5fd51ab70dad Uploaded dongjun parents: diff changeset	184 2 source Always "CSEM"
5fd51ab70dad Uploaded dongjun parents: diff changeset	185 3 feature ID of a read
5fd51ab70dad Uploaded dongjun parents: diff changeset	186 4 start Start position of the aligned read (the first base in a chromosome is numbered 1)
5fd51ab70dad Uploaded dongjun parents: diff changeset	187 5 end End position of the aligned read (the first base in a chromosome is numbered 1)
5fd51ab70dad Uploaded dongjun parents: diff changeset	188 6 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)
5fd51ab70dad Uploaded dongjun parents: diff changeset	189 7 strand Strand of the alignment (+ or -)
5fd51ab70dad Uploaded dongjun parents: diff changeset	190 8 frame Always "."
5fd51ab70dad Uploaded dongjun parents: diff changeset	191 9 group Always "."
5fd51ab70dad Uploaded dongjun parents: diff changeset	192
5fd51ab70dad Uploaded dongjun parents: diff changeset	193
5fd51ab70dad Uploaded dongjun parents: diff changeset	194 </help>
5fd51ab70dad Uploaded dongjun parents: diff changeset	195 </tool>

Mercurial > repos > dongjun > csem

annotate csem.xml @ 11:b0290425de13 default tip