Mercurial > repos > eiriche > bsmap
changeset 10:4f9b7eaecbd4 draft
Deleted selected files
author | eiriche |
---|---|
date | Fri, 30 Nov 2012 05:28:16 -0500 |
parents | 385d004f3cb1 |
children | 413c742682f7 |
files | bsmap.xml bsmap_fasta.loc.sample bsmap_meth_caller.sh bsmap_meth_caller.xml bsmap_wrapper.sh tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 0 insertions(+), 494 deletions(-) [+] |
line wrap: on
line diff
<tool id="bsmap" name="BSMAP Mapper">
  <!--
    Galaxy tool definition for the BSMAP bisulfite read mapper.

    The command template hands every setting to bsmap_wrapper.sh as a
    key=value token. Advanced settings are only sent when the user picks
    "Full parameter list"; otherwise the wrapper runs bsmap with its defaults.
  -->
  <requirements>
    <requirement type="package">bsmap</requirement>
  </requirements>
  <command interpreter="bash">
    bsmap_wrapper.sh
      ##Reference genome: either a FASTA dataset from the history or the path
      ##column of the bsmap_fasta data table entry chosen in the "index" select.
      #if $refGenomeSource.genomeSource == "history":
        ref="${refGenomeSource.myFile}"
      #else
        ref="${refGenomeSource.index.fields.path}"
      #end if
      ##Output files (SAM output, BSMAP summary)
      mapped=$mapped
      ##Temp directory
      tempdir=$mapped.files_path
      summary=$summary
      #if str($singlePaired.sPaired) == "single":
        library="single"
        mate1=$singlePaired.sInput1
        #if str($singlePaired.sParams.sSettingsType) == "full":
          fullparam=true
          qual=$singlePaired.sParams.qual
          threshold=$singlePaired.sParams.threshold
          lowqual=$singlePaired.sParams.lowqual
          adapter=$singlePaired.sParams.adapter
          firstn=$singlePaired.sParams.firstn
          repeat_reads=$singlePaired.sParams.repeat_reads
          seed_size=$singlePaired.sParams.seed_size
          mismatch=$singlePaired.sParams.mismatch
          equal_best=$singlePaired.sParams.equal_best
          start=$singlePaired.sParams.start
          end=$singlePaired.sParams.end
          index_interval=$singlePaired.sParams.index_interval
          seed_random=$singlePaired.sParams.seed_random
          rrbs=$singlePaired.sParams.rrbs
          mode=$singlePaired.sParams.mode
          align_info=$singlePaired.sParams.align_info
        #end if
      #else:
        library="paired"
        mate1=$singlePaired.pInput1
        mate2=$singlePaired.pInput2
        unpaired=$unpaired
        #if str($singlePaired.pParams.pSettingsType) == "full":
          fullparam=true
          qual=$singlePaired.pParams.qual
          threshold=$singlePaired.pParams.threshold
          lowqual=$singlePaired.pParams.lowqual
          adapter=$singlePaired.pParams.adapter
          firstn=$singlePaired.pParams.firstn
          repeat_reads=$singlePaired.pParams.repeat_reads
          seed_size=$singlePaired.pParams.seed_size
          mismatch=$singlePaired.pParams.mismatch
          equal_best=$singlePaired.pParams.equal_best
          start=$singlePaired.pParams.start
          end=$singlePaired.pParams.end
          index_interval=$singlePaired.pParams.index_interval
          seed_random=$singlePaired.pParams.seed_random
          rrbs=$singlePaired.pParams.rrbs
          mode=$singlePaired.pParams.mode
          align_info=$singlePaired.pParams.align_info
          maxinsert=$singlePaired.pParams.maxinsert
          mininsert=$singlePaired.pParams.mininsert
        #end if
      #end if
  </command>
  <inputs>

    <!-- Where the reference genome comes from -->
    <conditional name="refGenomeSource">
      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
        <option value="builtin">Use a built-in index</option>
        <option value="history">Use one from the history</option>
      </param>
      <when value="builtin">
        <param name="index" type="select" label="Select a reference genome">
          <options from_data_table="bsmap_fasta">
            <filter type="sort_by" column="2" />
            <validator type="no_options" message="No reference genomes are available" />
          </options>
        </param>
      </when>
      <when value="history">
        <param name="myFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
      </when>
    </conditional>

    <!-- Library layout; each branch carries its own copy of the advanced settings -->
    <conditional name="singlePaired">
      <param name="sPaired" type="select" label="Is this library mate-paired?">
        <option value="single">Single-end</option>
        <option value="paired">Paired-end</option>
      </param>
      <when value="single">
        <param name="sInput1" type="data" format="fastq,fasta" label="FASTQ file" help="Must have ASCII encoded quality scores"/>
        <conditional name="sParams">
          <param name="sSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
            <option value="preSet">Commonly used</option>
            <option value="full">Full parameter list</option>
          </param>
          <when value="preSet" />
          <when value="full">
            <param name="qual" type="select" label="Select the type of FastQ qualities">
              <option value="33">phred33-quals</option>
              <option value="64">phred64-quals</option>
            </param>
            <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" />
            <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing &gt;n Ns" help="default=5" />
            <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" />
            <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" />

            <param name="repeat_reads" type="select" label="How to report repeat hits">
              <option value="0">none(unique hit only)</option>
              <option value="1">random one</option>
            </param>

            <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" />
            <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" />
            <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" />
            <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" />
            <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" />
            <param name="index_interval" type="integer" value="4" label="Index interval" />
            <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" />
            <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" />
            <param name="mode" type="select" label="Set mapping strand information">
              <option value="0">only map to 2 forward strands</option>
              <option value="1">map SE or PE reads to all 4 strands</option>
            </param>
            <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=&gt;U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=&gt;I(G) transition in RNA editing." />
          </when> <!-- full -->
        </conditional> <!-- sParams -->
      </when> <!-- single -->

      <when value="paired">
        <param name="pInput1" type="data" format="fastq,fasta" label="Forward FASTQ file" />
        <param name="pInput2" type="data" format="fastq,fasta" label="Reverse FASTQ file" />

        <conditional name="pParams">
          <param name="pSettingsType" type="select" label="BSMAP settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list">
            <option value="preSet">Commonly used</option>
            <option value="full">Full parameter list</option>
          </param>
          <when value="preSet" />
          <when value="full">
            <param name="qual" type="select" label="Select the type of FastQ qualities">
              <option value="33">phred33-quals</option>
              <option value="64">phred64-quals</option>
            </param>

            <param name="mininsert" type="integer" value="28" label="Minimal insert size allowed" />
            <param name="maxinsert" type="integer" value="500" label="Maximal insert size allowed" />

            <param name="threshold" type="integer" value="0" label="Quality threshold in trimming" help="0-40, default=0 (no trim)" min="0" max="40" />
            <param name="lowqual" type="integer" value="5" label="Filter low-quality reads containing &gt;n Ns" help="default=5" />
            <param name="adapter" type="text" value="none" label="3-end adapter sequence" help="default: none (no trim)" />
            <param name="firstn" type="integer" value="144" label="Map the first N nucleotides of the read" help="default:144 (map the whole read)" />

            <param name="repeat_reads" type="select" label="How to report repeat hits">
              <option value="0">none(unique hit only)</option>
              <option value="1">random one</option>
            </param>

            <param name="seed_size" type="integer" value="16" label="Seed size" min="8" max="16" help="default=16(WGBS mode), 12(RRBS mode)" />
            <param name="mismatch" type="integer" value="2" label="Maximum number of mismatches allowed on a read" max="15" />
            <param name="equal_best" type="integer" value="20" label="Maximum number of equal best hits to count" max="1000" />
            <param name="start" type="integer" value="1" label="Start from the Nth read or read pair" />
            <param name="end" type="integer" value="4294967295" label="End at the Nth read or read pair" />
            <param name="index_interval" type="integer" value="4" label="Index interval" />
            <param name="seed_random" type="integer" value="-1" label="Seed for random number generation used in selecting multiple hits" help="other seed values generate pseudo random number based on read index number, to allow reproducible mapping results" />
            <param name="rrbs" type="text" value="none" label="Activating RRBS mapping mode and set restriction enzyme digestion sites" help="digestion position marked by '-', example: -D C-CGG for MspI digestion. default: none (whole genome shotgun bisulfite mapping mode)" />
            <param name="mode" type="select" label="Set mapping strand information">
              <option value="0">only map to 2 forward strands</option>
              <option value="1">map SE or PE reads to all 4 strands</option>
            </param>
            <param name="align_info" type="text" value="none" label="Set alignment information for the additional nucleotide transition" help="is in the form of two different nucleotides N1N2,indicating N1 in the reads could be mapped to N2 in the reference sequences. default: -M TC, corresponds to C=&gt;U(T) transition in bisulfite conversion. example: -M GA could be used to detect A=&gt;I(G) transition in RNA editing." />

          </when> <!-- full -->
        </conditional> <!-- pParams -->
      </when> <!-- paired -->
    </conditional> <!-- singlePaired -->

  </inputs>
  <outputs>
    <data name="mapped" format="sam" label="BSMAP Mapped Reads">
      <!--
        Propagate the genome build to the SAM output. For a built-in
        reference the dbkey is looked up in the bsmap_fasta data table using
        the selected "index" value; for a history reference it is copied
        from the chosen FASTA dataset.
      -->
      <actions>
        <conditional name="refGenomeSource.genomeSource">
          <when value="builtin">
            <action type="metadata" name="dbkey">
              <option type="from_data_table" name="bsmap_fasta" column="1" offset="0">
                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                <filter type="param_value" ref="refGenomeSource.index" column="0"/>
              </option>
            </action>
          </when>
          <when value="history">
            <action type="metadata" name="dbkey">
              <option type="from_param" name="refGenomeSource.myFile" param_attribute="dbkey"/>
            </action>
          </when>
        </conditional>
      </actions>
    </data>
    <data name="summary" format="txt" label="BSMAP Mapping Summary" />
    <!-- Only produced for paired-end runs -->
    <data name="unpaired" format="sam" label="BSMAP Unpaired Hits">
      <filter>(singlePaired['sPaired'] == 'paired')</filter>
    </data>

  </outputs>
  <help>
**What it does**

BSMAP_ is a short reads mapping software for bisulfite sequencing reads. It has the following features:

- read length up to 144 nt, allow up to 15 mismatches, gap size up to 3 bp.

- support single end and pair end mapping. support multi-thread mapping.

- support both "Lister protocol" (sequence 2 forward strands only) and "Cokus protocol" (sequence all 4 bisulfite converted strands)

- reads are directly mapped to original reference genome sequence, no need to preprocess the reads and reference genome to convert C to T.

- support both whole genome bisulfite sequencing (WGBS) mode and reduced representation bisulfite sequencing (RRBS) mode, allow changing the digestion site information to support different digestion enzymes for RRBS.

- allow trimming adapter sequences and low quality nucleotides from the 3'end of reads

- allow trade off between speed/memory usage/mapping sensitivity. For human genome, the RRBS mode uses ~3GB. In WGBS mode, the typical memory usage is ~9GB, but can be as low as 5GB.

- allow alignment for other nucleotide transitions, for example, can be set to detect the A=&gt;I(G) transition in RNA editing.

.. _BSMAP: http://code.google.com/p/bsmap/

**Input formats**

BSMAP accepts files in FASTA/FASTQ format.

**Outputs**

The output contains the following files:

- mapped reads in SAM format

- mapping summary

- unpaired hits (only for paired-end mapping)

  </help>

  <tests>
  </tests>
</tool>
--- a/bsmap_fasta.loc.sample Fri Nov 30 05:10:53 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -#This is a sample file distributed with Galaxy that enables BSMAP -#to use a directory of reference FastA sequences data files. The bsmap_fasta.loc -#file has this format (longer white space characters are TAB characters): -# -#<unique_build_id> <dbkey> <display_name> <file_path> -# -# -#Your bsmap_fasta.loc file should include an entry per line for each -#reference you have stored. For example: -# -#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fasta -#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fasta -#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fasta -#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fasta -#...etc... -#
#!/bin/bash
#
# Galaxy wrapper for BSMAP Methylation Caller
#
# Usage: bsmap_meth_caller.sh key=value [key=value ...]
#
# Recognised keys:
#   input=FILE         BSMAP mapping output (SAM)              (required)
#   output=FILE        methylation ratio file to write         (required)
#   ref=FILE           reference genome FASTA                  (required)
#   unique=true        process only unique mappings/pairs      -> -u
#   properly=true      process only properly paired mappings   -> -p
#   zero_meth=true     report loci with zero methylation       -> -z
#   rem_dup=true       remove duplicated reads                 -> -r
#   combine_cpg=true   combine CpG ratios on both strands      -> -g
#   trimN=N            trim N fill-in nucleotides              -> -t N
#   depth=N            minimum sequencing depth to report      -> -m N
#   method=, tempdir=  accepted for compatibility, not used
#
# Boolean keys with any value other than "true", and tokens that are not of
# the key=value form, are ignored.
#

set -e

input=""
output=""
ref=""
method=""
tempdir=""
opts=()     # optional methratio.py flags collected from the form settings

#get parameters
for arg in "$@"
do
    case $arg in
        input=*)
            input=${arg#input=}
            ;;
        method=*)
            method=${arg#method=}
            ;;
        output=*)
            output=${arg#output=}
            ;;
        tempdir=*)
            tempdir=${arg#tempdir=}
            ;;
        ref=*)
            ref=${arg#ref=}
            ;;
        unique=true)
            opts+=(-u)
            ;;
        properly=true)
            opts+=(-p)
            ;;
        zero_meth=true)
            opts+=(-z)
            ;;
        rem_dup=true)
            opts+=(-r)
            ;;
        combine_cpg=true)
            opts+=(-g)
            ;;
        trimN=?*)
            opts+=(-t "${arg#trimN=}")
            ;;
        depth=?*)
            opts+=(-m "${arg#depth=}")
            ;;
    esac
done

# Fail with a readable message instead of letting methratio.py choke on an
# empty argument.
for required in input output ref
do
    if [ -z "${!required}" ]
    then
        echo "bsmap_meth_caller.sh: missing required argument '${required}='" >&2
        exit 1
    fi
done

# Paths are quoted so that dataset or reference paths containing spaces
# survive word splitting. -q silences methratio.py progress output.
methratio.py -o "$output" -d "$ref" -q "${opts[@]}" "$input"
<tool id="bsmap_meth_caller" name="BSMAP Methylation Caller">
  <!--
    Galaxy tool definition for the BSMAP methylation caller.

    All settings are handed to bsmap_meth_caller.sh as key=value tokens.
    Each pair must be a single shell word (no blanks around "="), because
    the wrapper matches arguments against key=* patterns.
  -->
  <requirements>
    <requirement type="package">bsmap</requirement>
    <requirement type="package">samtools</requirement>
  </requirements>
  <command interpreter="bash">
    bsmap_meth_caller.sh
      input=$bsmap_sam
      unique=$unique
      properly=$properly
      zero_meth=$zero_meth
      rem_dup=$rem_dup
      combine_cpg=$combine_cpg
      trimN=$trimN
      depth=$depth
      output=$output
      tempdir=$output.files_path
      ##Reference FASTA: path column of the bsmap_fasta entry whose dbkey
      ##matches the dbkey recorded on the SAM input.
      ref="${ filter( lambda x: str( x[1] ) == str( $bsmap_sam.metadata.dbkey ), $__app__.tool_data_tables['bsmap_fasta'].get_fields() )[0][3] }"
  </command>
  <inputs>
    <param name="bsmap_sam" format="sam" type="data" label="BSMAP mapping output file" help="Must be in SAM format" />
    <param name="unique" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Process only unique mappings/pairs" />
    <param name="properly" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Process only properly paired mappings" />
    <param name="zero_meth" type="boolean" truevalue="true" falsevalue="false" checked="True" label="report loci with zero methylation ratios" />
    <param name="rem_dup" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Remove duplicated reads" />
    <param name="combine_cpg" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Combine CpG methylation ratios on both strands" />
    <param name="trimN" type="integer" value="2" label="Trim N fill-in nucleotides in DNA fragment end-repairing" help="This option is only for pair-end mapping. For RRBS, N could be determined by the distance between cutting sites on forward and reverse strands. For WGBS, N is usually between 0~3" />
    <param name="depth" type="integer" value="1" label="Minimum sequencing depth to report loci" />
  </inputs>
  <outputs>
    <data name="output" format="bed" label="BSMAP methylation output" />
  </outputs>
  <help>
**What it does**

This methylation caller parses the BSMAP SAM output file into bed format.


**Output format** ::


  Column                  Description
  ----------------------  --------------------------------------
  1 chr                   chromosome
  2 pos                   position
  3 strand                strand
  4 context               context (CHH,CHG,CpG)
  5 coverage              totally sequenced Cs at that position
  6 methylated            methylated Cs at that position
  7 percentage            methylated percentage of 6
  </help>
</tool>
#!/bin/bash
#
# Galaxy wrapper for BSMAP
# Written by Eugen Eirich @ Institute for Molecular Biology Mainz
#
# Usage: bsmap_wrapper.sh key=value [key=value ...]
#
# Required keys:
#   ref=FILE       reference genome FASTA
#   mate1=FILE     reads (single-end) or forward reads (paired-end)
#   mapped=FILE    SAM output
#   summary=FILE   receives bsmap's standard output
#   library=single|paired   (anything other than "single" is treated as paired)
# Paired-end only:
#   mate2=FILE     reverse reads
#   unpaired=FILE  SAM output for unpaired hits
#   maxinsert=N, mininsert=N
# Optional bsmap settings (sent by the tool form in "full" mode):
#   qual (-z) threshold (-q) lowqual (-f) adapter (-A) firstn (-L)
#   repeat_reads (-r) seed_size (-s) mismatch (-v) equal_best (-w)
#   start (-B) end (-E) index_interval (-I) seed_random (-S)
#   rrbs (-D) mode (-n) align_info (-M)
# fullparam= and any unknown key are accepted and ignored.
#
# Empty values and the form placeholder "none" mean "not set": the
# corresponding flag is left off so bsmap applies its own default.
#

set -e

ref=""
library=""
mate1=""
mate2=""
mapped=""
unpaired=""
summary=""
repeat_reads=0                  # unique hits only unless the form overrides it
threads=${GALAXY_SLOTS:-4}      # honour the scheduler's slot count if exported, else 4
opts=()                         # optional flags valid for both library types
pair_opts=()                    # optional flags that only apply to paired-end runs

# Print an error to stderr and abort.
die() {
    echo "bsmap_wrapper.sh: $*" >&2
    exit 1
}

# add_opt FLAG VALUE: append "FLAG VALUE" to opts unless VALUE is unset.
add_opt() {
    if [ -n "$2" ] && [ "$2" != "none" ]
    then
        opts+=("$1" "$2")
    fi
}

#get parameters
for arg in "$@"
do
    case $arg in
        ref=*)            ref=${arg#ref=} ;;
        library=*)        library=${arg#library=} ;;
        unpaired=*)       unpaired=${arg#unpaired=} ;;
        mapped=*)         mapped=${arg#mapped=} ;;
        summary=*)        summary=${arg#summary=} ;;
        mate1=*)          mate1=${arg#mate1=} ;;
        mate2=*)          mate2=${arg#mate2=} ;;
        fullparam=*)      ;;    # informational only; the options below speak for themselves
        qual=*)           add_opt -z "${arg#qual=}" ;;
        threshold=*)      add_opt -q "${arg#threshold=}" ;;
        lowqual=*)        add_opt -f "${arg#lowqual=}" ;;
        adapter=*)        add_opt -A "${arg#adapter=}" ;;
        firstn=*)         add_opt -L "${arg#firstn=}" ;;
        repeat_reads=?*)  repeat_reads=${arg#repeat_reads=} ;;
        seed_size=*)      add_opt -s "${arg#seed_size=}" ;;
        mismatch=*)       add_opt -v "${arg#mismatch=}" ;;
        equal_best=*)     add_opt -w "${arg#equal_best=}" ;;
        start=*)          add_opt -B "${arg#start=}" ;;
        end=*)            add_opt -E "${arg#end=}" ;;
        index_interval=*) add_opt -I "${arg#index_interval=}" ;;
        seed_random=*)
            # NOTE(review): the tool form's default for this field is -1.
            # It is treated here as "no explicit seed" so that an untouched
            # form keeps bsmap's own seeding; confirm against the bsmap
            # documentation for -S.
            seed=${arg#seed_random=}
            if [ "$seed" != "-1" ]
            then
                add_opt -S "$seed"
            fi
            ;;
        rrbs=*)           add_opt -D "${arg#rrbs=}" ;;
        mode=*)           add_opt -n "${arg#mode=}" ;;
        align_info=*)     add_opt -M "${arg#align_info=}" ;;
        maxinsert=?*)     pair_opts+=(-x "${arg#maxinsert=}") ;;
        mininsert=?*)     pair_opts+=(-m "${arg#mininsert=}") ;;
    esac
done

[ -n "$ref" ]     || die "missing required argument 'ref='"
[ -n "$mate1" ]   || die "missing required argument 'mate1='"
[ -n "$mapped" ]  || die "missing required argument 'mapped='"
[ -n "$summary" ] || die "missing required argument 'summary='"

# Assemble the command as an array so that paths containing blanks stay intact.
cmd=(bsmap -a "$mate1")

if [ "$library" != "single" ]
then
    [ -n "$mate2" ]    || die "paired-end run needs 'mate2='"
    [ -n "$unpaired" ] || die "paired-end run needs 'unpaired='"
    cmd+=(-b "$mate2" -2 "$unpaired")
fi

# -R makes bsmap print the reference sequence in the SAM output.
cmd+=(-d "$ref" -o "$mapped" -R -r "$repeat_reads" -p "$threads" "${opts[@]}")

if [ "$library" != "single" ]
then
    cmd+=("${pair_opts[@]}")
fi

"${cmd[@]}" > "$summary"
<!--
  Sample data table configuration for the BSMAP tools.
  Use the file tool_data_table_conf.xml.oldlocstyle instead if you do not
  want to update your loc files as changed in revision 4550:535d276c92bc.
-->
<tables>
    <!--
      bsmap_fasta: locations of the FastA genomes available to BSMAP.
      Rows come from tool-data/bsmap_fasta.loc; lines starting with "#"
      are treated as comments.
    -->
    <table comment_char="#" name="bsmap_fasta">
        <columns>value, dbkey, name, path</columns>
        <file path="tool-data/bsmap_fasta.loc"/>
    </table>
</tables>