Mercurial > repos > devteam > rmap
changeset 0:ee49255302d8 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 12:33:09 -0400 |
parents | |
children | 264b036c0832 |
files | rmap_wrapper.py rmap_wrapper.xml test-data/rmap_wrapper_test1.bed test-data/rmap_wrapper_test1.fasta tool-data/faseq.loc.sample tool_dependencies.xml |
diffstat | 6 files changed, 234 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmap_wrapper.py Mon May 19 12:33:09 2014 -0400 @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +import os, sys, tempfile + +assert sys.version_info[:2] >= (2.4) + +def stop_err( msg ): + + sys.stderr.write( "%s\n" % msg ) + sys.exit() + + +def __main__(): + + # I/O + target_path = sys.argv[1] + infile = sys.argv[2] + read_len = sys.argv[3] # -w + align_len = sys.argv[4] # -h + mismatch = sys.argv[5] # -m + output_file = sys.argv[6] + + # first guess the read length + guess_read_len = 0 + seq = '' + for i, line in enumerate(open(infile)): + line = line.rstrip('\r\n') + if line.startswith('>'): + if seq: + guess_read_len = len(seq) + break + else: + seq += line + + try: + test = int(read_len) + if test == 0: + read_len = str(guess_read_len) + else: + assert test >= 20 and test <= 64 + except: + stop_err('Invalid value for read length. Must be between 20 and 64.') + + try: + int(align_len) + except: + stop_err('Invalid value for minimal length of a hit.') + + try: + int(mismatch) + #assert test >= 0 and test <= int(0.1*int(read_len)) + except: + stop_err('Invalid value for mismatch numbers in an alignment.') + + all_files = [] + if os.path.isdir(target_path): + + # check target genome + fa_files = os.listdir(target_path) + + for file in fa_files: + file = "%s/%s" % ( target_path, file ) + file = os.path.normpath(file) + all_files.append(file) + else: + stop_err("No sequences for %s are available for search, please report this error." %(target_path)) + + for detail_file_path in all_files: + output_tempfile = tempfile.NamedTemporaryFile().name + command = "rmap -h %s -w %s -m %s -c %s %s -o %s 2>&1" % ( align_len, read_len, mismatch, detail_file_path, infile, output_tempfile ) + #print command + try: + os.system( command ) + except Exception, e: + stop_err( str( e ) ) + + try: + os.system( 'cat %s >> %s' % ( output_tempfile, output_file ) ) + except Exception, e: + stop_err( str( e ) ) + + try: + os.remove( output_tempfile ) + except: + pass + + +if __name__ == '__main__': __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmap_wrapper.xml Mon May 19 12:33:09 2014 -0400 @@ -0,0 +1,86 @@ +<tool id="rmap_wrapper" name="RMAP" version="1.0.0"> + <description>for Solexa Short Reads Alignment</description> + <requirements> + <requirement type="package" version="2.05">rmap</requirement> + </requirements> + <command interpreter="python"> + #if $trim.choice=="No": + rmap_wrapper.py $database $input_seq 0 $align_len $mismatch $output1 + #else: + rmap_wrapper.py $database $input_seq $trim.read_len $align_len $mismatch $output1 + #end if + </command> + <inputs> + <param name="database" type="select" display="radio" label="Target database"> + <options from_file="faseq.loc"> + <column name="name" index="0"/> + <column name="value" index="0"/> + </options> + </param> + <param name="input_seq" type="data" format="fasta" label="Sequence file"/> + <param name="align_len" type="integer" size="15" value="11" label="Minimal length of a hit (-h)" help="seed" /> + <param name="mismatch" type="select" label="Number of mismatches allowed (-m)"> + <option value="0">0</option> + <option value="1">1</option> + <option value="3">3</option> + <option value="5">5</option> + </param> + <conditional name="trim"> + <param name="choice" type="select" label="To trim the reads"> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No"> + </when> + <when value="Yes"> + <param name="read_len" type="integer" size="15" value="36" label="Read length (-w)"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output1" format="bed"/> + </outputs> + <!-- + <tests> + <test> + <param name="database" value="/galaxy/data/faseq/test" /> + <param name="input_seq" value="rmap_wrapper_test1.fasta" ftype="fasta"/> + <param name="read_len" value="36" /> + <param name="align_len" value="36" /> + <param name="mismatch" value="3" /> + <output name="output1" file="rmap_wrapper_test1.bed"/> + </test> + </tests> + --> + <help> + +.. class:: warningmark + + RMAP was developed for **Solexa** reads. + +.. class:: infomark + +**TIP**. The tool will guess the length of the reads, however, if you select to trim the reads, the *Reads length* must be between 20 and 64. Reads with lengths longer than the specified value will be trimmed at the 3'end. + +----- + +**What it does** + +This tool runs **rmap** (for more information, please see the reference below), mapping Solexa reads onto a genome build. + +----- + +**Parameters** + +- *Minimal Length of a Hit* (**-h**) : this is the seed length or the minimal exact match length +- *Number of Mismatches Allowed* (**-m**) : the maximal number of mismatches allowed in an alignment +- *Read Length* (**-w**) : maximal length of the reads; reads longer than the threshold will be truncated at 3' end. + +----- + +**Reference** + + **RMAP** is developed by Dr. Andrew D Smith and Dr. Zhenyu Xuan at the Cold Spring Harbor Laboratory. Please see http://rulai.cshl.edu/rmap/ + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rmap_wrapper_test1.bed Mon May 19 12:33:09 2014 -0400 @@ -0,0 +1,8 @@ +phix 360 396 seq1 1 - +phix 4188 4224 seq2 1 + +phix 4908 4944 seq4 0 - +phix 2811 2847 seq5 2 + +phix 3847 3883 seq6 0 - +phix 91 127 seq7 0 + +phix 2302 2338 seq8 2 + +phix 2448 2484 seq9 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rmap_wrapper_test1.fasta Mon May 19 12:33:09 2014 -0400 @@ -0,0 +1,20 @@ +>seq1 +GACTCATGATTTCTTACCTATTAGTGGTTGAACATC +>seq2 +GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT +>seq3 +GTTGTCGATAGAACTTCATGTGCCTGTAAAACAAGT +>seq4 +ACCAACCAGAACGTGAAAAAGCGTCCTGCGTGTAGC +>seq5 +GTTTATGTTGGTTTCATGGTTTTGTCTAACTTTATC +>seq6 +GCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCG +>seq7 +GCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGC +>seq8 +GTTATAACGCCGAAGCGGTAAAAATTTTTATTTTTT +>seq9 +GTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCT +>seq10 +GTGGCCTGTTGATTCTAAAGGTTAGTTTCTTCACGC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/faseq.loc.sample Mon May 19 12:33:09 2014 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use genome fasta sequence files. The faseq.loc file has this format +#(white space characters are TAB characters): +# +# <GenomeBuild> <dir> +# +# In the dir, each file is fasta format and contains only one sequence. So, +#for example, if you had hg18 fasta sequences stored in /depot/data2/galaxy/faseq/hg18, +#then your faseq.loc entry would look like this: +# +#hg18 /depot/data2/galaxy/faseq/hg18 +# +#and your /depot/data2/galaxy/faseq/hg18 directory would contain all of +#your fasta sequence files (e.g.): +# +#-rw-r--r-- 1 wychung galaxy 138082251 2008-04-16 11:57 chr10.fa +#-rw-r--r-- 1 wychung galaxy 115564 2008-04-16 11:57 chr10_random.fa +#-rw-r--r-- 1 wychung galaxy 137141451 2008-04-16 11:58 chr11.fa +#...etc... +#Your faseq.loc file should include an entry per line for each set of fasta +#sequence files you have stored. For example: +# +#hg18 /depot/data2/galaxy/faseq/hg18 +#mm9 /depot/data2/galaxy/faseq/mm9 +#Arabidopsis /depot/data2/galaxy/faseq/Arabidopsis +#...etc...
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon May 19 12:33:09 2014 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="rmap" version="2.05"> + <repository changeset_revision="e0eed0cb34bf" name="package_rmap_2_05" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>