Mercurial > repos > yating-l > snap
changeset 0:57299471d6c1 draft default tip
planemo upload commit 402a746f69e9f1dbb57007536fc36dc6ce3180de
author | yating-l |
---|---|
date | Wed, 12 Apr 2017 17:37:47 -0400 |
parents | |
children | |
files | Group.py README.md gff2Togff3.py readme.rst snap.xml test-data/thale.dna.gz tool_dependencies.xml |
diffstat | 7 files changed, 361 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Group.py
from operator import itemgetter


# Input: A group: a list that contains lines belonging to the same gene
class Group:
    """Rows of a single gene, rewritten in place as GFF3 CDS features.

    ``group`` is a non-empty list of rows. Each row is a list with at least
    nine columns; the ones used here are 0 (sequence id), 1 (source),
    2 (feature type), 3 (start), 4 (end), 6 (strand), 7 (phase) and
    8 (gene name). The id, source, strand and gene name are taken from the
    first row and assumed to hold for the whole group.
    """

    # Modify "type" column and "attributes" column, initialize id, gene, source, stream
    def __init__(self, group):
        self.group = group
        self.id = str(group[0][0])
        self.source = str(group[0][1])
        self.stream = str(group[0][6])
        self.gene = str(group[0][8])
        # Remember the feature types before they are overwritten with "CDS":
        # phaseCalculator needs the original type of a single-row gene.
        self._original_types = [str(row[2]) for row in group]
        for row in self.group:
            row[2] = "CDS"
            row[8] = "Parent=mRNA_" + self.gene
            row[3] = int(row[3])
            row[4] = int(row[4])

    # Order the group elements according to stream, +: ascending order, -: descending order
    def order(self):
        """Sort the rows in transcription order and record the gene extent.

        Sets ``num`` (row count), ``min_item`` (smallest start) and
        ``max_item`` (largest end).

        Raises:
            ValueError: if the strand is neither "+" nor "-".
        """
        self.num = len(self.group)
        if self.stream == "+":
            descending = False
        elif self.stream == "-":
            descending = True
        else:
            # Fail here with a clear message; carrying on would only surface
            # later as a missing min_item/max_item attribute in writer().
            raise ValueError(
                "Invalid strand %r for gene %s: expected '+' or '-'"
                % (self.stream, self.gene)
            )
        self.group = sorted(self.group, key=itemgetter(3), reverse=descending)
        # Use the true extremes so nested or overlapping rows cannot
        # shrink the reported gene span.
        self.min_item = min(row[3] for row in self.group)
        self.max_item = max(row[4] for row in self.group)

    def phaseCalculator(self, i, donor=0):
        """Fill the phase column (7) for rows ``i`` .. ``num - 1``.

        ``order()`` must have been called first. ``donor`` is the number of
        bases of an unfinished codon carried over from the previous row.
        A single-row gene gets phase 0, unless its original type was
        "Eterm", in which case the phase is the row length modulo 3.
        """
        # Iterative on purpose: one recursive call per row would exceed the
        # interpreter's recursion limit on genes with very many rows.
        while i < self.num:
            self.type = self.group[i][2]
            self.size = self.group[i][4] - self.group[i][3] + 1
            if self.num == 1:
                if self._original_types[i] == "Eterm":
                    self.group[i][7] = str(self.size % 3)
                else:
                    self.group[i][7] = "0"
                return
            # Bases needed at the start of this row to finish the codon
            # begun in the previous one.
            accept = (3 - donor) % 3
            self.group[i][7] = str(accept)
            donor = (self.size - accept) % 3
            i += 1

    def writer(self, gff3):
        """Write the gene, mRNA and CDS lines of this group to ``gff3``.

        ``gff3`` is any object with a ``write(str)`` method.
        """
        self.order()
        self.phaseCalculator(0)
        start = str(self.min_item)
        end = str(self.max_item)
        gene_line = [self.id, self.source, "gene", start, end, ".",
                     self.stream, ".", "ID=" + self.gene]
        mrna_line = [self.id, self.source, "mRNA", start, end, ".",
                     self.stream, ".",
                     "ID=mRNA_" + self.gene + ";Parent=" + self.gene]
        gff3.write("\t".join(gene_line) + "\n")
        gff3.write("\t".join(mrna_line) + "\n")
        for row in self.group:
            # Convert while joining instead of storing strings back into the
            # rows, so the coordinates stay integers and writer() can be
            # called again.
            gff3.write("\t".join(str(field) for field in row) + "\n")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Apr 12 17:37:47 2017 -0400 @@ -0,0 +1,2 @@ +# SNAP +Galaxy wrapper for SNAP gene prediction tool
# gff2Togff3.py
import argparse
import sys
import fileinput
from Group import Group


def main():
    """Parse the command line and convert the GFF input to GFF3.

    The input is read from ``--input`` when it is given; otherwise from
    standard input when something is piped in. ``--output`` is required.
    """
    parser = argparse.ArgumentParser(description='Get a gff file and the output gff3 file')
    parser.add_argument('--input', help='input gff file')
    parser.add_argument('--output', help='output gff3 file', required=True)
    args = parser.parse_args()
    if args.input is not None:
        # An explicit file wins; otherwise --input would be silently ignored
        # whenever stdin is not a terminal (cron, job runners, ...).
        source = args.input
    elif not sys.stdin.isatty():
        source = sys.stdin
    else:
        parser.error('no input: pass --input or pipe a gff file on stdin')
    c = Convertor(source, args.output)
    c.convert()


class Convertor:
    """Read tab-separated GFF rows and write them out gene by gene as GFF3.

    Consecutive rows that share the same value in column 8 are treated as
    one gene and handed to ``Group`` for writing.
    """

    def __init__(self, input, output):
        """Load all rows from ``input`` and open ``output`` for writing.

        ``input`` is either a file path (str) or an iterable of lines.
        The GFF3 version header is written immediately.
        """
        if isinstance(input, str):
            with open(input) as handle:
                self.li = self._read_rows(handle)
        else:
            self.li = self._read_rows(input)
        self.gff3 = open(output, "w")
        self.gff3.write("##gff-version 3\n")

    @staticmethod
    def _read_rows(lines):
        """Split each data line on tabs, skipping blank and '#' lines."""
        rows = []
        for number, line in enumerate(lines, 1):
            text = line.rstrip()
            if not text or text.startswith("#"):
                continue
            fields = text.split("\t")
            if len(fields) < 9:
                # Column 8 (gene name) is required for grouping.
                raise ValueError(
                    "line %d: expected at least 9 tab-separated columns, got %d"
                    % (number, len(fields))
                )
            rows.append(fields)
        return rows

    def convert(self):
        """Write every gene group, then close the output file."""
        try:
            index = 0
            while index < len(self.li):
                index = self.groupAsgene(index)
        finally:
            # Close the handle even when a group fails, so it is not leaked.
            self.gff3.close()

    def groupAsgene(self, start=0):
        """Write the gene that begins at row ``start``.

        Returns the index of the first row that belongs to the next gene,
        or ``len(self.li)`` when the last gene has been written.
        """
        gene = self.li[start][8]
        end = start + 1
        while end < len(self.li) and self.li[end][8] == gene:
            end += 1
        g = Group(self.li[start:end])
        g.writer(self.gff3)
        return end


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Wed Apr 12 17:37:47 2017 -0400 @@ -0,0 +1,46 @@ +Galaxy wrapper for SNAP +======================== + +This wrapper is copyright 2016-2017 by Yating Liu + +This is a wrapper for the gene prediction tool SNAP. SNAP is a general purpose gene finding program suitable for both eukaryotic and prokaryotic genomes. SNAP is an acronym for Semi-HMM-based Nucleic Acid +Parser. + +Reference +---------------------- + + Korf I. Gene finding in novel Genomes. BMC Bioinformatics 2004, 5:59 + +Installation +----------------------- + +To install SNAP, please download SNAP from + +http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz + +and follow the installation instructions. The software is routinely compiled and tested on Mac OS X. It should compile +fine on any Linux/Unix-type operating system. +The default compiler is gcc. If you have gcc installed, the easiest is to just compile as: +``` + make +``` + +The ZOE environment variable is used by SNAP to find the HMM files. Set this +to the directory containing this file. For example, if you unpacked the tar-ball in /usr/local/snap, set the ZOE environment variable to /usr/local/snap + +``` + setenv ZOE /usr/local/snap # csh, tcsh, etc +``` + or +``` + export ZOE=/usr/local/snap # sh, bash, etc +``` +To install the wrapper, copy the snap folder in the galaxy tools and modify the $GALAXY_ROOT/config/tool_conf.xml file to make the tool available to Galaxy. For example: +``` +<tool file="galaxy/tools/myTools/snap/snap.xml" /> +``` + + + + +
<!-- snap.xml: Galaxy tool wrapper that runs SNAP and converts its GFF output to GFF3 -->
<tool id="snap" name="Semi-HMM-based Nucleic Acid Parser (SNAP)" version="1.0">
    <requirements>
        <requirement type="package" version="1.0">snap</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <command><![CDATA[
        snap
        #if $settings.advanced == "advanced"
            $settings.lcmask
            ## The boolean lives in the conditional's test parameter
            ## (settings.strand.strand), not in the conditional itself.
            #if str($settings.strand.strand) == "true"
                $settings.strand.onestrand
            #end if
        #end if
        #if $proteins == "-aa":
            $proteins "$output2"
        #end if
        #if $transcripts == "-tx":
            $transcripts "$output3"
        #end if
        -gff
        -quiet
        $organism
        "$input1"
        | python "$__tool_directory__/gff2Togff3.py" --output "$output1"
    ]]></command>
    <inputs>
        <param type="data" name="input1" format="fasta" />
        <param name="organism" label="Model Organism" type="select" multiple="false" format="text" help="Choose a specialised trainingset.">
            <option value="A.gambiae.hmm">A.gambiae</option>
            <option value="A.mellifera.hmm">A.mellifera</option>
            <option value="A.thaliana.hmm">Arabidopsis thaliana</option>
            <option value="Acanium.hmm">Acanium</option>
            <option value="At.hmm">Thale</option>
            <option value="B.malayi.hmm">Brugia</option>
            <option value="B.mori.hmm">B.mori</option>
            <option value="C.elegans.hmm">C.elegans</option>
            <option value="C.intestinalis.hmm">Ciona</option>
            <option value="Ce.hmm">Ce</option>
            <option value="D.melanogaster.hmm">Drosophila melanogaster</option>
            <option value="Dm.hmm">Dm</option>
            <option value="ixodesA.hmm">ixodesA</option>
            <option value="ixodesB.hmm">ixodesB</option>
            <option value="mam39-ro.hmm">mam39-ro</option>
            <option value="mam39.hmm">mam39</option>
            <option value="mam46-ro.hmm">mam46-ro</option>
            <option value="mam46.hmm">mam46</option>
            <option value="mam54-ro.hmm">mam54-ro</option>
            <option value="mam54.hmm">mam54</option>
            <option value="mamiso.hmm">mamiso</option>
            <option value="minimal.hmm">minimal</option>
            <option value="Nasonia.hmm">Nasonia</option>
            <option value="nGASP.hmm">nGASP</option>
            <option value="nGASPr.hmm">nGASPr</option>
            <option value="O.sativa.hmm">O.sativa</option>
            <option value="Os.hmm">Os</option>
        </param>
        <param name="proteins" type="select" label="Create FASTA file of proteins">
            <option value="">No</option>
            <option value="-aa">Yes</option>
        </param>
        <param name="transcripts" type="select" label="Create FASTA file of transcripts">
            <option value="">No</option>
            <option value="-tx">Yes</option>
        </param>
        <conditional name="settings">
            <param name="advanced" type="select" label="Specify advanced parameters">
                <option value="simple" selected="true">No, use program defaults.</option>
                <option value="advanced">Yes, see full parameter list.</option>
            </param>
            <when value="simple">
            </when>
            <when value="advanced">
                <param name="lcmask" type="boolean" label="treat lowercase as N" truevalue="-lcmask" falsevalue="" />
                <conditional name="strand">
                    <param name="strand" type="boolean" label="predict on one strand only" />
                    <when value="true">
                        <param name="onestrand" type="select" label="Specify which strand to predict">
                            <option value="-plus">predict on plus strand only.</option>
                            <option value="-minus">predict on minus strand only.</option>
                        </param>
                    </when>
                    <when value="false">
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output1" format="gff3" label="${tool.name} on ${on_string}: GTF/GFF3">
        </data>
        <data name="output2" format="fasta" label="${tool.name} on ${on_string}: Protein sequence">
            <filter>proteins == "-aa"</filter>
        </data>
        <data name="output3" format="fasta" label="${tool.name} on ${on_string}: Coding sequence">
            <filter>transcripts == "-tx"</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="input1" value="thale.dna.gz"/>
            <param name="organism" value="At.hmm" />
            <output name="output1" file="thale.gff3"/>
        </test>
    </tests>
    <help><![CDATA[
    The general form of the snap command line is:

        snap <HMM file> <FASTA file> [options]

HMM file:

    The most convenient way to specify the HMM file is by name. This requires
    that the ZOE environment variable is set. In this case, snap will look
    for the HMM file in $ZOE/HMM. You may also specify the HMM file by an
    explicit path. The following are equivalent if $ZOE is in /usr/local:

        snap C.elegans.hmm ...
        snap /usr/local/Zoe/HMM/C.elegans.hmm ...
        snap worm ...  # there are a few convenient aliases in $ZOE/HMM

FASTA file:

    If you have several sequences to analyze, it is more efficient to run
    snap on a concatenated FASTA file rather than separate runs on single
    sequence files. The sequence may be in a compressed format.

    If sequences have been masked with lowercase letters, use -lcmask to
    prevent exons from appearing in masked DNA.

Output:

    Annotation is reported to stdout in a non-standard format (ZFF). You can
    change to GFF or ACEDB with the -gff or -ace options. Proteins and
    transcripts are reported to FASTA files with the -aa and -tx options.

External definitions:

    SNAP allows you to adjust the score of any sequence model at any point
    in a sequence. This behavior is invoked by giving a ZFF file to SNAP:

        snap <hmm> <sequence> -xdef <ZFF file>

    Each feature description uses the 'group' field to issue a command:

        SET set the score
        ADJ adjust the score up or down
        OK set non-canonical scores

        >FOO
        Acceptor 120 120 + +50 . . . SET (sets an Acceptor to 50)
        Donor 212 212 + -20 . . . ADJ (lowers a Donor by -20)
        Inter 338 579 + -2 . . . ADJ (lowers Inter by -2 in a range)
        Coding 440 512 - +3 . . . ADJ (raises Coding by +3 in a range)
        Donor 625 638 + -5 . . . OK (sets range of odd Donors to -5)

If the output has scrolled off your screen, try 'snap -help | more'

    ]]></help>
    <citations>
        <citation type="bibtex">
@article{korf2004,
  author = {Korf, I.},
  year = {2004},
  title = {Gene finding in novel genomes},
  journal = {BMC Bioinformatics},
  volume = {5},
  pages = {59},
  url = {http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz},
}</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Apr 12 17:37:47 2017 -0400 @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="snap" version="1.0"> + <install version="1.0"> + <actions_group> + <actions> + <action type="download_by_url">http://korflab.ucdavis.edu/Software/snap-2013-11-29.tar.gz</action> + <action type="shell_command">make</action> + <action type="move_directory_files"> + <source_directory>.</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="ZOE" action="set_to">$INSTALL_DIR</environment_variable> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable> + </action> + </actions> + </actions_group> + </install> + <readme>SNAP is a general purpose gene finding program suitable for both eukaryotic + and prokaryotic genomes. SNAP is an acronym for Semi-HMM-based Nucleic Acid + Parser. + </readme> + </package> +</tool_dependency>