Mercurial > repos > jfallmann > aresite2
changeset 3:755662977150 draft
Uploaded repo
author | jfallmann |
---|---|
date | Wed, 01 Feb 2017 09:59:29 -0500 |
parents | 9b1beb18b477 |
children | fccb58bd1fae |
files | aresite2/RestAPI.pl aresite2/aresite2.py aresite2/aresite2.xml aresite2/test-data/AREsite2_Rest_ATTTA_cxcl2_Homo_sapiens.bed |
diffstat | 4 files changed, 318 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env perl
# File: aresite2/RestAPI.pl

#Copyright (C) 2015 Joerg Fallmann E<lt>joerg.fallmann@univie.ac.atE<gt>
#This library is free software; you can redistribute it and/or modify
#it under the same terms as Perl itself, either Perl version 5.10.0 or,
#at your option, any later version of Perl 5 you may have available.
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details.
#With code fragments from https://github.com/Ensembl/ensembl-rest/wiki/Example-Perl-Client
#Last changed Time-stamp: <2015-10-30 15:25:52> by joerg.fallmann@univie.ac.at

########## Load Modules ##########
#### We use HTTP::Tiny to send a GET request to aresite2
#### The returned JSON object is parsed with the module JSON
#### Data::Dumper is used to print the returned hash ref
use strict;
use warnings;
use HTTP::Tiny;
use Time::HiRes;
#use LWP::Simple;
use JSON qw( decode_json );
use Data::Dumper;
use Getopt::Long qw( :config posix_default bundling no_ignore_case );
use Pod::Usage;

########## MAIN ##########

########## Define variables ##########
my ($VERBOSE, $species, @motifs, $gene, $list);

# Remember the invocation before GetOptions() consumes @ARGV, so it can be
# echoed without shelling out to /proc (which only exists on Linux).
my @invocation = ($0, @ARGV);

########## Process Commandline ##########
# 'no_ignore_case' is already set through the Getopt::Long import list above.
pod2usage(-verbose => 0)
    unless GetOptions(
        "species|s=s" => \$species,
        "motifs|o=s"  => \@motifs,
        "gene|g=s"    => \$gene,
        "help|h"      => sub { pod2usage(-verbose => 1) },
        "man|m"       => sub { pod2usage(-verbose => 2) },
        "verbose"     => sub { $VERBOSE++ }
    );

########## Print command ##########
print STDERR "You called ", join(' ', @invocation), "\n";

########## Send request ##########
my $http = HTTP::Tiny->new();
### Define url
my $url               = 'http://rna.tbi.univie.ac.at/AREsite2/api/';
my $global_headers    = { 'Content-Type' => 'application/json' };
my $last_request_time = Time::HiRes::time();
my $request_count     = 0;
my $max_retries       = 5;    # upper bound for "429 Too Many Requests" retries

### Define gene, species, and a comma-separated list of motifs
$species = "Homo_sapiens" unless (defined $species);
# --motifs may be repeated and/or comma separated; normalise to one list
$list = join(",", split(/,/, join(",", @motifs))) if (@motifs);
$list = "ATTTA" unless (defined $list);
$gene = "cxcl2" unless (defined $gene);

### Fetch response and create hash ref from response
my $aresite = run();

### print hash dump to STDOUT
### (Dumper receives a reference to $aresite, so the dump starts with "\{")
print Dumper(\$aresite);

### Everything below reads hash keys, so refuse any other JSON top-level type
die "Unexpected response from $url: expected a JSON object\n"
    unless ref $aresite eq 'HASH';

if (defined $aresite->{reason}) {
    ### Request error reported by the server
    my $message = defined $aresite->{message} ? $aresite->{message} : $aresite->{reason};
    print STDOUT "ERROR:\t" . $message . "\n";
}
elsif (!%{$aresite}) {
    ### perform_json_action() returns {} when the response body was empty
    print STDERR "No data returned for gene '$gene' in species '$species'\n";
}
else {
    ### print some values to STDOUT if no error
    print STDOUT "GENE: $aresite->{id}\tENSEMBL: $aresite->{ensid}\tCoords: $aresite->{coordinates}\n";
}

# run()
# Build the request URL from the file-level $gene, $species and $list and
# return the decoded JSON response (see perform_json_action).
# The URL is assembled in a local variable; the file-level $url is left
# untouched so calling run() more than once stays correct.
sub run {
    my $query = join("&",
        "?query="  . _escape_query_value($gene),
        "species=" . _escape_query_value($species),
        "list="    . _escape_query_value($list));
    return perform_json_action($url . $query);
}

# _escape_query_value($value)
# Percent-encode $value for use inside a URL query string. Unreserved
# characters and the comma (used to separate motifs) are kept as they are,
# so typical gene/species/motif values produce an unchanged URL.
sub _escape_query_value {
    my ($value) = @_;
    utf8::encode($value) if utf8::is_utf8($value);    # escape bytes, not code points
    $value =~ s/([^A-Za-z0-9\-._~,])/sprintf('%%%02X', ord($1))/ge;
    return $value;
}

# perform_json_action($endpoint)
# GET $endpoint with the global JSON headers and decode the body.
# Returns the decoded JSON structure, or an empty hash ref when the response
# had no content. Dies if the request fails or the body is not valid JSON.
sub perform_json_action {
    my ($endpoint) = @_;
    my $content = perform_rest_action($endpoint, $global_headers);
    return {} unless $content;
    return decode_json($content);
}

# perform_rest_action($endpoint, $headers, $retries_left)
# GET $endpoint and return the raw response body, or nothing when it is empty.
#   $headers      - optional hash ref of request headers; 'Content-Type'
#                   defaults to 'application/json'
#   $retries_left - optional number of remaining retries after a 429 response
#                   (defaults to $max_retries)
# Dies with status and reason when the request does not succeed.
sub perform_rest_action {
    my ($endpoint, $headers, $retries_left) = @_;
    $headers ||= {};
    $headers->{'Content-Type'} = 'application/json' unless exists $headers->{'Content-Type'};
    $retries_left = $max_retries unless defined $retries_left;

    if ($request_count == 15) {    # check every 15
        my $diff = Time::HiRes::time() - $last_request_time;
        # if less than a second then sleep for the remainder of the second
        Time::HiRes::sleep(1 - $diff) if $diff < 1;
        # reset
        $last_request_time = Time::HiRes::time();
        $request_count     = 0;
    }

    print STDERR "Fetching from ", $endpoint, "\n";
    my $response = $http->get($endpoint, { headers => $headers });

    if (!$response->{success}) {
        my ($status, $reason) = ($response->{status}, $response->{reason});
        # Header names are lowercase in HTTP::Tiny responses
        my $retry_after = $response->{headers}->{'retry-after'};

        # Rate limit exceeded: wait as instructed and re-request, but only a
        # bounded number of times so a misbehaving server cannot loop us forever.
        if ($status == 429 && defined $retry_after && $retries_left > 0) {
            # Retry-After may also be an HTTP date; only sleep on plain seconds
            my $delay = $retry_after =~ /\A\d+(?:\.\d+)?\z/ ? $retry_after : 1;
            Time::HiRes::sleep($delay);
            return perform_rest_action($endpoint, $headers, $retries_left - 1);
        }

        # Status 599 is HTTP::Tiny's pseudo-status for internal exceptions
        # (e.g. connection failure); the real cause is in the content field.
        my $detail = '';
        if ($status == 599 && defined $response->{content}) {
            ($detail = $response->{content}) =~ s/\s+\z//;
            $detail = " ($detail)" if length $detail;
        }
        die "Failed for $endpoint! Status code: ${status}. Reason: ${reason}${detail}\n";
    }

    $request_count++;
    return $response->{content} if length $response->{content};
    return;
}

__END__

=head1 NAME

RestAPI.pl - query the AREsite2 REST interface for motifs of a gene

=head1 SYNOPSIS

RestAPI.pl [-s species] [-o motif[,motif...]] [-g gene] [--verbose]

RestAPI.pl -h | -m

=head1 OPTIONS

=over 4

=item B<-s>, B<--species> I<NAME>

Species to query. Defaults to C<Homo_sapiens>.

=item B<-o>, B<--motifs> I<MOTIF[,MOTIF...]>

Motif(s) to look for. May be given several times and/or as a comma-separated
list. Defaults to C<ATTTA>.

=item B<-g>, B<--gene> I<ID>

Gene to search for. Defaults to C<cxcl2>.

=item B<--verbose>

Increase the verbosity counter.

=item B<-h>, B<--help>

Print synopsis and options, then exit.

=item B<-m>, B<--man>

Print the full manual page, then exit.

=back

=head1 DESCRIPTION

Sends a GET request with the parameters C<query>, C<species> and C<list> to
the AREsite2 API, decodes the JSON response and prints a Data::Dumper dump of
it to STDOUT. If the response carries a C<reason> field, an C<ERROR:> line is
printed; otherwise the C<id>, C<ensid> and C<coordinates> fields are printed.

=head1 AUTHOR

Joerg Fallmann E<lt>joerg.fallmann@univie.ac.atE<gt>

=cut
# File: aresite2/aresite2.py
# A simple tool to connect to the AREsite server and retrieve feature
# information using the AREsite REST Interface.
# Parts of this code are from https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ensembl_get_feature_info
#
# NOTE: this script targets Python 2 (it relies on urllib2).
from __future__ import print_function

import json
import optparse
import sys
import urllib
import urllib2
import time
import requests
from six.moves.urllib.parse import urljoin

usage = "usage: %prog [options] arg1 arg2"
parser = optparse.OptionParser(usage=usage)
parser.add_option('-g', '--gene', help='Gene ID to search for')
parser.add_option('-m', '--motif', help='Motif to look for', default='ATTTA', type=str)
parser.add_option('-s', '--species', type='choice',
                  choices=['Homo_sapiens', 'Mus_musculus', 'Danio_rerio', 'Drosophila_melanogaster', 'Caenorhabditis_elegans'], default='Homo_sapiens',
                  help='Specify the species to investigate')
options, args = parser.parse_args()

if options.gene is None:
    raise Exception('- Specify the gene you want to look for!')

if "," in options.motif:
    raise Exception('- Please only search for single motifs at once')


class AREsiteRestClient(object):
    """Minimal client for the AREsite2 REST API with client-side rate limiting."""

    def __init__(self, server='http://rna.tbi.univie.ac.at/AREsite2/api/', reqs_per_sec=1):
        """
        :param server: base URL every endpoint is appended to
        :param reqs_per_sec: number of requests after which the client checks
            whether it has to pause to stay within one-second windows
        """
        self.server = server
        self.reqs_per_sec = reqs_per_sec
        self.req_count = 0
        self.last_req = 0

    def perform_rest_action(self, endpoint, hdrs=None, params=None):
        """GET ``server + endpoint`` and return the decoded JSON body.

        :param endpoint: path/query string appended to ``self.server``
        :param hdrs: optional dict of request headers; ``Content-Type``
            defaults to ``application/json``
        :param params: optional mapping that is URL-encoded and appended
            after a ``?``
        :return: the decoded JSON document, or ``None`` if the body was empty
            or the request failed with an HTTP error (which is reported on
            stderr)
        """
        if hdrs is None:
            hdrs = {}

        if 'Content-Type' not in hdrs:
            hdrs['Content-Type'] = 'application/json'

        # Build the URL in a local variable: a retry passes ``endpoint`` and
        # ``params`` on again, so ``endpoint`` itself must stay unmodified.
        url = self.server + endpoint
        if params:
            url += '?' + urllib.urlencode(params)

        data = None

        # check if we need to rate limit ourselves
        if self.req_count >= self.reqs_per_sec:
            delta = time.time() - self.last_req
            if delta < 1:
                time.sleep(1 - delta)
            self.last_req = time.time()
            self.req_count = 0

        try:
            request = urllib2.Request(url, headers=hdrs)
            response = urllib2.urlopen(request)
            content = response.read()
            if content:
                data = json.loads(content)
            self.req_count += 1

        except urllib2.HTTPError as e:
            # check if we are being rate limited by the server
            if e.code == 429 and 'Retry-After' in e.headers:
                try:
                    delay = float(e.headers['Retry-After'])
                except ValueError:
                    # Retry-After may also be an HTTP date; fall back to 1s
                    delay = 1.0
                time.sleep(delay)
                # Hand the retried result back to the caller instead of
                # dropping it.
                return self.perform_rest_action(endpoint, hdrs, params)
            sys.stderr.write('Request failed for {0}: Status code: {1.code} Reason: {1.reason}\n'.format(endpoint, e))

        return data

    def get_motifs(self, species, gene, motifs):
        """Query the API for ``motifs`` of ``gene`` in ``species``.

        :return: whatever :meth:`perform_rest_action` returns for the query
        """
        # Percent-escape the values; the comma is kept because it is the
        # separator of motif lists.
        query = '?query={0}&species={1}&list={2}'.format(
            urllib.quote(str(gene), safe=','),
            urllib.quote(str(species), safe=','),
            urllib.quote(str(motifs), safe=','))
        return self.perform_rest_action(query)


def _start_key(record):
    """Sort key: the start coordinate (second field), numeric when possible."""
    try:
        return int(record[1])
    except (TypeError, ValueError):
        return record[1]


def run(species, gene, motifs):
    """Fetch the motif sites and print them as tab-separated records to
    stdout, ordered by start coordinate. Prints nothing if no data came back.
    """
    client = AREsiteRestClient()
    aresite = client.get_motifs(species, gene, motifs)
    if not aresite:
        return

    # NOTE(review): assumes an error response is a JSON object carrying
    # 'reason'/'message' keys instead of the motif arrays - confirm against
    # the API.
    if "reason" in aresite:
        sys.exit('ERROR: {0}'.format(aresite.get("message", aresite["reason"])))

    # Column order of the output records
    keys = ("chromosomes", "motif_starts", "motif_ends", "exact_motifs",
            "annotation", "strands", "genes", "transcripts",
            "hur_evidence", "ttp_evidence", "auf_evidence")
    columns = [aresite[key] for key in keys]

    for record in sorted(zip(*columns), key=_start_key):
        # print() already terminates the line; appending "\n" would put an
        # empty line after every record. Fields are formatted explicitly so
        # numeric JSON values do not break the join.
        print(u"\t".join(u"{0}".format(field) for field in record))


if __name__ == '__main__':
    run(options.species, options.gene, options.motif)
<!-- File: aresite2/aresite2.xml
     Galaxy tool wrapper that runs aresite2.py and captures its stdout as a
     BED dataset. -->
<tool id="AREsite2_REST" name="Get motifs from AREsite2" version="0.1.2">
  <description>AREsite2 REST Interface</description>
  <requirements>
    <requirement type="package" version="2.12.4">requests</requirement>
    <requirement type="package" version="1.10.0">six</requirement>
  </requirements>
  <command>
<![CDATA[
python '$__tool_directory__/aresite2.py'
-g '$gene'
-m '$motif'
-s '$species_selector'
> '$AREsite2_REST_out'
]]>
  </command>

  <inputs>
    <param name="gene" type="text" label="Gene ID to search for" help="Try e.g. cxcl2" />
    <param name="species_selector" type="select" label="Select species">
      <option value="Homo_sapiens" selected="true">Homo sapiens</option>
      <option value="Mus_musculus">Mus musculus</option>
      <option value="Danio_rerio">Danio rerio</option>
      <option value="Drosophila_melanogaster">Drosophila melanogaster</option>
      <option value="Caenorhabditis_elegans">Caenorhabditis elegans</option>
    </param>
    <!-- The command always passes -m, so give the field the same default the script uses -->
    <param name="motif" type="text" value="ATTTA" label="Motif to search for" help="Choose a single motif, e.g. ATTTA" />
  </inputs>

  <outputs>
    <data name="AREsite2_REST_out" label="AREsite2_Rest_${motif}_${gene}_${species_selector}" format="bed" />
  </outputs>

  <tests>
    <test>
      <!-- param/output names must match the names declared in inputs/outputs -->
      <param name="species_selector" value="Homo_sapiens" />
      <param name="motif" value="ATTTA" />
      <param name="gene" value="Cxcl2" />
      <output name="AREsite2_REST_out" file="AREsite2_Rest_ATTTA_cxcl2_Homo_sapiens.bed" />
    </test>
  </tests>

  <help>
<![CDATA[
**What it does**

Retrieve motif information in BED format from AREsite2 using its REST API.

]]>
  </help>
  <citations>
    <citation type="doi">10.1093/nar/gkv1238</citation>
  </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/aresite2/test-data/AREsite2_Rest_ATTTA_cxcl2_Homo_sapiens.bed Wed Feb 01 09:59:29 2017 -0500 @@ -0,0 +1,11 @@ +chr4 74097045 74097050 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097083 74097088 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097257 74097262 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097449 74097454 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097499 74097504 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA <a href="http://www.genomebiology.com/content/supplementary/gb-2014-15-1-r12-s2.csv">http://www.genomebiology.com/content/supplementary/gb-2014-15-1-r12-s2.csv</a> NA +chr4 74097589 74097594 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097593 74097598 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097597 74097602 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097601 74097606 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74097605 74097610 ATTTA Exon^3UTR - ENSG00000081041 ENST00000508487 NA NA NA +chr4 74098127 74098132 ATTTA Intron^CDS - ENSG00000081041 ENST00000508487 NA NA NA