changeset 3:755662977150 draft

Uploaded repo
author jfallmann
date Wed, 01 Feb 2017 09:59:29 -0500
parents 9b1beb18b477
children fccb58bd1fae
files aresite2/RestAPI.pl aresite2/aresite2.py aresite2/aresite2.xml aresite2/test-data/AREsite2_Rest_ATTTA_cxcl2_Homo_sapiens.bed
diffstat 4 files changed, 318 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aresite2/RestAPI.pl	Wed Feb 01 09:59:29 2017 -0500
@@ -0,0 +1,135 @@
+#!/usr/bin/env perl
+
+#Copyright (C) 2015 Joerg Fallmann <joerg.fallmann@univie.ac.at>
+#This library is free software; you can redistribute it and/or modify
+#it under the same terms as Perl itself, either Perl version 5.10.0 or,
+#at your option, any later version of Perl 5 you may have available.
+#This program is distributed in the hope that it will be useful, but
+#WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+#General Public License for more details.
+#With code fragments from https://github.com/Ensembl/ensembl-rest/wiki/Example-Perl-Client
+#Last changed Time-stamp: <2015-10-30 15:25:52> by joerg.fallmann@univie.ac.at
+
+########## Load Modules ##########
+#### We use HTTP::Tiny to send a simple GET request to aresite2
+#### The returned JSON object is parsed with the module JSON
+#### Data::Dumper is used to print the returned hash ref
+use strict;
+use warnings;
+use HTTP::Tiny;
+use Time::HiRes;
+#use LWP::Simple;
+use JSON qw( decode_json );
+use Data::Dumper;
+use Getopt::Long qw( :config posix_default bundling no_ignore_case );
+use Pod::Usage;
+
+########## MAIN ##########
+
+########## Define variables ##########
+my ($VERBOSE, $species, @motifs, $gene, $list);
+### Remember the full command line now: GetOptions removes the options it parses from @ARGV
+my $job = join(" ", $0, @ARGV);
+
+########## Process Commandline ##########
+### Getopt::Long is already configured by the import above (posix_default, bundling, no_ignore_case)
+pod2usage(-verbose => 0)
+	unless GetOptions(
+		"species|s=s" => \$species,
+		"motifs|o=s"  => \@motifs,
+		"gene|g=s"    => \$gene,
+		"help|h"      => sub{pod2usage(-verbose => 1)},
+		"man|m"       => sub{pod2usage(-verbose => 2)},
+		"verbose"     => sub{ $VERBOSE++ }
+	);
+
+########## Print command ##########
+print STDERR "You called ",$job,"\n";
+
+########## Send request ##########
+### Shared HTTP client, service endpoints, and default request headers
+my $http = HTTP::Tiny->new();
+my $url = 'http://rna.tbi.univie.ac.at/AREsite2/api/';
+my $urltest = 'https://rest.ensembl.org/info/species?content-type=application/json';
+my $global_headers = { 'Content-Type' => 'application/json' };
+### State for the client-side throttle in perform_rest_action
+my ($last_request_time, $request_count) = (Time::HiRes::time(), 0);
+
+### Apply defaults for species, motif list (comma-separated), and gene
+if (!defined $species) { $species = "Homo_sapiens"; }
+if (@motifs) { $list = join(",",split(/,/,join(",",@motifs))); }
+if (!defined $list) { $list = "ATTTA"; }
+if (!defined $gene) { $gene = "cxcl2"; }
+
+### Fetch response and create hash ref from response
+my $aresite = run();
+### print hash dump to STDOUT
+print Dumper(\$aresite);
+### Report the request error if there is one, otherwise print some values to STDOUT
+if (defined $aresite->{reason}) {
+	print STDOUT "ERROR:\t".$aresite->{message}."\n";
+}
+else {
+	print STDOUT "GENE: $aresite->{id}\tENSEMBL: $aresite->{ensid}\tCoords: $aresite->{coordinates}\n";
+}
+
+
+### Query AREsite2 for the file-level $gene, $species and $list and return the decoded response
+sub run {
+	### Create query
+	my $query = join("&","?query=$gene","species=$species","list=$list");
+	### Build the request URL locally: appending to the file-level $url would corrupt it for any further call
+	my $request_url = $url.$query;
+	return perform_json_action($request_url);
+}
+
+### Fetch $endpoint with the default headers and return the decoded JSON body
+sub perform_json_action {
+	my $endpoint = shift;
+	my $body = perform_rest_action($endpoint, $global_headers);
+	### An empty or missing body is reported as an empty hash ref
+	if (!$body) { return {}; }
+	return decode_json($body);
+}
+
+### Send a GET request to $endpoint and return the response body (nothing if the body is empty).
+### $headers is an optional hash ref; a JSON Content-Type is added unless one is already set.
+### Dies on a failed request, except for a 429 with Retry-After, which is retried after the advised pause.
+sub perform_rest_action {
+	my ($endpoint, $headers) = @_;
+	$headers ||= {};
+	if (!exists $headers->{'Content-Type'}) {
+		$headers->{'Content-Type'} = 'application/json';
+	}
+
+	### Client-side throttle: once 15 requests were counted, make sure a full second has passed since the last reset
+	if ($request_count == 15) {
+		my $elapsed = Time::HiRes::time() - $last_request_time;
+		### if less than a second then sleep for the remainder of the second
+		Time::HiRes::sleep(1-$elapsed) if ($elapsed < 1);
+		### reset
+		$last_request_time = Time::HiRes::time();
+		$request_count = 0;
+	}
+
+	print STDERR "Fetching from ",$endpoint,"\n";
+	my $response = $http->get($endpoint, {headers => $headers});
+
+	### Success: count the request and hand back the body, if there is one
+	if ($response->{success}) {
+		$request_count++;
+		return $response->{content} if (length $response->{content});
+		return;
+	}
+
+	### Rate limit exceeded: honour Retry-After (lowercase due to our client) and re-request
+	if ($response->{status} == 429 && exists $response->{headers}->{'retry-after'}) {
+		Time::HiRes::sleep($response->{headers}->{'retry-after'});
+		### After sleeping see that we re-request
+		return perform_rest_action($endpoint, $headers);
+	}
+	my ($status, $reason) = ($response->{status}, $response->{reason});
+	die "Failed for $endpoint! Status code: ${status}. Reason: ${reason}\n";
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aresite2/aresite2.py	Wed Feb 01 09:59:29 2017 -0500
@@ -0,0 +1,119 @@
+# A simple tool to connect to the AREsite server and retrieve feature
+# information using the AREsite REST Interface.
+# Parts of this code are from https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ensembl_get_feature_info
+from __future__ import print_function
+
+import json
+import optparse
+import sys
+import urllib
+import urllib2
+import time
+import requests
+from six.moves.urllib.parse import urljoin
+
+usage = "usage: %prog [options] arg1 arg2"
+parser = optparse.OptionParser(usage=usage)
+parser.add_option('-g', '--gene', help='Gene ID to search for')
+parser.add_option('-m', '--motif', help='Motif to look for', default='ATTTA', type=str)
+parser.add_option('-s', '--species', type='choice',
+                  choices=['Homo_sapiens', 'Mus_musculus', 'Danio_rerio', 'Drosophila_melanogaster', 'Caenorhabditis_elegans'], default='Homo_sapiens',
+                  help='Specify the species to investigate')
+options, args = parser.parse_args()
+
+if options.gene is None:
+    raise Exception('- Specify the gene you want to look for!')
+
+if "," in options.motif :
+    raise Exception('- Please only search for single motifs at once')
+
+class AREsiteRestClient(object):
+    def __init__(self, server='http://rna.tbi.univie.ac.at/AREsite2/api/', reqs_per_sec=1):
+        self.server = server
+        self.reqs_per_sec = reqs_per_sec
+        self.req_count = 0
+        self.last_req = 0
+
+    def perform_rest_action(self, endpoint, hdrs=None, params=None):
+        if hdrs is None:
+            hdrs = {}
+
+        if 'Content-Type' not in hdrs:
+            hdrs['Content-Type'] = 'application/json'
+
+        if params:
+            endpoint += '?' + urllib.urlencode(params)
+
+        data = None
+
+        # check if we need to rate limit ourselves
+        if self.req_count >= self.reqs_per_sec:
+            delta = time.time() - self.last_req
+            if delta < 1:
+                time.sleep(1 - delta)
+            self.last_req = time.time()
+            self.req_count = 0
+
+        try:
+            request = urllib2.Request(self.server + endpoint, headers=hdrs)
+            response = urllib2.urlopen(request)
+            content = response.read()
+            if content:
+                data = json.loads(content)
+            self.req_count += 1
+
+        except urllib2.HTTPError, e:
+            # check if we are being rate limited by the server
+            if e.code == 429:
+                if 'Retry-After' in e.headers:
+                    retry = e.headers['Retry-After']
+                    time.sleep(float(retry))
+                    self.perform_rest_action(endpoint, hdrs, params)
+            else:
+                sys.stderr.write('Request failed for {0}: Status code: {1.code} Reason: {1.reason}\n'.format(endpoint, e))
+
+        return data
+
+    def get_motifs(self, species, gene, motifs):
+        query = str('?query={0}&species={1}&list={2}'.format(gene, species, motifs))
+        if query:
+            aresite = self.perform_rest_action(
+                query
+            )
+            return aresite
+        return None
+
+def run(species, gene, motifs):
+    client = AREsiteRestClient()
+    aresite = client.get_motifs(species, gene, motifs)
+    if aresite:
+
+        mots        = aresite["exact_motifs"]
+        starts      = aresite["motif_starts"]
+        ends        = aresite["motif_ends"]
+        chrs        = aresite["chromosomes"]
+        strands     = aresite["strands"]
+        transcripts = aresite["transcripts"]
+        genes       = aresite["genes"]
+        evh         = aresite["hur_evidence"]
+        evt         = aresite["ttp_evidence"]
+        eva         = aresite["auf_evidence"]
+        anno        = aresite["annotation"]
+        
+        aresite = zip(chrs,starts,ends,mots,anno,strands,genes,transcripts,evh,evt,eva)
+
+        def getKey(item):
+            return item[1]
+
+        aresite = sorted(aresite, key=getKey)
+        
+#        outfile = 'AREsite2_Rest_{0}_{1}_{2}.bed'.format(motifs,gene,species)       
+#        f = open(outfile, 'w')
+        
+        for i in range(len(aresite)):
+            #            f.write("\t".join(aresite[i])+"\n")
+            print ("\t".join(aresite[i])+"\n")
+                
+            
+if __name__ == '__main__':  # query the server only when executed as a script
+    run(options.species, options.gene, options.motif)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aresite2/aresite2.xml	Wed Feb 01 09:59:29 2017 -0500
@@ -0,0 +1,53 @@
+<tool id="AREsite2_REST" name="Get motifs from AREsite2" version="0.1.2">
+    <description>AREsite2 REST Interface</description>
+    <requirements>
+        <requirement type="package" version="2.12.4">requests</requirement>
+        <requirement type="package" version="1.10.0">six</requirement>
+    </requirements>
+    <command>
+<![CDATA[
+python '$__tool_directory__/aresite2.py'
+-g '$gene'
+-m '$motif'
+-s '$species_selector'
+> '$AREsite2_REST_out'
+]]>
+    </command>
+
+    <inputs>
+        <param name="gene" type="text" label="Gene ID to search for" help="Try e.g. cxcl2" />
+        <param name="species_selector" type="select" label="Select species">
+            <option value="Homo_sapiens" selected="true">Homo sapiens</option>
+            <option value="Mus_musculus">Mus musculus</option>
+            <option value="Danio_rerio">Danio rerio</option>
+            <option value="Drosophila_melanogaster">Drosophila melanogaster</option>
+            <option value="Caenorhabditis_elegans">Caenorhabditis elegans</option>
+        </param>
+        <param name="motif" type="text" label="Motif to search for" help="Choose a single motif, e.g. ATTTA" />
+    </inputs>
+
+    <outputs>
+        <data name="AREsite2_REST_out" label="AREsite2_Rest_${motif}_${gene}_${species_selector}" format="bed" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="species_selector" value="Homo_sapiens" />
+            <param name="motif" value="ATTTA" />
+            <param name="gene" value="Cxcl2" />
+            <output name="AREsite2_REST_out" file="AREsite2_Rest_ATTTA_cxcl2_Homo_sapiens.bed" />
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+**What it does**
+
+Retrieve motif information in BED format from AREsite2 using its REST API.
+
+]]>
+    </help>
+    <citations>
+      <citation type="doi">10.1093/nar/gkv1238</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aresite2/test-data/AREsite2_Rest_ATTTA_cxcl2_Homo_sapiens.bed	Wed Feb 01 09:59:29 2017 -0500
@@ -0,0 +1,11 @@
+chr4	74097045	74097050	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097083	74097088	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097257	74097262	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097449	74097454	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097499	74097504	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	<a href="http://www.genomebiology.com/content/supplementary/gb-2014-15-1-r12-s2.csv">http://www.genomebiology.com/content/supplementary/gb-2014-15-1-r12-s2.csv</a>	NA
+chr4	74097589	74097594	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097593	74097598	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097597	74097602	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097601	74097606	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74097605	74097610	ATTTA	Exon^3UTR	-	ENSG00000081041	ENST00000508487	NA	NA	NA
+chr4	74098127	74098132	ATTTA	Intron^CDS	-	ENSG00000081041	ENST00000508487	NA	NA	NA