Mercurial > repos > fabio > sbtas_se

diff search.py @ 0:00d6e82d74e9 draft
Uploaded 20180122
author: fabio
date: Mon, 22 Jan 2018 16:41:50 -0500
children: 4291c9d1ff07
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/search.py	Mon Jan 22 16:41:50 2018 -0500
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+
+# https://github.com/ross/requests-futures
+# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
+
+import os, uuid
+import optparse
+import requests
+from requests_futures.sessions import FuturesSession
+
+#### UV0 ####
+# proxy to uv0
+#service_url = "http://deputy.bx.psu.edu/";
+# url to query page
+#query_url = service_url+"query.php";
+# url to echo page: just return 'it works!'
+#echo_url = service_url+"echo.php";
+#############
+
+#### NN14 ####
+service_url = "http://nn14.galaxyproject.org:8080/";
+query_url = service_url+"tree/0/query";
+##############
+
+'''
+# synchronous
+def echo( options, args ):
+    # create a session
+    session = requests.Session()
+    # make a sync get request
+    resp = session.get(echo_url)
+    # check for response status code
+    resp_code = resp.status_code;
+    if resp_code == requests.codes.ok:
+        # get output file path
+        output_file_path = options.output;
+        # write response on the output file
+        with open(output_file_path, 'w') as out:
+            #out.write(resp.data);
+            out.write(resp.content);
+        return 0;
+    else:
+        return resp_code;
+'''
+
+# asynchronous
+def async_request( options, args, payload ):
+    # add additional parameters to the payload
+    payload["tree_id"] = str(options.treeid);
+    payload["search_mode"] = str(options.search);
+    payload["exact_algorithm"] = str(options.exact);
+    payload["search_threshold"] = str(options.sthreshold);
+    # create a session
+    session = FuturesSession();
+    # make an async post request with requests-futures
+    future_req = session.post(query_url, data=payload);
+    # wait for the request to complete, if it has not already
+    resp = future_req.result();
+    # check for response status code
+    resp_code = resp.status_code;
+    # get output file path
+    output_file_path = options.output;
+    # write response on the output file
+    with open(output_file_path, 'w') as out:
+        #out.write(resp.data);
+        out.write(str(resp_code)+"\n"+str(resp.content));
+    if resp_code == requests.codes.ok:
+        return 0;
+    else:
+        return resp_code;
+
+def srase_query( options, args ):
+    multiple_files = {};
+    comma_sep_file_paths = options.files;
+    #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
+    # check if options.files contains at least one file path
+    if comma_sep_file_paths is not None:
+        # split file paths
+        file_paths = comma_sep_file_paths.split(",");
+        # split file names
+        comma_sep_file_names = str(options.names);
+        #print("names: "+str(comma_sep_file_names));
+        file_names = comma_sep_file_names.split(",");
+        # populate a dictionary with the files containing the sequences to query
+        for idx, file_path in enumerate(file_paths):
+            file_name = file_names[idx];
+            with open(file_path, 'r') as content_file:
+                content = content_file.read()
+                multiple_files[file_name] = content;
+                #print(file_name+": "+content+"\n");
+        if len(multiple_files) > 0:
+            return async_request( options, args,  multiple_files );
+            #return echo( options, args );
+    else:
+        search_mode = str(options.search);
+        text_content = "";
+        if search_mode == "0":
+            # try with the sequence in --sequence
+            text_content = options.sequences;
+        elif search_mode == "1":
+            # try with the fasta content in --fasta
+            text_content = options.fasta;
+        #print("sequences: "+text_content);
+        # check if options.sequences contains a list of sequences (one for each row)
+        if text_content is not None:
+            text_content = str(text_content);
+            if text_content.strip():
+                if search_mode == "0":
+                    # populate a dictionary with the files containing the sequences to query
+                    seq_counter = 0;
+                    sequences_arr = text_content.split("__cn__");
+                    for seq in sequences_arr:
+                        seq_index = 'sequence'+str(seq_counter);
+                        multiple_files[seq_index] = seq;
+                        #print(str(seq_counter)+": "+seq);
+                        seq_counter += 1;
+                elif search_mode == "1":
+                    multiple_files["fasta"] = text_content;
+                return async_request( options, args, multiple_files );
+                #return echo( options, args );
+            else:
+                return -1;
+    return -1;
+
+def __main__():
+    # Parse the command line options
+    usage = "Usage: search.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --output output_file_path";
+    parser = optparse.OptionParser(usage = usage);
+    parser.add_option("-i", "--treeid", type="string",
+                    action="store", dest="treeid", help="string representing the tree id");
+    parser.add_option("-f", "--files", type="string",
+                    action="store", dest="files", help="comma separated files path");
+    parser.add_option("-n", "--names", type="string",
+                    action="store", dest="names", help="comma separated names associated to the files specified in --files");
+    parser.add_option("-s", "--sequences", type="string",
+                    action="store", dest="sequences", help="contains a list of sequences (one for each row)");
+    parser.add_option("-a", "--fasta", type="string",
+                    action="store", dest="fasta", help="contains the content of a fasta file");
+    parser.add_option("-x", "--search", type="int", default=0,
+                    action="store", dest="search", help="search mode");
+    parser.add_option("-e", "--exact", type="int", default=0,
+                    action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
+    parser.add_option("-t", "--sthreshold", type="string",
+                    action="store", dest="sthreshold", help="threshold applied to the search algrithm");
+    parser.add_option("-o", "--output", type="string",
+                    action="store", dest="output", help="output file path");
+    parser.add_option("-v", "--version", action="store_true", dest="version",
+                    default=False, help="display version and exit");
+    (options, args) = parser.parse_args();
+    if options.version:
+        print __version__;
+    else:
+        srase_query( options, args );
+
+if __name__ == "__main__": __main__()
author	fabio
date	Mon, 22 Jan 2018 16:41:50 -0500
parents
children	4291c9d1ff07