# HG changeset patch
# User rnateam
# Date 1495471522 14400
# Node ID 07ad2d77f28a06a267d7c96180c878652a6767e0
# Parent dff6a5a172212e6facd30e138f87a4c0d16a8bc8
# planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
#
# The changeset also touched preprocessing.xml; that hunk carried no
# recoverable content in this copy. What follows is splitSHAPE.py, which the
# changeset added as a new file.
"""Split per-sequence reactivity rows into one record per sequence window.

Usage: splitSHAPE.py <shape_file> [<win_size>]

Inputs
    FASTA/data.names  One whitespace-separated record per line. Field 1 is
                      an integer id, field 2 is the window name, field 4 is
                      the original sequence id followed by ``_<suffix>``.
    <shape_file>      FASTA-like text: ``>`` header lines, each followed by
                      rows whose first field is an integer position and
                      whose second field is copied through verbatim
                      (presumably the reactivity value).

Output
    shape_data_split.react  For every record in data.names, a header line
                            ``><id> <window name>`` followed by the rows
                            that fall inside the window, with positions
                            renumbered to start at 1.
"""
import os
import re
import sys

NAMES_FILE = "FASTA/data.names"
OUTPUT_FILE = "shape_data_split.react"

_NUMBER = re.compile(r"\d+")


def read_names(path):
    """Return ``(seq_id, window_name, original_id)`` tuples read from *path*.

    Blank lines are skipped, so the last record is kept whether or not the
    file ends with a newline. A line with fewer than four fields raises
    IndexError; a non-integer first field raises ValueError.
    """
    records = []
    with open(path, "r") as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            # Field 4 looks like "<original id>_<suffix>"; only the part
            # before the last underscore is the key into the shape file.
            original_id = fields[3].rsplit("_", 1)[0]
            records.append((int(fields[0]), fields[1], original_id))
    return records


def read_reactivities(path):
    """Return a dict mapping each header in *path* to its list of row strings.

    A header is any line starting with ``>``; its key is the line with every
    ``>`` removed and surrounding whitespace stripped. Rows that appear
    before the first header are discarded, and blank lines are ignored so
    that a trailing newline cannot end up inside a window slice.
    """
    reactivities = {}
    current = []  # rows seen before any header land here and are never used
    with open(path, "r") as handle:
        for raw in handle:
            line = raw.rstrip("\n")
            if line.startswith(">"):
                current = []
                reactivities[line.replace(">", "").strip()] = current
            elif line.strip():
                current.append(line)
    return reactivities


def split_reactivities(records, reactivities):
    """Build the output text for *records* using rows from *reactivities*.

    The window bounds are the 2nd and 3rd integers found in the window name
    (the 1st integer is assumed to belong to the sequence label, e.g. the
    ``1`` in ``SEQ1#76#225#+`` -- TODO confirm against the data.names
    producer). Rows ``start..end`` are selected by list index, i.e. row *k*
    of a sequence is assumed to describe position *k*.

    Raises:
        RuntimeError: if a record's original id has no entry in
            *reactivities*.
        IndexError: if a window name contains fewer than three integers.
    """
    parts = []
    for seq_id, window_name, original_id in records:
        if original_id not in reactivities:
            # Report the single missing key, not the whole id list.
            raise RuntimeError('Error key {} not found'.format(original_id))

        numbers = _NUMBER.findall(window_name)
        start, end = int(numbers[1]), int(numbers[2])

        parts.append(">{} {}\n".format(seq_id, window_name))
        for row in reactivities[original_id][start - 1:end]:
            fields = row.split()
            # Renumber every row relative to the window start so each window
            # runs 1..length. Shifting only the positions above win_size (and
            # by `start` rather than `start - 1`) produced duplicated,
            # non-monotonic positions for every window after the first.
            position = int(fields[0]) - start + 1
            parts.append("{}\t{}\n".format(position, fields[1]))
    return "".join(parts)


def main(argv=None):
    """Run the split using *argv* (defaults to ``sys.argv[1:]``).

    ``argv[0]`` is the shape file. ``argv[1]`` (window size) is still
    accepted, and must be an integer if given, so existing command lines
    keep working; it is no longer needed for the renumbering.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    shape_file = args[0]
    if len(args) > 1:
        int(args[1])  # keep rejecting a non-integer window size

    records = read_names(NAMES_FILE)
    reactivities = read_reactivities(shape_file)
    text = split_reactivities(records, reactivities)

    with open(OUTPUT_FILE, "w") as out:
        out.write(text)


if __name__ == "__main__":
    main()