Mercurial > repos > guerler > springsuite
changeset 23:5469e19f1f96 draft
"planemo upload commit 37a4c6844fd7ab1071ddf90f51915ec1a13c26b3"
author | guerler |
---|---|
date | Thu, 29 Oct 2020 13:04:47 +0000 (2020-10-29) |
parents | acaff61a09b2 |
children | 802daf2993b0 |
files | spring_minz.py spring_minz.xml test-data/dbCAN_log.txt test-data/pdb70_log.txt |
diffstat | 4 files changed, 168 insertions(+), 147 deletions(-) [+] |
line wrap: on
line diff
--- a/spring_minz.py Wed Oct 28 06:49:58 2020 +0000 +++ b/spring_minz.py Thu Oct 29 13:04:47 2020 +0000 @@ -3,120 +3,125 @@ import os def main(args): - inputs = list() - with open(args.inputlist) as file: - for index, line in enumerate(file): - name = line.strip() - inputs.append(name) - print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) - targets = list() - duplicates = 0 - with open(args.targetlist) as file: - for index, line in enumerate(file): - name = line.strip() - targets.append(name) - if name in inputs: - duplicates = duplicates + 1 - print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) - crossReference = dict() - with open(args.crossreference) as file: - for index, line in enumerate(file): - columns = line.split() - core = columns[0] - partner = columns[-1] - if core not in crossReference: - crossReference[core] = [] - crossReference[core].append(partner) - print ("Loaded cross reference from `%s`." % args.crossreference) - interactions = dict() - for targetName in targets: - targetDirectory = args.targetpath.rstrip("/") - targetFile = "%s/%s" % (targetDirectory, targetName) - matchScores(targetFile=targetFile, - targetName=targetName, - inputs=sorted(inputs), - inputPath=args.inputpath, - crossReference=crossReference, - minScore=args.minscore, - idLength=args.idx, - interactions=interactions) - if duplicates != len(targets): - for inputName in inputs: - inputDirectory = args.inputpath.rstrip("/") - inputFile = "%s/%s" % (inputDirectory, inputName) - matchScores(targetFile=inputFile, - targetName=inputName, - inputs=targets, - inputPath=args.targetpath, - crossReference=crossReference, - minScore=args.minscore, - idLength=args.idx, - interactions=interactions) - interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) - with open(args.output, 'w') as output_file: - for entry in interactions: - output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) + logFile = open(args.log, 'a+') + targets = list() + targetPath = args.targetpath.rstrip("/") + with open(args.targetlist) as file: + for index, line in enumerate(file): + name = line.strip() + targets.append(name) + print ("Loaded %s target names from `%s`." % (len(targets), args.targetlist)) + if args.inputlist: + inputs = list() + inputPath = args.inputpath.rstrip("/") + with open(args.inputlist) as file: + for index, line in enumerate(file): + name = line.strip() + inputs.append(name) + print ("Loaded %s input names from `%s`." % (len(inputs), args.inputlist)) + else: + inputs = targets + inputPath = targetPath + crossReference = dict() + with open(args.crossreference) as file: + for index, line in enumerate(file): + columns = line.split() + core = columns[0] + partner = columns[-1] + if core not in crossReference: + crossReference[core] = [] + crossReference[core].append(partner) + print ("Loaded cross reference from `%s`." % args.crossreference) + interactions = dict() + for targetName in targets: + targetFile = "%s/%s" % (targetPath, targetName) + matchScores(targetFile=targetFile, + targetName=targetName, + inputs=inputs, + inputPath=inputPath, + crossReference=crossReference, + minScore=args.minscore, + idLength=args.idx, + logFile=logFile, + interactions=interactions) + if args.inputlist: + for inputName in inputs: + inputDirectory = inputPath + inputFile = "%s/%s" % (inputDirectory, inputName) + matchScores(targetFile=inputFile, + targetName=inputName, + inputs=targets, + inputPath=targetPath, + crossReference=crossReference, + minScore=args.minscore, + idLength=args.idx, + logFile=logFile, + interactions=interactions) + interactions = sorted(interactions.values(), key=lambda item: item["minZ"], reverse=True) + with open(args.output, 'w') as output_file: + for entry in interactions: + output_file.write("%s\t%s\t%s\t%s\n" % (entry["targetName"], entry["inputName"], entry["minZ"], entry["minInfo"])) + logFile.close() -def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, interactions): - targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) - if not targetHits: - print("No targets found `%s`" % targetFile) - else: - print ("Loaded target scores from `%s`." % targetFile) - for inputName in inputs: - inputDirectory = inputPath.rstrip("/") - inputFile = "%s/%s" % (inputDirectory, inputName) - inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) - minZ = 0 - minInfo = "" - for t in targetHits: - if t in crossReference: - partners = crossReference[t] - for p in partners: - if p in inputHits: - score = min(targetHits[t], inputHits[p]) - if score > minZ: - minZ = score - minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) - if minZ > minScore: - if targetName > inputName: - interactionKey = "%s_%s" % (targetName, inputName) - else: - interactionKey = "%s_%s" % (inputName, targetName) - if interactionKey in interactions: - if interactions[interactionKey]["minZ"] >= minZ: - continue - interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) - print("Predicting: %s, min-Z: %s, templates: %s" % (inputName, minZ, minInfo)) - return interactions +def matchScores(targetFile, targetName, inputs, inputPath, crossReference, minScore, idLength, logFile, interactions): + targetTop, targetHits = getTemplateScores(targetFile, minScore, idLength) + if not targetHits: + print("No targets found `%s`" % targetFile) + else: + print ("Loaded target scores from `%s`." % targetFile) + for inputName in inputs: + inputFile = "%s/%s" % (inputPath, inputName) + inputTop, inputHits = getTemplateScores(inputFile, minScore, idLength) + minZ = 0 + minInfo = "" + for t in targetHits: + if t in crossReference: + partners = crossReference[t] + for p in partners: + if p in inputHits: + score = min(targetHits[t], inputHits[p]) + if score > minZ: + minZ = score + minInfo = "%s\t%s\t%s\t%s" % (targetTop, inputTop, t, p) + if minZ > minScore: + if targetName > inputName: + interactionKey = "%s_%s" % (targetName, inputName) + else: + interactionKey = "%s_%s" % (inputName, targetName) + if interactionKey in interactions: + if interactions[interactionKey]["minZ"] >= minZ: + continue + interactions[interactionKey] = dict(targetName=targetName, inputName=inputName, minZ=minZ, minInfo=minInfo) + logFile.write("Interaction between %s and %s [min-Z: %s].\n" % (targetName, inputName, minZ)) def getTemplateScores(hhrFile, minScore, identifierLength): - result = dict() - topTemplate = None - identifierLength = identifierLength + 4 - if os.path.isfile(hhrFile): - with open(hhrFile) as file: - for index, line in enumerate(file): - if index > 8: - if not line.strip(): - break - templateId = line[4:identifierLength] - templateScore = float(line[57:63]) - if templateScore > minScore: - if topTemplate is None: - topTemplate = templateId - result[templateId] = templateScore - return topTemplate, result + result = dict() + topTemplate = None + identifierLength = identifierLength + 4 + if os.path.isfile(hhrFile): + with open(hhrFile) as file: + for index, line in enumerate(file): + if index > 8: + if not line.strip(): + break + templateId = line[4:identifierLength] + templateScore = float(line[57:63]) + if templateScore > minScore: + if topTemplate is None: + topTemplate = templateId + result[templateId] = templateScore + return topTemplate, result if __name__ == "__main__": - parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') - parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=True) - parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=True) - parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) - parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) - parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) - parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) - parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) - parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) - args = parser.parse_args() - main(args) \ No newline at end of file + parser = argparse.ArgumentParser(description='This script identifies interactions by detecting matching HH-search results.') + parser.add_argument('-tl', '--targetlist', help='Text file containing identifiers.', required=True) + parser.add_argument('-tp', '--targetpath', help='Directory containing `hhr` files', required=True) + parser.add_argument('-il', '--inputlist', help='Text file containing identifiers.', required=False) + parser.add_argument('-ip', '--inputpath', help='Directory containing `hhr` files', required=False) + parser.add_argument('-c', '--crossreference', help='Cross Reference index file', required=True) + parser.add_argument('-x', '--idx', help='Length of identifier', type=int, default=6) + parser.add_argument('-o', '--output', help='Output file containing min-Z scores', required=True) + parser.add_argument('-l', '--log', help='Log file', required=True) + parser.add_argument('-m', '--minscore', help='min-Z score threshold', type=int, default=10) + args = parser.parse_args() + main(args) \ No newline at end of file
--- a/spring_minz.xml Wed Oct 28 06:49:58 2020 +0000 +++ b/spring_minz.xml Thu Oct 29 13:04:47 2020 +0000 @@ -2,59 +2,60 @@ <description>filter operation</description> <code file="spring_minz.py"/> <command detect_errors="exit_code"><![CDATA[ - mkdir -p inputs && - #for input in $inputs - link '${str(input)}' 'inputs/${input.element_identifier}' && - echo '${input.element_identifier}' >> 'input_list' && - #end for - mkdir -p targets && + mkdir -p targets && #for target in $targets link '${str(target)}' 'targets/${target.element_identifier}' && echo '${target.element_identifier}' >> 'target_list' && #end for - python3 '$__tool_directory__/spring_minz.py' -il input_list -ip inputs -tl target_list -tp targets -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' + #if str($input_type.input_type_selector) == "true": + mkdir -p inputs && + #for input in $input_type.inputs + link '${str(input)}' 'inputs/${input.element_identifier}' && + echo '${input.element_identifier}' >> 'input_list' && + #end for + python3 '$__tool_directory__/spring_minz.py' -tl target_list -tp targets -il input_list -ip inputs -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' -l '$log' + #else + python3 '$__tool_directory__/spring_minz.py' -tl target_list -tp targets -m '$minscore' -c '$crossreference' -x '$idx' -o '$output' -l '$log' + #end if ]]></command> <inputs> - <param format="txt" name="targets" type="data_collection" label="Target Profiles" help="Homology search result of target/query profiles `hhr`."/> - <param format="txt" name="inputs" type="data_collection" collection_type="list" label="Input Profiles" help="Homology search results of input profiles `hhr`."/> + <param format="txt" name="targets" type="data_collection" collection_type="list" label="Target Profiles" help="Homology search result of target/query profiles `hhr`."/> + <conditional name="input_type"> + <param name="input_type_selector" type="boolean" label="Identify interactions across sets?" checked="true"/> + <when value="true"> + <param format="txt" name="inputs" type="data_collection" collection_type="list" label="Input Profiles" help="Homology search results of input profiles `hhr`."/> + </when> + </conditional> <param format="txt" name="crossreference" type="data" label="Cross Reference" help="Cross reference of interacting proteins `first_id metadata_id second_id`."/> <param name="minscore" type="integer" label="Score threshold" value="10" min="1" max="200" help="Matching interaction pairs with a score lower than this threshold will be excluded."/> <param name="idx" type="integer" label="Identifier length" value="6" min="1" max="20" help="Specify the length of the identifier e.g. `1ACB_A` has length 6."/> </inputs> <outputs> - <data format="tabular" name="output" /> + <data format="tabular" name="output" label="SPRING min-Z Table"/> + <data format="txt" name="log" label="SPRING min-Z Log" /> </outputs> <tests> <test> <param name="targets"> <collection type="list"> - <element name="6WEY_A.hhr" value="6WEY_A.hhr" /> + <element name="7BQY_A.hhr" value="7BQY_A.hhr" /> + <element name="6WLC_A.hhr" value="6WLC_A.hhr" /> <element name="6WJI_A.hhr" value="6WJI_A.hhr" /> - <element name="6WLC_A.hhr" value="6WLC_A.hhr" /> - <element name="7BQY_A.hhr" value="7BQY_A.hhr" /> + <element name="6WEY_A.hhr" value="6WEY_A.hhr" /> <element name="6W37_A.hhr" value="6W37_A.hhr" /> <element name="6W9C_A.hhr" value="6W9C_A.hhr" /> + <element name="6W4H_B.hhr" value="6W4H_B.hhr" /> <element name="6W4H_A.hhr" value="6W4H_A.hhr" /> - <element name="6W4H_B.hhr" value="6W4H_B.hhr" /> <element name="6W9Q_A.hhr" value="6W9Q_A.hhr" /> </collection> </param> + <conditional name="input_type"> + <param name="input_type_selector" type="boolean" checked="false"/> + </conditional> <param name="idx" value="10"/> - <param name="inputs"> - <collection type="list"> - <element name="6WEY_A.hhr" value="6WEY_A.hhr" /> - <element name="6WJI_A.hhr" value="6WJI_A.hhr" /> - <element name="6WLC_A.hhr" value="6WLC_A.hhr" /> - <element name="7BQY_A.hhr" value="7BQY_A.hhr" /> - <element name="6W37_A.hhr" value="6W37_A.hhr" /> - <element name="6W9C_A.hhr" value="6W9C_A.hhr" /> - <element name="6W4H_A.hhr" value="6W4H_A.hhr" /> - <element name="6W4H_B.hhr" value="6W4H_B.hhr" /> - <element name="6W9Q_A.hhr" value="6W9Q_A.hhr" /> - </collection> - </param> <param name="crossreference" value="dbCAN_random.txt"/> <output name="output" file="dbCAN_result.txt"/> + <output name="log" file="dbCAN_log.txt"/> </test> <test> <param name="targets"> @@ -62,16 +63,20 @@ <element name="NP_000282.1.hhr" value="NP_000282.1.hhr" /> </collection> </param> - <param name="inputs"> - <collection type="list"> - <element name="NP_000282.1.hhr" value="NP_000282.1.hhr" /> - <element name="NP_000290.2.hhr" value="NP_000290.2.hhr" /> - <element name="NP_000548.2.hhr" value="NP_000548.2.hhr" /> - <element name="NP_000836.2.hhr" value="NP_000836.2.hhr" /> - </collection> - </param> + <conditional name="input_type"> + <param name="input_type_selector" type="boolean" checked="true"/> + <param name="inputs"> + <collection type="list"> + <element name="NP_000282.1.hhr" value="NP_000282.1.hhr" /> + <element name="NP_000290.2.hhr" value="NP_000290.2.hhr" /> + <element name="NP_000548.2.hhr" value="NP_000548.2.hhr" /> + <element name="NP_000836.2.hhr" value="NP_000836.2.hhr" /> + </collection> + </param> + </conditional> <param name="crossreference" value="pdb70_random.txt"/> <output name="output" file="pdb70_result.txt"/> + <output name="log" file="pdb70_log.txt"/> </test> </tests> <help><![CDATA[
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dbCAN_log.txt Thu Oct 29 13:04:47 2020 +0000 @@ -0,0 +1,9 @@ +Interaction between 7BQY_A.hhr and 6W37_A.hhr [min-Z: 14.0]. +Interaction between 7BQY_A.hhr and 6W9C_A.hhr [min-Z: 24.2]. +Interaction between 7BQY_A.hhr and 6W4H_B.hhr [min-Z: 24.0]. +Interaction between 6W37_A.hhr and 6W37_A.hhr [min-Z: 14.0]. +Interaction between 6W37_A.hhr and 6W9C_A.hhr [min-Z: 21.3]. +Interaction between 6W37_A.hhr and 6W4H_B.hhr [min-Z: 16.0]. +Interaction between 6W4H_A.hhr and 6W4H_A.hhr [min-Z: 24.7]. +Interaction between 6W9Q_A.hhr and 6W9C_A.hhr [min-Z: 21.0]. +Interaction between 6W9Q_A.hhr and 6W4H_B.hhr [min-Z: 22.3].