Mercurial > repos > jjjjia > cpo_prediction
changeset 13:a14b12a71a53 draft
planemo upload
author | jjjjia |
---|---|
date | Sat, 25 Aug 2018 20:56:37 -0400 |
parents | 4b2738bc81ed |
children | 15809340a106 |
files | cpo_combine.sh cpo_combiner.sh cpo_galaxy_prediction.py cpo_galaxy_predictions.xml cpo_galaxy_tree.py cpo_prediction_combine.xml |
diffstat | 6 files changed, 85 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpo_combine.sh Sat Aug 25 20:56:37 2018 -0400 @@ -0,0 +1,9 @@ +head -1 ${1[1]} > combined.tsv + +IFS=',' read -ra ADDR <<< "$1" #hax to read in a csv + +head -1 ${ADDR[0]} > ./combined.tsv +for i in "${ADDR[@]}"; do + echo $i + tail -1 $i >> ./combined.tsv +done
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpo_combiner.sh Sat Aug 25 20:56:37 2018 -0400 @@ -0,0 +1,11 @@ +#!/bin/bash + +head -1 ${1[1]} > combined.tsv + +IFS=',' read -ra ADDR <<< "$1" #hax to read in a csv + +head -1 ${ADDR[0]} > ./combined.tsv +for i in "${ADDR[@]}"; do + echo $i + tail -1 $i >> ./combined.tsv +done \ No newline at end of file
--- a/cpo_galaxy_prediction.py Fri Aug 24 19:10:42 2018 -0400 +++ b/cpo_galaxy_prediction.py Sat Aug 25 20:56:37 2018 -0400 @@ -457,7 +457,10 @@ pf.start = int(plasmidFinder.iloc[i,2]) pf.end = int(plasmidFinder.iloc[i,3]) pf.gene = str(plasmidFinder.iloc[i,4]) - pf.shortGene = pf.gene[:pf.gene.index("_")] + if (pf.gene.find("_") > -1): + pf.shortGene = pf.gene[:pf.gene.index("_")] + else: + pf.shortGene = pf.gene pf.coverage = str(plasmidFinder.iloc[i,5]) pf.coverage_map = str(plasmidFinder.iloc[i,6]) pf.gaps = str(plasmidFinder.iloc[i,7]) @@ -528,12 +531,14 @@ rgiAMR = ParseRGIResult(rgi, plasmidContigs, likelyPlasmidContigs) # outputDir + "/predictions/" + ID + ".rgi.txt", plasmidContigs, likelyPlasmidContigs)#*********************** ToJson(rgiAMR, "rgi.json") #************* - carbapenamases = [] + carbapenamases = [] + resfinderCarbas = [] #list of rfinder objects for lindaout list amrGenes = [] for keys in rFinder: carbapenamases.append(rFinder[keys].shortGene + "(" + rFinder[keys].source + ")") + resfinderCarbas.append(rFinder[keys]) for keys in rgiAMR: - if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1): + if (rgiAMR[keys].Drug_Class.find("carbapenem") > -1 and rgiAMR[keys].AMR_Gene_Family.find("beta-lactamase") > -1): if (rgiAMR[keys].Best_Hit_ARO not in carbapenamases): carbapenamases.append(rgiAMR[keys].Best_Hit_ARO+ "(" + rgiAMR[keys].source + ")") else: @@ -598,7 +603,28 @@ #TSV output lindaOut = [] tsvOut = [] - lindaOut.append("new\tID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP") + lindaOut.append("ID\tQUALITY\tExpected Species\tMLST Scheme\tSequence Type\tMLST_ALLELE_1\tMLST_ALLELE_2\tMLST_ALLELE_3\tMLST_ALLELE_4\tMLST_ALLELE_5\tMLST_ALLELE_6\tMLST_ALLELE_7\tSEROTYPE\tK_CAPSULE\tPLASMID_2_RFLP\tPLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH\tPLASMID_1_CARBAPENEMASE\tPLASMID_1_INC_GROUP\tPLASMID_2_RFLP\tPLASMID_2_FAMILY\tPLASMID_2_BEST_MATCH\tPLASMID_2_COVERAGE\tPLASMID_2_SNVS_TO_BEST_MATCH\tPLASMID_2_CARBAPENEMASE\tPLASMID_2_INC_GROUP") + lindaTemp = ID + "\t" #id + lindaTemp += "\t" #quality + lindaTemp += expectedSpecies + "\t" #expected + lindaTemp += mlstHit.species + "\t" #mlstscheme + lindaTemp += str(mlstHit.seqType) + "\t" #seq type + lindaTemp += "\t".join(mlstHit.scheme.split(";")) + "\t"#mlst alleles x 7 + lindaTemp += "\t\t" #sero and kcap + + #resfinderCarbas + for carbs in resfinderCarbas: + if (carbs.source == "plasmid"): # + lindaTemp += "\t\t\t\t\t" #plasmid 1 rflp plasmid 1 family information. PLASMID_1_FAMILY\tPLASMID_1_BEST_MATCH\tPLASMID_1_COVERAGE\tPLASMID_1_SNVS_TO_BEST_MATCH + lindaTemp += carbs.shortGene + "\t" #found an carbapenase + contig = carbs.sequence[6:] #this is the contig number + for i in mSuite.keys(): + if (str(mSuite[i].contig_num) == str(contig)): #found the right plasmid + lindaTemp += mSuite[i].rep_type + lindaOut.append(lindaTemp) + out = open("summary.linda.tsv", 'w') + for item in lindaOut: + out.write("%s\n" % item) tsvOut.append("new\tID\tExpected Species\tMLST Species\tSequence Type\tMLST Scheme\tCarbapenem Resistance Genes\tOther AMR Genes\tTotal Plasmids\tPlasmids ID\tNum_Contigs\tPlasmid Length\tPlasmid RepType\tPlasmid Mobility\tNearest Reference\tDefinitely Plasmid Contigs\tLikely Plasmid Contigs") #start with ID
--- a/cpo_galaxy_predictions.xml Fri Aug 24 19:10:42 2018 -0400 +++ b/cpo_galaxy_predictions.xml Sat Aug 25 20:56:37 2018 -0400 @@ -26,10 +26,11 @@ <param type="data" name="abricate" format="tabular" /> <param type="data" name="rgi" format="tabular" /> <param type="data" name="plasmidfinder" format="tabular" /> - <param type="text" name="expected"/> + <param type="text" name="expected" optional ="false"/> </inputs> <outputs> <data name="tsvSummary" format="tabular" from_work_dir="summary.tsv"/> + <data name="tsvSummaryExistingFormat" format="tabular" from_work_dir="summary.linda.tsv"/> <data name="txtSummary" format="txt" from_work_dir="summary.txt"/> </outputs> <tests>
--- a/cpo_galaxy_tree.py Fri Aug 24 19:10:42 2018 -0400 +++ b/cpo_galaxy_tree.py Sat Aug 25 20:56:37 2018 -0400 @@ -263,7 +263,8 @@ n.add_face(addFace(mData.CarbapenemResistanceGenes), index, "aligned") index = index + 1 for i in range(len(distanceDict[list(distanceDict.keys())[0]])): #this loop adds distance matrix - n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned") + if (n.name in distanceDict): #make sure the column is in the distance matrice + n.add_face(addFace(list(distanceDict[n.name])[i]), index + i, "aligned") t.render("./tree.pdf", w=5000,units="mm", tree_style=ts) #save it as a png. or an phyloxml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpo_prediction_combine.xml Sat Aug 25 20:56:37 2018 -0400 @@ -0,0 +1,31 @@ +<tool id="cpo_prediction_combiner" name="cpo_prediction_combiner" version="0.1.0"> + <description>This tool combines a collection result from cpo_prediction_parser into 1 file</description> + <requirements> + <requirement type="package" version="3.6">python</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + bash '$__tool_directory__/cpo_combine.sh' '$indirs' + ]]> + </command> + <inputs> + <param name="indirs" type="data" multiple="true" format="tabular"/> + </inputs> + <outputs> + <data name="combinedSummary" format="tabular" from_work_dir="combined.tsv"/> + </outputs> + <help> + This tool combines multiple single prediction outputs together into one. + </help> + <citations> + <citation type="bibtex"> +@misc{cpo, + author = {j, j}, + year = {2018}, + title = {cpo_prediction}, + publisher = {j}, + journal = {j of j}, + url = {https://bfjia.net, +}</citation> + </citations> +</tool> \ No newline at end of file