# HG changeset patch
# User earlhaminst
# Date 1572522539 14400
# Node ID 16e925bf567e78dd9d7bddffd255af68063f2d5f
# Parent 6a5282f71f82e56603dc91ef79802b0da1fe5222
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit ed32f2e6d8174873cefcbe141084f857f84b0586"
diff -r 6a5282f71f82 -r 16e925bf567e ete_gene_cnv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ete_gene_cnv.py Thu Oct 31 07:48:59 2019 -0400
@@ -0,0 +1,75 @@
+from __future__ import print_function
+
+import argparse
+import collections
+
+from ete3 import PhyloTree
+
+
+def printTSV(myDict, colList=None):
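+ """ Print a list of dictionaries (myDict) as tab-separated rows, preceded by a header row of column names.
+ If column names (colList) aren't specified, the keys of the first dictionary are used, in the order that dictionary yields them. None values are printed as empty fields.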
+ Author: Thierry Husson - Use it as you want but don't blame me.
+ """
+ if not colList:
+ colList = list(myDict[0].keys() if myDict else [])
+
+ myList = [colList]
+
+ for item in myDict:
+ myList.append([str(item[col] if item[col] is not None else '') for col in colList])
+
+ for item in myList:
+ print(*item, sep="\t")
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Gene Copy Number Finder')
+ parser.add_argument('--genetree', required=True, help='GeneTree in nhx format')
+ parser.add_argument('--speciesorder', required=True, help='Comma-separated species list')
+ args = parser.parse_args()
+
+ species_list = args.speciesorder.split(",")
+ species_list = [_.strip() for _ in species_list]
+ table = []
+
+ with open(args.genetree, "r") as f:
+ # Read multiple gene trees, one gene tree per line
+ for line in f:
+ # Remove empty NHX features that can be produced by TreeBest but break ete3
+ line = line.replace('[&&NHX]', '')
+
+ # Parse a single gene tree
+ genetree = PhyloTree(line)
+ leaves = genetree.get_leaf_names()
+
+ leaves_parts = [_.split("_") for _ in leaves]
+ for i, leaf_parts in enumerate(leaves_parts):
+ if len(leaf_parts) != 2:
+ raise Exception("Leaf node '%s' is not in gene_species format" % leaves[i])
+
+ leaves_species = [_[1] for _ in leaves_parts]
+ species_counter = collections.Counter(leaves_species)
+
+ # Assign to ref_species the first element of species_list which
+ # appears in a leaf node
+ for ref_species in species_list:
+ if ref_species in species_counter:
+ break
+ else:
+ raise Exception("None of the specified species was found in the GeneTree '%s'" % line)
+
+ # Find the gene of the (first) leaf node for the ref_species
+ for leaf_parts in leaves_parts:
+ if leaf_parts[1] == ref_species:
+ species_counter['gene'] = leaf_parts[0]
+ break
+
+ table.append(species_counter)
+
+ colList = ["gene"] + species_list
+ printTSV(table, colList)
+
+
+if __name__ == "__main__":
+ main()
diff -r 6a5282f71f82 -r 16e925bf567e ete_gene_cnv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ete_gene_cnv.xml Thu Oct 31 07:48:59 2019 -0400
@@ -0,0 +1,33 @@
+
+ from a genetree using the ETE Toolkit
+
+ ete_macros.xml
+
+
+ '$genes'
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 6a5282f71f82 -r 16e925bf567e ete_homology_classifier.xml
--- a/ete_homology_classifier.xml Thu Oct 11 11:52:28 2018 -0400
+++ b/ete_homology_classifier.xml Thu Oct 31 07:48:59 2019 -0400
@@ -1,5 +1,5 @@
- from a genetree utilising the ETE Toolkit
+ from a genetree using the ETE Toolkit
ete_macros.xml
diff -r 6a5282f71f82 -r 16e925bf567e test-data/test.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.nhx Thu Oct 31 07:48:59 2019 -0400
@@ -0,0 +1,3 @@
+(((a_w,a_x),(a_y,a_z)),a_zz);
+(((a_w,a_w),(a_y,a_z)),a_zz);
+(((a_w,a_x),(a_y,a_y)),a_zz);
\ No newline at end of file
diff -r 6a5282f71f82 -r 16e925bf567e test-data/test.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.tsv Thu Oct 31 07:48:59 2019 -0400
@@ -0,0 +1,4 @@
+gene w x y z zz
+a 1 1 1 1 1
+a 2 0 1 1 1
+a 1 1 2 0 1