Mercurial > repos > earlhaminst > ete
changeset 9:b29ee6a16524 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ete commit 17c65045b726d0695814bfe761e534f6521786f1"
author | earlhaminst |
---|---|
date | Tue, 20 Oct 2020 15:10:40 +0000 |
parents | 16e925bf567e |
children | 541a2ffc01ff |
files | ete_genetree_splitter.py ete_genetree_splitter.xml ete_lineage_generator.py test-data/31_genetree.nhx test-data/32_genetree.nhx test-data/33_genetree.nhx test-data/34_genetree.nhx |
diffstat | 7 files changed, 51 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/ete_genetree_splitter.py Thu Oct 31 07:48:59 2019 -0400
+++ b/ete_genetree_splitter.py Tue Oct 20 15:10:40 2020 +0000
@@ -13,6 +13,7 @@
     parser.add_option('--species_format', type='int', default=8, help='Species Tree input format (0-9)')
     parser.add_option('--gene_node', type='int', default=0, help='Gene node format 0=gene_species, 1=species_gene')
     parser.add_option('--gainlose', action='store_true', default=False, help='Find out gene gain/lose')
+    parser.add_option('--split', type='choice', choices=['dups', 'treeko'], dest="split", default='dups', help='Choose GeneTree splitting algorithms')
     parser.add_option('--output_format', type='int', default=9, help='GeneTree output format (0-9)')
     options, args = parser.parse_args()
 
@@ -47,11 +48,22 @@
 
         genetree, events = genetree.reconcile(speciestree)
 
-    # splits tree by duplication events which returns the list of all subtrees resulting from splitting current tree by its duplication nodes.
-    for cluster_id, node in enumerate(genetree.split_by_dups(), 1):
-        outfile = str(cluster_id) + '_genetree.nhx'
-        with open(outfile, 'w') as f:
-            f.write(node.write(format=options.output_format))
+    if options.split == "dups":
+        # splits tree by duplication events which returns the list of all subtrees resulting from splitting current tree by its duplication nodes.
+        for cluster_id, node in enumerate(genetree.split_by_dups(), 1):
+            outfile = str(cluster_id) + '_genetree.nhx'
+            with open(outfile, 'w') as f:
+                f.write(node.write(format=options.output_format))
+    elif options.split == "treeko":
+        # splits tree using the TreeKO algorithm.
+        ntrees, ndups, sptrees = genetree.get_speciation_trees()
+
+        cluster_id = 0
+        for spt in sptrees:
+            cluster_id = cluster_id + 1
+            outfile = str(cluster_id) + '_genetree.nhx'
+            with open(outfile, 'w') as f:
+                f.write(spt.write(format=options.output_format))
 
 
 def parse_sp_name(node_name):
--- a/ete_genetree_splitter.xml Thu Oct 31 07:48:59 2019 -0400
+++ b/ete_genetree_splitter.xml Tue Oct 20 15:10:40 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="ete_genetree_splitter" name="ETE GeneTree splitter" version="@VERSION@">
+<tool id="ete_genetree_splitter" name="ETE GeneTree splitter" version="@VERSION@+galaxy1">
     <description>from a genetree using the ETE Toolkit</description>
     <macros>
         <import>ete_macros.xml</import>
@@ -12,6 +12,7 @@
 python '$__tool_directory__/ete_genetree_splitter.py'
 --genetree '$genetreeFile'
 --gene_node $gene_node
+--split $splitter
 #if $gainlose_conditional.gainlose == "True"
     --speciestree '$gainlose_conditional.speciesFile'
     --species_format $gainlose_conditional.species_format
@@ -25,6 +26,10 @@
             <option value="0" selected="true">gene_species</option>
             <option value="1">species_gene</option>
         </param>
+        <param name="splitter" type="select" label="GeneTree splitting algorithm">
+            <option value="dups">Split by Duplication</option>
+            <option value="treeko">Split using TreeKO algorithm</option>
+        </param>
         <conditional name="gainlose_conditional">
             <param name="gainlose" type="select" label="Find out gene gain/lose">
                 <option value="True">Yes</option>
@@ -72,6 +77,7 @@
         <test>
             <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
             <param name="gene_node" value="0"/>
+            <param name="splitter" value="dups"/>
             <param name="output_format" value="9" />
             <output_collection name="genetrees_lists" type="list" count="4">
                 <element name="1" file="11_genetree.nhx" ftype="nhx" />
@@ -83,6 +89,7 @@
         <test>
             <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
             <param name="gene_node" value="0"/>
+            <param name="splitter" value="dups"/>
             <param name="gainlose" value="True" />
             <param name="speciesFile" ftype="nhx" value="speciestree.nhx" />
             <param name="species_format" value="8" />
@@ -95,6 +102,18 @@
                 <element name="5" file="25_genetree.nhx" ftype="nhx" />
             </output_collection>
         </test>
+        <test>
+            <param name="genetreeFile" ftype="nhx" value="genetree.nhx" />
+            <param name="gene_node" value="0"/>
+            <param name="splitter" value="treeko"/>
+            <param name="output_format" value="9" />
+            <output_collection name="genetrees_lists" type="list" count="5">
+                <element name="1" file="31_genetree.nhx" ftype="nhx" />
+                <element name="2" file="32_genetree.nhx" ftype="nhx" />
+                <element name="3" file="33_genetree.nhx" ftype="nhx" />
+                <element name="4" file="34_genetree.nhx" ftype="nhx" />
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 Split GeneTrees from single GeneTree by duplication event using the `ETE Toolkit`_.
--- a/ete_lineage_generator.py Thu Oct 31 07:48:59 2019 -0400
+++ b/ete_lineage_generator.py Tue Oct 20 15:10:40 2020 +0000
@@ -41,17 +41,19 @@
     RANK_IDX: mapping from rank names to indices (distance to root/leaf?)
     lower: use lower taxa for filling "NA"s
     """
-    lineage = ncbi.get_lineage(taxid)
-    lineage_ranks = ncbi.get_rank(lineage)
-    lineage_names = ncbi.get_taxid_translator(lineage, try_synonyms=True)
+    lineage_taxids = ncbi.get_lineage(taxid)
+    lineage_ranks = ncbi.get_rank(lineage_taxids)
+    lineage_names = ncbi.get_taxid_translator(lineage_taxids, try_synonyms=True)
     if lower:
-        lineage.reverse()
-    for l in lineage:
-        if not lineage_ranks[l] in RANK_IDX:
+        lineage_taxids.reverse()
+    for parent_taxid in lineage_taxids:
+        parent_rank = lineage_ranks[parent_taxid]
+        if parent_rank not in RANK_IDX:
             continue
-        if ranks[RANK_IDX[lineage_ranks[l]]] != "NA":
+        parent_rank_index = RANK_IDX[parent_rank]
+        if ranks[parent_rank_index] != "NA":
             continue
-        ranks[RANK_IDX[lineage_ranks[l]]] = lineage_names[l]
+        ranks[parent_rank_index] = lineage_names[parent_taxid]
 
 
 # get command line options
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/31_genetree.nhx Tue Oct 20 15:10:40 2020 +0000
@@ -0,0 +1,1 @@
+(((insr_rattusnorvegicus,insr_musmusculus),(insr_homosapiens,insr_pantroglodytes)),insr_susscrofa);
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/32_genetree.nhx Tue Oct 20 15:10:40 2020 +0000
@@ -0,0 +1,1 @@
+((maob_rattusnorvegicus,maob_musmusculus),((maob_homosapiens,maob_pantroglodytes),(maob_susscrofa,maob_canisfamiliaris)));
\ No newline at end of file