Mercurial > repos > petr-novak > dante_ltr
changeset 6:b91ca438a1cb draft
"planemo upload commit 9633fb98932151f059ce02a0ce202a4374ef8d68"
author | petr-novak |
---|---|
date | Thu, 19 May 2022 08:21:55 +0000 |
parents | 0c3111ab729b |
children | c33d6583e548 |
files | clean_dante_ltr.xml clean_ltr.R dante_ltr_search.xml |
diffstat | 3 files changed, 25 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/clean_dante_ltr.xml Mon May 16 07:50:41 2022 +0000 +++ b/clean_dante_ltr.xml Thu May 19 08:21:55 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="clean_dante_ltr" name="DANTE_LTR transposamble elements filtering" version="0.1.5" python_template_version="3.5"> +<tool id="clean_dante_ltr" name="DANTE_LTR retrotransposons filtering" version="0.1.5" python_template_version="3.5"> <requirements> <requirement type="package">r-optparse</requirement> @@ -24,33 +24,30 @@ ]]></command> <inputs> - <param type="data" name="dante_ltr" format="gff3" /> - <param type="data" name="reference" format="fasta" /> + <param type="data" name="dante_ltr" format="gff3" + label="GFF3 output from DANTE_LTR retrotransposon identification pipeline"/> + <param type="data" name="reference" format="fasta" label="Reference sequence matching input GFF3" /> </inputs> <outputs> - <data name="dante_ltr_clean" format="gff3" label="Annotation of validated LTR transposable - elements based on annotation $dante_ltr.hid and reference $reference.hid"/> - <data name="rm_lib" format="fasta" label="Non-redundant library of LTR transposable - elements based on annotation $dante_ltr.hid and reference $reference.hid"/> - - <data name="te_full" format="fasta" label="Full length LTR transposable - elements based on annotation $dante_ltr.hid and reference $reference.hid"/> + <data name="dante_ltr_clean" format="gff3" + label="Validated LTR retrotransposons annotation (GFF3) based on annotation + $dante_ltr.hid and reference $reference.hid"/> + <data name="rm_lib" format="fasta" label="Non-redundant library of LTR retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/> - <data name="ltr5" format="fasta" label="5'LTR of transposable - elements based on annotation $dante_ltr.hid and reference $reference.hid"/> + <data name="te_full" format="fasta" label="Library of full length LTR retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/> + + <data name="ltr5" format="fasta" label="Library of 5'LTR of retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/> - <data name="ltr3" format="fasta" label="3'LTR of transposable - elements based on annotation $dante_ltr.hid and reference $reference.hid"/> + <data name="ltr3" format="fasta" label="Library of 3'LTR of retrotransposons (FASTA) based on annotation $dante_ltr.hid and reference $reference.hid"/> - <data name="summary" format="pdf" label="Summary of TE and LTR lenghts based on - $dante_ltr.hid and reference $reference.hid"/> + <data name="summary" format="pdf" label="LTR retrotransposons lengths summary based on $dante_ltr.hid and reference $reference.hid"/> </outputs> <help><![CDATA[ - This tool takes output from DANTE_LTR search identifies good quality transposable elements. - Good quality TE are considered those which does not have any cross-similarity between distinct lineages. + This tool takes output from DANTE_LTR search identifies good quality retrotransposons. + Good quality retrotransposons are considered those which does not have any cross-similarity between distinct lineages. Output from this tool is a annotation in GFF3 format and - non-redundant library of elements for custom RepeatMasker search. + libraries of elements for custom RepeatMasker search. ]]></help> </tool> \ No newline at end of file
--- a/clean_ltr.R Mon May 16 07:50:41 2022 +0000 +++ b/clean_ltr.R Thu May 19 08:21:55 2022 +0000 @@ -177,10 +177,14 @@ gff_te <- gff_out[gff_out$type %in% "transposable_element"] gff_5ltr <- gff_out[gff_out$LTR %in% "5LTR"] gff_3ltr <- gff_out[gff_out$LTR %in% "3LTR"] + full_te <- getSeqNamed(s, gff_te) +names(full_te) <- paste0(gff_te$ID,":",names(full_te)) ltr5 <- getSeqNamed(s, gff_5ltr) +names(ltr5) <- paste0(gff_5ltr$Parent,":",names(ltr5)) ltr3 <- getSeqNamed(s, gff_3ltr) -inc <- gff_te$Rank != "DL" +names(ltr3) <- paste0(gff_3ltr$Parent,":",names(ltr3)) +inc <- gff_te$Rank != "DL" writeXStringSet(seq_representative, paste0(opt$output, "_RM_lib_non_redundant.fasta")) writeXStringSet(full_te, paste0(opt$output, "_RM_lib_full_TE.fasta"))
--- a/dante_ltr_search.xml Mon May 16 07:50:41 2022 +0000 +++ b/dante_ltr_search.xml Thu May 19 08:21:55 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="dante_ltr_search" name="DANTE_LTR transposable element identification" version="0.1.5" python_template_version="3.5"> +<tool id="dante_ltr_search" name="DANTE_LTR retrotransposon identification" version="0.1.5" python_template_version="3.5"> <requirements> <requirement type="package">blast</requirement> <requirement type="package">r-optparse</requirement> @@ -13,12 +13,12 @@ mv output.gff3 $te_ltr_gff ]]></command> <inputs> - <param type="data" name="dante" format="gff3" label="Filtered gff3 output from DANTE pipeline"/> + <param type="data" name="dante" format="gff3" label="Filtered GFF3 output from DANTE pipeline"/> <param type="data" name="reference" format="fasta" label="Reference sequence matching DANTE output" /> </inputs> <outputs> - <data name="te_ltr_gff" format="gff3" label="Annotation of detected LTR transposable elements - based on the annotation $dante.hid and reference $reference.hid" /> + <data name="te_ltr_gff" format="gff3" label="LTR retrotransposons annotation (GFF3) + based on DANTE annotation $dante.hid and reference $reference.hid" /> </outputs> <help><![CDATA[ This tool uses output from DANTE annotation pipeline to identify full length LTR