Mercurial > repos > portiahollyoak > temp
comparison temp_insertions.xml @ 21:9672fe07a232 draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit 0fea84d05f8976b8360a8b4943ecb01b87e3ade0-dirty
| author | mvdbeek |
|---|---|
| date | Mon, 05 Dec 2016 09:58:47 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 20:6e02b9179a24 | 21:9672fe07a232 |
|---|---|
| 1 <tool id ="TEMP_insertions" name="TEMP Insertion" version="0.3.0"> | |
| 2 <description>finds TE insertions relative to reference</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.6.924=pl5.22.0_0">perl-bioperl</requirement> | |
| 5 <requirement type="package" version="0.7.13">bwa</requirement> | |
| 6 <requirement type="package" version="2.25.0">bedtools</requirement> | |
| 7 <requirement type="package" version="324">ucsc-twobittofa</requirement> | |
| 8 <requirement type="package" version="1.3.1">samtools</requirement> | |
| 9 </requirements> | |
| 10 <stdio> | |
| 11 <exit_code range="1:" /> | |
| 12 </stdio> | |
| 13 <command><![CDATA[ | |
| 14 ln -f -s "$alignment.metadata.bam_index" alignment.sorted.bam.bai && | |
| 15 ln -f -s "$alignment" alignment.sorted.bam && | |
| 16 bash "$__tool_directory__/scripts/TEMP_Insertion.sh" | |
| 17 -x "$minimum_score_difference" | |
| 18 -i alignment.sorted.bam | |
| 19 -s "$__tool_directory__/scripts" | |
| 20 -r "$consensus_te_seqs" | |
| 21 -t "$te_locations" | |
| 22 #if $te_families: | |
| 23 -u "$te_families" | |
| 24 #end if | |
| 25 -m "$mismatches" | |
| 26 -f "$median_insertsize" | |
| 27 -c \${GALAXY_SLOTS:-2} && | |
| 28 mv alignment.insertion.refined.bp.summary "$insertion_summary" | |
| 29 ]]></command> | |
| 30 <inputs> | |
| 31 <param format="bam" name="alignment" type="data" label="Alignment bam file"/> | |
| 32 <param format="fasta" name="consensus_te_seqs" type="data" label="Consensus TE Seqs fasta file"/> | |
| 33 <param format="bed" name="te_locations" type="data" label="TE Annotations bed file"/> | |
| 34 <param format="tabular" name="te_families" type="data" optional="True" label="TE Identifiers and Families"/> | |
| 35 <param name="median_insertsize" value="" type="integer" label="Median Insert Length"/> | |
| 36 <param name="mismatches" min="0" max="5" type="integer" value="3" label="Allow this many mismatches when aligning to TEs"/> | |
| 37 <param name="minimum_score_difference" type="integer" min="0" max="37" value="30" label="Minimum difference between mapping scores"/> | |
| 38 </inputs> | |
| 39 <outputs> | |
| 40 <data format="bed" name="insertion_summary" label="${alignment.element_identifier} Insertions" /> | |
| 41 </outputs> | |
| 42 <tests> | |
| 43 <test> | |
| 44 <param name="alignment" value="test_chromosome.sorted.bam" ftype="bam"/> | |
| 45 <param name="consensus_te_seqs" value="test_concensus.fa" ftype="fasta"/> | |
| 46 <param name="te_locations" value="test_TE_annotation.bed" ftype="bed"/> | |
| 47 <param name="median_insertsize" value="500"/> | |
| 48 <param name="minimum_score_difference" value="0"/> | |
| 49 <output name="insertion_summary" file="test_insertions_out.bed" ftype="bed" compare="sim_size"/> | |
| 50 </test> | |
| 51 </tests> | |
| 52 <help> <![CDATA[ | |
| 53 | |
| 54 | |
| 55 TEMP | |
| 56 ------------- | |
| 57 TEMP is a software package for detecting transposable element (TE) insertions and absences from pooled high-throughput sequencing data. | |
| 58 | |
| 59 Current version v1.04 | |
| 60 | |
| 61 Authors: Jiali Zhuang (jiali.zhuang@umassmed.edu) and Jie Wang (jie.wangj@umassmed.edu), Weng Lab, University of Massachusetts Medical School, Worcester, MA, USA | |
| 62 | |
| 63 *Input files/variables* | |
| 64 ------------------------- | |
| 65 * Alignment file in BAM format | |
| 66 * Reference genome used in aligning, in fasta or twobit format. | |
| 67 * Transposable Elements' Consensus Sequences in fasta format. | |
| 68 * Annotations of TEs in reference genome in bed format. | |
| 69 * TE Identifiers and Families (optional) - A file containing in the first column the TE names/identifiers from the consensus sequences file, and in the second column, their respective TE family names as in the TE annotations file. When supplied, if a detected insertion overlaps with an annotated TE of the same family, the detected insertion will be excluded from the results. | |
| 70 * Median Insert Length | |
| 71 * Number of Mismatches allowed (default 3) | |
| 72 * Minimum difference between mapping scores. The minimum difference in scores between the optimal and suboptimal alignments to consider a read uniquely mapped. | |
| 73 | |
| 74 *Output files* | |
| 75 ----------------- | |
| 76 * **In the Insertions output file there are 14 columns:** | |
| 77 * Column 1: The chromosome where the detected insertion happens. | |
| 78 * Column 2: The coordinate of the start position of the detected insertion. | |
| 79 * Column 3: The coordinate of the end position of the detected insertion. | |
| 80 * Column 4: The TE family that the detected insertion belongs to. | |
| 81 * Column 5: The direction of the insertion. “Plus” means that the TE is integrated with the plus strand of the genome while “minus” means the TE is integrated with the minus strand. | |
| 82 * Column 6: The class of the insertion. “1p1” means that the detected insertion is supported by reads at both sides. “2p” means the detected insertion is supported by more than 1 read at only 1 side. “Singleton” means the detected insertion is supported by only 1 read at 1 side. | |
| 83 * Column 7: The total number of read pairs that support the detected insertion. | |
| 84 * Column 8: The estimated population frequency of the detected insertion. | |
| 85 * Columns 9 & 10: The coordinate of a junction and the number of the reads supporting it. If the junction is not found, column 9 will be the arithmetic mean of the start and end coordinates and column 10 will have the value 0. | |
| 86 * Columns 11 & 12: Same as Columns 9 & 10 except for the junction on the other strand. | |
| 87 * Column 13: The number of reads supporting the detected insertion at the 5’ end of the TE (not including junction spanning reads). | |
| 88 * Column 14: The number of reads supporting the detected insertion at the 3’ end of the TE (not including junction spanning reads). | |
| 89 | |
| 90 | |
| 91 ----- | |
| 92 | |
| 93 | |
| 94 * **In the Absences output file there are 9 columns:** | |
| 95 * Column 1: The chromosome where the detected absence happens. | |
| 96 * Column 2: The coordinate of the start position of the detected absence. | |
| 97 * Column 3: The coordinate of the end position of the detected absence. | |
| 98 * Column 4: The TE family that the detected absence belongs to. | |
| 99 * Column 5: Junctions at 5’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. | |
| 100 * Column 6: Junctions at 3’ of the excised TE. The two numbers are the coordinates of the junctions on the two strands. | |
| 101 * Column 7: The number of reads supporting the absence. | |
| 102 * Column 8: The number of reads supporting the reference (no absence). | |
| 103 * Column 9: Estimated population frequency of the detected absence event. | |
| 104 | |
| 105 | |
| 106 ]]> </help> | |
| 107 <citations> | |
| 108 <citation type="doi">10.1093/nar/gku323</citation> | |
| 109 </citations> | |
| 110 </tool> |
