# HG changeset patch # User mvdbeek # Date 1498052305 14400 # Node ID 301316cb596a6e5e1f40765d1ce969cbadabc1e3 # Parent db5c766503dd0ebbc9a6373e866bb6d458a42390 planemo upload for repository https://github.com/bardin-lab/readtagger/tree/master/galaxy commit 6e01a2e472ebbb07ce5181b836bae8bc5c7ecf36-dirty diff -r db5c766503dd -r 301316cb596a add_matesequence.xml --- a/add_matesequence.xml Thu May 11 05:33:27 2017 -0400 +++ b/add_matesequence.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,7 @@ - + into tag field - readtagger + readtagger add_matesequence --version + modifies proper_pair flag in bam files - readtagger + readtagger + from multiple bam files macros.xml - readtagger + readtagger +# +#So, for example, if you had phiX indexed stored in +#/depot/data2/galaxy/phiX/base/, +#then the bwa_index.loc entry would look like this: +# +#phiX174 phiX phiX Pretty /depot/data2/galaxy/phiX/base/phiX.fa +# +#and your /depot/data2/galaxy/phiX/base/ directory +#would contain phiX.fa.* files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 phiX.fa.amb +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 phiX.fa.ann +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 phiX.fa.bwt +#...etc... +# +#Your bwa_index.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fa +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fa +#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fa +#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fa +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# diff -r db5c766503dd -r 301316cb596a findcluster.xml --- a/findcluster.xml Thu May 11 05:33:27 2017 -0400 +++ b/findcluster.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,10 @@ - + in bam files + + macros.xml + - readtagger + readtagger findcluster --version - + + + @@ -35,7 +51,8 @@ - + + @@ -51,24 +68,35 @@ Find clusters of reads that support a TE insertion. - Options: + Options: --input_path PATH Find cluster in this BAM file. + --region TEXT Find clusters in this Region (Format is + chrX:2000-1000). + --max_proper_pair_size INTEGER Maximum proper pairs size. If not given will + be inferred from the data. --output_bam PATH Write out BAM file with cluster information to this path. Reads will have an additional "CD" tag to indicate the cluster number --output_gff PATH Write out GFF file with cluster information to this path. + --output_fasta PATH Write out supporting evidence for clusters + to this path. --sample_name TEXT Sample name to use when writing out clusters in GFF file. Default is to infer the name from the input filename. --include_duplicates / --no-include_duplicates Include reads marked as duplicates when finding clusters. - --reference_fasta TEXT Blast cluster contigs against this fasta - file - --blastdb TEXT Blast cluster contigs against this blast - database + --transposon_reference_fasta TEXT + Transposon fasta to align clipped reads to. + Not necessary if BWA index is provided. + --transposon_bwa_index TEXT Transposon BWA index to align clipped reads + to + --genome_reference_fasta TEXT Genome fasta to align clipped reads to. Not + necessary if BWA index is provided. + --genome_bwa_index TEXT Genome BWA index to align clipped reads to --threads INTEGER RANGE Threads to use for cap3 assembly step + --shm_dir PATH Path to shared memory folder --version Show the version and exit. --help Show this message and exit. diff -r db5c766503dd -r 301316cb596a macros.xml --- a/macros.xml Thu May 11 05:33:27 2017 -0400 +++ b/macros.xml Wed Jun 21 09:38:25 2017 -0400 @@ -26,4 +26,24 @@ + + + + + + + + + + + + + + + + + + + + diff -r db5c766503dd -r 301316cb596a test-data/three_cluster_out.gff --- a/test-data/three_cluster_out.gff Thu May 11 05:33:27 2017 -0400 +++ b/test-data/three_cluster_out.gff Wed Jun 21 09:38:25 2017 -0400 @@ -1,4 +1,3 @@ ##gff-version 3 -3R findcluster TE 13373515 13373524 22 + . ID=extended_and_annotated_roi.bam_0;genotype=homozygous;genotype_likelihoods=1.8828617809e-31,9.53673406912e-07,0.999999046327;left_insert=0,CTCGGAATGTATCTAACTAACAAACTCATATCAAATATAAGCAAGTGCGCCAATTCGTATGCATATGGACATATGGACATATACATATAGTAACATAATATGCTTCTCATATTACGTTTACATACTTACACTAATTGTACATACAATCTTGCACATGCATAAACACATCAAACCAGCTTACATTTTTACTTACACTTAAGCGCATGATTTGTTGTGCATCCATACCGTTATTTTTCC;left_support=11;non_support=0;right_insert=0,GACAACGAAGAAATAAAGATCCAAACTAAAAAAATACCTCGTGTTGATTCTGAAACTTCTTTAAAGGCGTTGATCTTAGTCAAACGACGGATCATTTGTTCGACTCGAATAGTAAAATACGTAAGTATATAGATAGTCTATATTAATTTTAAAAGCTCAAAGGGGCGCAAGTCTCTCTCATCAGTTGTGTCTTTAGTTTTTTTGATTTGGTTTGGTG;right_support=11;valid_TSD=False -3R findcluster TE 13374595 13374595 2 + . ID=extended_and_annotated_roi.bam_1;genotype=homozygous;genotype_likelihoods=0.0106846062525,0.329771797916,0.659543595832;left_insert=0,GTTCACCCGCGTCCGAGTTCCTGCTCCACTACTCCCTGGCTGCTGACTCACTGTTGTTATAGGGGTGGCTTCCCCTCTGTTCTTCCTGGGGGAATGCTGCATCTTCCCCAGCTCCAAAATGGCGG;left_support=2;non_support=0;right_insert=;right_support=0;valid_TSD=False -3R findcluster TE 13374677 13374677 1 + . ID=extended_and_annotated_roi.bam_2;genotype=heterozygous;genotype_likelihoods=0.212598425197,0.393700787402;left_insert=;left_support=0;non_support=0;right_insert=;right_support=1;valid_TSD=False +3R findcluster TE 13373515 13373524 22 + . ID=extended_and_annotated_roi.bam_0;genotype=homozygous;genotype_likelihoods=1.8828617809e-31,9.53673406912e-07,0.999999046327;left_insert=0,CTCGGAATGTATCTAACTAACAAACTCATATCAAATATAAGCAAGTGCGCCAATTCGTATGCATATGGACATATGGACATATACATATAGTAACATAATATGCTTCTCATATTACGTTTACATACTTACACTAATTGTACATACAATCTTGCACATGCATAAACACATCAAACCAGCTTACATTTTTACTTACACTTAAGCGCATGATTTGTTGTGCATCCATACCGTTATTTTTCC;left_mate_support=8;left_support=11;non_support=0;right_insert=0,GACAACGAAGAAATAAAGATCCAAACTAAAAAAATACCTCGTGTTGATTCTGAAACTTCTTTAAAGGCGTTGATCTTAGTCAAACGACGGATCATTTGTTCGACTCGAATAGTAAAATACGTAAGTATATAGATAGTCTATATTAATTTTAAAAGCTCAAAGGGGCGCAAGTCTCTCTCATCAGTTGTGTCTTTAGTTTTTTTGATTTGGTTTGGTG;right_mate_support=6;right_support=11;valid_TSD=False +3R findcluster TE 13374595 13374676 3 + . ID=extended_and_annotated_roi.bam_1;genotype=homozygous;genotype_likelihoods=0.00038864889331,0.199922270221,0.799689080885;left_insert=0,GTTCACCCGCGTCCGAGTTCCTGCTCCACTACTCCCTGGCTGCTGACTCACTGTTGTTATAGGGGTGGCTTCCCCTCTGTTCTTCCTGGGGGAATGCTGCATCTTCCCCAGCTCCAAAATGGCGG;left_mate_support=2;left_support=2;non_support=0;right_insert=;right_mate_support=1;right_support=1;valid_TSD=False diff -r db5c766503dd -r 301316cb596a tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jun 21 09:38:25 2017 -0400 @@ -0,0 +1,8 @@ + + + + + value, dbkey, name, path + +

+ diff -r db5c766503dd -r 301316cb596a update_mapq.xml --- a/update_mapq.xml Thu May 11 05:33:27 2017 -0400 +++ b/update_mapq.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,7 @@ - + of supplementary alignments - readtagger + readtagger update_mapq --version + from SAM/BAM alignment files as FASTQ - readtagger + readtagger write_supplementary_fastq --version