# HG changeset patch # User nick # Date 1486442351 18000 # Node ID 9a0bee12b583e808686f9ba2327411294e6e8cd5 # Parent 4bc49a5769ee02e8f49e2ff7f905da135a98200e planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty diff -r 4bc49a5769ee -r 9a0bee12b583 align_families.xml --- a/align_families.xml Thu Dec 01 23:22:52 2016 -0500 +++ b/align_families.xml Mon Feb 06 23:39:11 2017 -0500 @@ -1,13 +1,13 @@ - + of duplex sequencing reads mafft - duplex + duplex DUPLEX_DIR - python \$DUPLEX_DIR/align_families.py -p \${GALAXY_SLOTS:-1} $input > $output + python "\$DUPLEX_DIR/align_families.py" -p \${GALAXY_SLOTS:-1} '$input' > '$output' @@ -25,6 +25,22 @@ + + @article{Stoler2016, + author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton}, + doi = {10.1186/s13059-016-1039-4}, + issn = {1474-760X}, + journal = {Genome biology}, + number = {1}, + pages = {180}, + pmid = {27566673}, + publisher = {Genome Biology}, + title = {{Streamlined analysis of duplex sequencing data with Du Novo.}}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673}, + volume = {17}, + year = {2016} + } + **What it does** diff -r 4bc49a5769ee -r 9a0bee12b583 correct_barcodes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/correct_barcodes.xml Mon Feb 06 23:39:11 2017 -0500 @@ -0,0 +1,61 @@ + + + of duplex sequencing reads + + bowtie2 + samtools + networkx + duplex + DUPLEX_DIR + + + bash "\$DUPLEX_DIR/baralign.sh" '$input' refdir barcodes.bam + && samtools view -f 256 barcodes.bam + | python "\$DUPLEX_DIR/correct.py" -d $dist -m $mapq -p $pos '$input' refdir/barcodes.fa + | sort > '$output' + + + + + + + + + + + + @article{Stoler2016, + author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton}, + doi = {10.1186/s13059-016-1039-4}, + issn = {1474-760X}, + journal = {Genome biology}, + number = {1}, + pages = {180}, + pmid = {27566673}, + publisher = {Genome Biology}, + title = {{Streamlined analysis of duplex sequencing data with Du Novo.}}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673}, + volume = {17}, + year = {2016} + } + + + +**What it does** + +This is for processing duplex sequencing data. This will correct duplex barcodes and create new, larger families. Errors in barcodes normally prevent them from being recognized as the same as the other barcodes in their family. Correcting these errors allows the original, full families to be reconstructed, saving reads which would otherwise be lost. This tool accomplishes this by doing an all vs. all alignment between the barcodes with bowtie2. This identifies ones which are identical except a few, small differences. + +----- + +**Input** + +This expects the output format of the "Make families" tool. + +----- + +**Output** + +The output format is the same as the input format, ready to be consumed by the "Align families" tool. + + + diff -r 4bc49a5769ee -r 9a0bee12b583 dunovo.xml --- a/dunovo.xml Thu Dec 01 23:22:52 2016 -0500 +++ b/dunovo.xml Mon Feb 06 23:39:11 2017 -0500 @@ -1,20 +1,20 @@ - + from duplex sequencing alignments - duplex + duplex DUPLEX_DIR duplex.fa - && python \$DUPLEX_DIR/utils/outconv.py duplex.fa -1 $dcs1 -2 $dcs2 + && python "\$DUPLEX_DIR/utils/outconv.py" duplex.fa -1 '$dcs1' -2 '$dcs2' #if $keep_sscs: - && python \$DUPLEX_DIR/utils/outconv.py sscs.fa -1 $sscs1 -2 $sscs2 + && python "\$DUPLEX_DIR/utils/outconv.py" sscs.fa -1 '$sscs1' -2 '$sscs2' #end if ]]> @@ -45,6 +45,22 @@ + + @article{Stoler2016, + author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton}, + doi = {10.1186/s13059-016-1039-4}, + issn = {1474-760X}, + journal = {Genome biology}, + number = {1}, + pages = {180}, + pmid = {27566673}, + publisher = {Genome Biology}, + title = {{Streamlined analysis of duplex sequencing data with Du Novo.}}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673}, + volume = {17}, + year = {2016} + } + **What it does** diff -r 4bc49a5769ee -r 9a0bee12b583 make_families.xml --- a/make_families.xml Thu Dec 01 23:22:52 2016 -0500 +++ b/make_families.xml Mon Feb 06 23:39:11 2017 -0500 @@ -1,16 +1,16 @@ - + of duplex sequencing reads - duplex + duplex DUPLEX_DIR - paste $fastq1 $fastq2 + paste '$fastq1' '$fastq2' | paste - - - - - | awk -f \$DUPLEX_DIR/make-barcodes.awk -v TAG_LEN=$taglen -v INVARIANT=$invariant + | awk -f "\$DUPLEX_DIR/make-barcodes.awk" -v TAG_LEN=$taglen -v INVARIANT=$invariant | sort - > $output + > '$output' @@ -37,6 +37,22 @@ + + @article{Stoler2016, + author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton}, + doi = {10.1186/s13059-016-1039-4}, + issn = {1474-760X}, + journal = {Genome biology}, + number = {1}, + pages = {180}, + pmid = {27566673}, + publisher = {Genome Biology}, + title = {{Streamlined analysis of duplex sequencing data with Du Novo.}}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673}, + volume = {17}, + year = {2016} + } + **What it does** diff -r 4bc49a5769ee -r 9a0bee12b583 tool_dependencies.xml --- a/tool_dependencies.xml Thu Dec 01 23:22:52 2016 -0500 +++ b/tool_dependencies.xml Mon Feb 06 23:39:11 2017 -0500 @@ -1,12 +1,21 @@ + + + + + + - + + + + - https://github.com/galaxyproject/dunovo/archive/v0.5.tar.gz + https://github.com/galaxyproject/dunovo/archive/v0.6.tar.gz make .