Mercurial > repos > jpruab > jpr_picard
changeset 4:f4d018471628 draft default tip
Uploaded
line wrap: on
line diff
<!--
  picard_AddOrReplaceReadGroups.xml

  Galaxy tool wrapper: runs picard_wrapper.py with AddOrReplaceReadGroups.jar to add or
  replace the read group tags (ID, SM, LB, PL, PU and optionally CN, DS) of a SAM/BAM
  dataset. Output is BAM unless the outputFormat checkbox is unchecked, in which case
  the change_format rule below switches the dataset type to SAM.
-->
<tool name="Add or Replace Groups" id="picard_ARRG" version="1.56.0">
  <requirements>
    <requirement type="package" version="1.56.0">picard</requirement>
  </requirements>
  <!-- The CN and DS options are only passed when rgOpts is "full". -->
  <command interpreter="python">
    picard_wrapper.py
      --input="${inputFile}"
      --rg-lb="${rglb}"
      --rg-pl="${rgpl}"
      --rg-pu="${rgpu}"
      --rg-sm="${rgsm}"
      --rg-id="${rgid}"
      --rg-opts="${readGroupOpts.rgOpts}"
      #if $readGroupOpts.rgOpts == "full"
        --rg-cn="${readGroupOpts.rgcn}"
        --rg-ds="${readGroupOpts.rgds}"
      #end if
      --output-format="${outputFormat}"
      --output="${outFile}"
      -j "\$JAVA_JAR_PATH/AddOrReplaceReadGroups.jar"
      --tmpdir "${__new_file_path__}"
  </command>
  <inputs>
    <param format="bam,sam" name="inputFile" type="data"
           label="SAM/BAM dataset to add or replace read groups in"
           help="If empty, upload or import a SAM/BAM dataset." />
    <param name="rgid" value="1" type="text" label="Read group ID (ID tag)"
           help="The most important read group tag. Galaxy will use a value of '1' if nothing provided." />
    <param name="rgsm" value="" type="text" label="Read group sample name (SM tag)" />
    <param name="rglb" value="" type="text" label="Read group library (LB tag)" />
    <param name="rgpl" value="" type="text" label="Read group platform (PL tag)"
           help="illumina, solid, 454, pacbio, helicos" />
    <param name="rgpu" value="" type="text" label="Read group platform unit"
           help="like run barcode, etc." />
    <conditional name="readGroupOpts">
      <param name="rgOpts" type="select" label="Specify additional (optional) arguments"
             help="Allows you to set RGCN and RGDS.">
        <option value="preSet">Use pre-set defaults</option>
        <option value="full">Set optional arguments</option>
      </param>
      <when value="preSet" />
      <when value="full">
        <!-- "<" and ">" must be written as entities inside attribute values. -->
        <param name="rgcn" value="" type="text" label="Read group sequencing center name"
               help="Leave set to &lt;null&gt; for default (none)" />
        <param name="rgds" value="" type="text" label="Read group description"
               help="Leave set to &lt;null&gt; for default (none)" />
      </when>
    </conditional>
    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam"
           label="Output bam instead of sam" help="Uncheck for sam output" />
  </inputs>
  <outputs>
    <data name="outFile" format="bam"
          label="${tool.name} on ${on_string}: ${outputFormat} with read groups replaced">
      <change_format>
        <when input="outputFormat" value="sam" format="sam" />
      </change_format>
    </data>
  </outputs>
  <tests>
    <test>
      <!-- Command for replacing read groups in bam:
      java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.bam O=picard_ARRG_output1.sam RGID=one RGLB=lib RGPL=illumina RGPU=peaewe RGSM=sam1
      -->
      <param name="inputFile" value="picard_ARRG_input1.bam" />
      <param name="rglb" value="lib" />
      <param name="rgpl" value="illumina" />
      <param name="rgpu" value="peaewe" />
      <param name="rgsm" value="sam1" />
      <param name="rgid" value="one" />
      <param name="rgOpts" value="preSet" />
      <param name="outputFormat" value="False" />
      <output name="outFile" file="picard_ARRG_output1.sam" ftype="sam" />
    </test>
    <test>
      <!-- Command for replacing read groups in sam:
      java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input1.sam O=picard_ARRG_output2.sam RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp RGID=M5 RGCN=FamousCenter RGDS="description with spaces"
      picard_ARRG_input1.bam can be created from picard_ARRG_input1.sam
      -->
      <param name="inputFile" value="picard_ARRG_input1.sam" />
      <param name="rglb" value="LIB" />
      <param name="rgpl" value="IL" />
      <param name="rgpu" value="PLAT" />
      <param name="rgsm" value="smp" />
      <param name="rgid" value="M5" />
      <param name="rgOpts" value="full" />
      <param name="rgcn" value="FamousCenter" />
      <param name="rgds" value="description with spaces" />
      <param name="outputFormat" value="False" />
      <output name="outFile" file="picard_ARRG_output2.sam" ftype="sam" />
    </test>
    <test>
      <!-- Command for adding read groups in sam:
      java -jar AddOrReplaceReadGroups.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_ARRG_input2.sam O=picard_ARRG_output3.bam RGID=M6 RGLB=LIB RGPL=IL RGPU=PLAT RGSM=smp1
      -->
      <param name="inputFile" value="picard_ARRG_input2.sam" />
      <param name="rglb" value="LIB" />
      <param name="rgpl" value="IL" />
      <param name="rgpu" value="PLAT" />
      <param name="rgsm" value="smp1" />
      <param name="rgid" value="M6" />
      <param name="rgOpts" value="preSet" />
      <param name="outputFormat" value="True" />
      <output name="outFile" file="picard_ARRG_output3.bam" ftype="bam" />
    </test>
  </tests>
  <help>

.. class:: infomark

**Purpose**

Add or Replace Read Groups in an input BAM or SAM file.

**Read Groups are Important!**

Many downstream analysis tools (such as GATK, for example) require BAM datasets to contain read groups. Even if you are not going to use GATK, setting read groups correctly from the start will simplify your life greatly. Below we provide an explanation of read groups fields taken from GATK FAQ webpage:

.. csv-table::
   :header-rows: 1

    Tag,Importance,Definition,Meaning
    "ID","Required","Read group identifier. Each @RG line must have a unique ID. The value of ID is used in the RG tags of alignment records. Must be unique among all read groups in header section. Read group IDs may be modified when merging SAM files in order to handle collisions.","Ideally, this should be a globally unique identifier across all sequencing data in the world, such as the Illumina flowcell + lane name and number. Will be referenced by each read with the RG:Z field, allowing tools to determine the read group information associated with each read, including the sample from which the read came. Also, a read group is effectively treated as a separate run of the NGS instrument in tools like base quality score recalibration (a GATK component) -- all reads within a read group are assumed to come from the same instrument run and to therefore share the same error model."
    "SM","Required. As important as ID.","Sample. Use pool name where a pool is being sequenced.","The name of the sample sequenced in this read group. GATK tools treat all read groups with the same SM value as containing sequencing data for the same sample. Therefore it's critical that the SM field be correctly specified, especially when using multi-sample tools like the Unified Genotyper (a GATK component)."
    "PL","Important. Not currently used in the GATK, but was in the past, and may return. The only way to know the sequencing technology used to generate the sequencing data.","Platform/technology used to produce the read. Valid values: ILLUMINA, SOLID, LS454, HELICOS and PACBIO.","It's a good idea to use this field."
    "LB","Essential for MarkDuplicates","DNA preparation library identifier","MarkDuplicates uses the LB field to determine which read groups might contain molecular duplicates, in case the same DNA library was sequenced on multiple lanes."

**Example of Read Group usage**

Suppose we have a trio of samples: MOM, DAD, and KID. Each has two DNA libraries prepared, one with 400 bp inserts and another with 200 bp inserts. Each of these libraries is run on two lanes of an illumina hiseq, requiring 3 x 2 x 2 = 12 lanes of data. When the data come off the sequencer, we would create 12 BAM files, with the following @RG fields in the header::

 Dad's data:
 @RG ID:FLOWCELL1.LANE1 PL:illumina LB:LIB-DAD-1 SM:DAD PI:200
 @RG ID:FLOWCELL1.LANE2 PL:illumina LB:LIB-DAD-1 SM:DAD PI:200
 @RG ID:FLOWCELL1.LANE3 PL:illumina LB:LIB-DAD-2 SM:DAD PI:400
 @RG ID:FLOWCELL1.LANE4 PL:illumina LB:LIB-DAD-2 SM:DAD PI:400

 Mom's data:
 @RG ID:FLOWCELL1.LANE5 PL:illumina LB:LIB-MOM-1 SM:MOM PI:200
 @RG ID:FLOWCELL1.LANE6 PL:illumina LB:LIB-MOM-1 SM:MOM PI:200
 @RG ID:FLOWCELL1.LANE7 PL:illumina LB:LIB-MOM-2 SM:MOM PI:400
 @RG ID:FLOWCELL1.LANE8 PL:illumina LB:LIB-MOM-2 SM:MOM PI:400

 Kid's data:
 @RG ID:FLOWCELL2.LANE1 PL:illumina LB:LIB-KID-1 SM:KID PI:200
 @RG ID:FLOWCELL2.LANE2 PL:illumina LB:LIB-KID-1 SM:KID PI:200
 @RG ID:FLOWCELL2.LANE3 PL:illumina LB:LIB-KID-2 SM:KID PI:400
 @RG ID:FLOWCELL2.LANE4 PL:illumina LB:LIB-KID-2 SM:KID PI:400

Note the hierarchical relationship between read groups (unique for each lane) to libraries (sequenced on two lanes) and samples (across four lanes, two lanes for each library).

**Picard documentation**

This is a Galaxy wrapper for AddOrReplaceReadGroups, a part of the external package Picard-tools_.

 .. _Picard-tools: http://www.google.com/search?q=picard+samtools

------

.. class:: infomark

**Inputs, outputs, and parameters**

Either a sam file or a bam file must be supplied. If a bam file is used, it must
be coordinate-sorted. Galaxy currently coordinate-sorts all bam files.

The output file is either bam (the default) or sam, according to user selection,
and contains the same information as the input file except for the appropriate
additional (or modified) read group tags. Bam is recommended since it is smaller.

From the Picard documentation.

AddOrReplaceReadGroups REQUIRED parameters::

 Option (Type)    Description

 RGLB=String      Read Group Library
 RGPL=String      Read Group platform (e.g. illumina, solid)
 RGPU=String      Read Group platform unit (eg. run barcode)
 RGSM=String      Read Group sample name
 RGID=String      Read Group ID; Default value: null (empty)

AddOrReplaceReadGroups OPTIONAL parameters::

 Option (Type)    Description

 RGCN=String      Read Group sequencing center name; Default value: null (empty)
 RGDS=String      Read Group description Default value: null (empty)

One parameter that Picard's AddOrReplaceReadGroups offers that is automatically
set by Galaxy is the SORT_ORDER, which is set to coordinate.

.. class:: warningmark

**Warning on SAM/BAM quality**

Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
to be the only way to deal with SAM/BAM that cannot be parsed.

  </help>
</tool>
<!--
  picard_BamIndexStats.xml

  Galaxy tool wrapper: hands a BAM dataset and its bam_index metadata file to
  picard_wrapper.py together with BamIndexStats.jar, and collects the result as an
  HTML dataset (extra files go to the dataset's files_path directory).
-->
<tool id="picard_BamIndexStats" name="BAM Index Statistics" version="1.56.0">
  <requirements>
    <requirement type="package" version="1.56.0">picard</requirement>
  </requirements>
  <command interpreter="python">
    picard_wrapper.py
      --input "${input_file}"
      --bai-file "${input_file.metadata.bam_index}"
      -t "${htmlfile}"
      -d "${htmlfile.files_path}"
      -j "\$JAVA_JAR_PATH/BamIndexStats.jar"
      --tmpdir "${__new_file_path__}"
  </command>
  <inputs>
    <param name="input_file"
           type="data"
           format="bam"
           label="BAM dataset to generate statistics for"
           help="If empty, upload or import a BAM dataset" />
  </inputs>
  <outputs>
    <data name="htmlfile" format="html" label="${tool.name}_on_${on_string}.html" />
  </outputs>
  <tests>
    <test>
      <!-- Expected output was produced with:
      java -jar BamIndexStats.jar I=test-data/picard_input_tiny_coord.bam > picard_BIS_output1.txt
      (picard_input_tiny_coord.bam can be created from picard_input_tiny_coord.sam)
      -->
      <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
      <output name="htmlfile"
              file="picard_BIS_output1.txt"
              ftype="html"
              compare="contains"
              lines_diff="12" />
    </test>
    <test>
      <!-- Expected output was produced with:
      java -jar BamIndexStats.jar I=test-data/picard_BIS_input1.bam > picard_BIS_output2.txt
      (picard_BIS_input1.bam can be created from picard_BIS_input1.sam)
      -->
      <param name="input_file" value="picard_BIS_input1.bam" ftype="bam" />
      <output name="htmlfile"
              file="picard_BIS_output2.txt"
              ftype="html"
              compare="contains"
              lines_diff="12" />
    </test>
  </tests>
  <help>

.. class:: infomark

**Purpose**

Generate Bam Index Stats for a provided BAM file.

**Picard documentation**

This is a Galaxy wrapper for BamIndexStats, a part of the external package Picard-tools_.

 .. _Picard-tools: http://www.google.com/search?q=picard+samtools

------

.. class:: infomark

**Inputs and outputs**

The only input is the BAM file you wish to obtain statistics for, which is required.
Note that it must be coordinate-sorted. Galaxy currently coordinate-sorts all BAM files.

This tool outputs an HTML file that contains links to the actual metrics results, as well
as a log file with info on the exact command run.

.. class:: warningmark

**Warning on SAM/BAM quality**

Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
to be the only way to deal with SAM/BAM that cannot be parsed.

------

**Example**

Given a BAM file created from the following::

 @HD VN:1.0 SO:coordinate
 @SQ SN:chr1 LN:101
 @SQ SN:chr7 LN:404
 @SQ SN:chr8 LN:202
 @SQ SN:chr10 LN:303
 @SQ SN:chr14 LN:505
 @RG ID:0 SM:Hi,Mom!
 @RG ID:1 SM:samplesample DS:ClearDescription
 @PG ID:1 PN:Hey! VN:2.0
 @CO Just a generic comment to make the header longer
 read1 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I RG:Z:0
 read2 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I RG:Z:0
 read3 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I RG:Z:0
 read4 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I RG:Z:0
 read5 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))II'I*/)-I*-)I.-)I)I),/-II..)./.,.).*II,I.II-)III0*IIIIIIII/32/,01460II/6/*0*/2/283//36868/I RG:Z:0
 read6 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA I/15445666651/566666553+2/14/I/555512+3/)-'/-I-'*+))*''13+3)'//++''/'))/3+I*5++)I'2+I+/*I-II*)I-./1'1 RG:Z:0
 read7 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA I/15445666651/566666553+2/14/I/555512+3/)-'/-I-'*+))*''13+3)'//++''/'))/3+I*5++)I'2+I+/*I-II*)I-./1'1 RG:Z:0
 read8 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA I/15445666651/566666553+2/14/I/555512+3/)-'/-I-'*+))*''13+3)'//++''/'))/3+I*5++)I'2+I+/*I-II*)I-./1'1 RG:Z:0

The following metrics file will be produced::

 chr1 length= 101 Aligned= 0 Unaligned= 0
 chr7 length= 404 Aligned= 7 Unaligned= 0
 chr8 length= 202 Aligned= 0 Unaligned= 0
 chr10 length= 303 Aligned= 0 Unaligned= 0
 chr14 length= 505 Aligned= 0 Unaligned= 0
 NoCoordinateCount= 1

  </help>
</tool>
<!--
  picard_FastqToSam.xml

  Galaxy tool wrapper: invokes FastqToSam.jar directly (no Python wrapper) to turn one
  FASTQ dataset, or a pair of them, into an unaligned BAM dataset. QUALITY_FORMAT is
  derived from the datatype extension of the first FASTQ input.
-->
<tool id="picard_FastqToSam" name="FASTQ to BAM" version="1.56.0">
  <description>creates an unaligned BAM file</description>
  <requirements>
    <requirement type="package" version="1.56.0">picard</requirement>
  </requirements>
  <!-- Dan Blankenberg -->
  <!--
    Optional advanced settings are emitted only when non-empty. The shell redirections
    at the end are written with entities (&gt; and &amp;) so the document stays
    well-formed: stderr is merged into stdout, and if the java process exits non-zero
    a short error message is echoed to stderr.
  -->
  <command>java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC
    -jar "\$JAVA_JAR_PATH/FastqToSam.jar"
    FASTQ="${input_fastq1}"
    #if str( $input_fastq2) != "None":
      FASTQ2="${input_fastq2}"
    #end if
    QUALITY_FORMAT="${ dict( fastqsanger='Standard', fastqcssanger='Standard', fastqillumina='Illumina', fastqsolexa='Solexa' )[ $input_fastq1.ext ] }" ##Solexa, Illumina, Standard
    OUTPUT="${output_bam}"
    READ_GROUP_NAME="${read_group_name}"
    SAMPLE_NAME="${sample_name}"
    #if $param_type.param_type_selector == "advanced":
      #if str( $param_type.library_name ) != "":
        LIBRARY_NAME="${param_type.library_name}"
      #end if
      #if str( $param_type.platform_unit ) != "":
        PLATFORM_UNIT="${param_type.platform_unit}"
      #end if
      #if str( $param_type.platform ) != "":
        PLATFORM="${param_type.platform}"
      #end if
      #if str( $param_type.sequencing_center ) != "":
        SEQUENCING_CENTER="${param_type.sequencing_center}"
      #end if
      #if str( $param_type.predicted_insert_size ) != "":
        PREDICTED_INSERT_SIZE="${param_type.predicted_insert_size}"
      #end if
      #if str( $param_type.description ) != "":
        DESCRIPTION="${param_type.description}"
      #end if
      #if str( $param_type.run_date ) != "":
        RUN_DATE="${param_type.run_date}"
      #end if
      #if str( $param_type.min_q ) != "":
        MIN_Q="${param_type.min_q}"
      #end if
      #if str( $param_type.max_q ) != "":
        MAX_Q="${param_type.max_q}"
      #end if
      SORT_ORDER="${param_type.sort_order}"
    #else:
      SORT_ORDER=coordinate ##unsorted, queryname, coordinate; always use coordinate
    #end if
    2&gt;&amp;1
    || echo "Error running Picard FastqToSAM" &gt;&amp;2
  </command>
  <inputs>
    <param name="input_fastq1" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" label="FASTQ file" /> <!-- confirm that fastqcssanger also works -->
    <param name="input_fastq2" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" optional="True"
           label="Second FASTQ of paired end data" help="Only needed when using paired end data." >
      <options options_filter_attribute="ext" from_parameter="tool.app.datatypes_registry.datatypes_by_extension" transform_lines="obj.keys()">
        <column name="name" index="0"/>
        <column name="value" index="0"/>
        <filter type="param_value" ref="input_fastq1" ref_attribute="ext" column="0"/>
      </options>
    </param>
    <param name="read_group_name" type="text" value="A" label="Read Group Name" />
    <param name="sample_name" type="text" value="unknown sample" label="Sample Name" />
    <conditional name="param_type">
      <param name="param_type_selector" type="select" label="Basic or Advanced options">
        <option value="basic" selected="True">Basic</option>
        <option value="advanced">Advanced</option>
      </param>
      <when value="basic">
        <!-- Do nothing here -->
      </when>
      <when value="advanced">
        <param name="library_name" type="text" value="" label="Library Name" />
        <param name="platform_unit" type="text" value="" label="Platform Unit" />
        <param name="platform" type="text" value="" label="Platform" />
        <param name="sequencing_center" type="text" value="" label="Sequencing Center" />
        <param name="predicted_insert_size" type="integer" value="" optional="True" label="Predicted Insert Size" />
        <param name="description" type="text" value="" label="Description" />
        <param name="run_date" type="text" value="" label="Run Date" />
        <param name="min_q" type="integer" optional="True" value="0" label="Min Q" />
        <param name="max_q" type="integer" optional="True" value="93" label="Max Q" />
        <param name="sort_order" type="select" label="Sort order">
          <option value="coordinate" selected="True">coordinate</option>
          <option value="queryname">queryname</option>
          <option value="unsorted">unsorted</option>
        </param>
      </when>
    </conditional>
  </inputs>
  <outputs>
    <data format="bam" name="output_bam" />
  </outputs>
  <tests>
    <test>
      <param name="input_fastq1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
      <param name="input_fastq2" />
      <param name="read_group_name" value="A" />
      <param name="sample_name" value="unknown sample" />
      <param name="param_type_selector" value="basic" />
      <output name="output_bam" file="picard_fastq_to_sam_out1.bam" ftype="bam"/>
    </test>
    <test>
      <param name="input_fastq1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" />
      <param name="input_fastq2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" />
      <param name="read_group_name" value="A" />
      <param name="sample_name" value="unknown sample" />
      <param name="param_type_selector" value="basic" />
      <output name="output_bam" file="picard_fastq_to_sam_out2.bam" ftype="bam"/>
    </test>
  </tests>
  <help>
**What it does**

Picard: FastqToSam converts FASTQ files to unaligned BAM files.

------

Please cite the website "http://picard.sourceforge.net".

------


**Input formats**

FastqToSam accepts FASTQ input files. If using paired-end data, you should select two FASTQ files.

------

**Outputs**

The output is in BAM format, see http://samtools.sourceforge.net for more details.

-------

**FastqToSam settings**

This is a list of FastqToSam options::

 READ_GROUP_NAME=String         Read group name Default value: A. This option can be set to 'null' to clear the default value.
 SAMPLE_NAME=String             Sample name to insert into the read group header Required.
 LIBRARY_NAME=String            The library name to place into the LB attribute in the read group header Default value: null.
 PLATFORM_UNIT=String           The platform unit (often run_barcode.lane) to insert into the read group header Default value: null.
 PLATFORM=String                The platform type (e.g. illumina, solid) to insert into the read group header Default value: null.
 SEQUENCING_CENTER=String       The sequencing center from which the data originated Default value: null.
 PREDICTED_INSERT_SIZE=Integer  Predicted median insert size, to insert into the read group header Default value: null.
 DESCRIPTION=String             Inserted into the read group header Default value: null.
  </help>
</tool>
<!--
  picard_ReorderSam.xml

  Galaxy tool wrapper: runs picard_wrapper.py with ReorderSam.jar to reorder a SAM/BAM
  dataset so its contigs follow the order of a reference, chosen either from the
  picard_indexes data table ("built-in") or from a FASTA dataset in the history.
-->
<tool name="Reorder SAM/BAM" id="picard_ReorderSam" version="1.56.0">
  <requirements>
    <requirement type="package" version="1.56.0">picard</requirement>
  </requirements>
  <!--
    refFile is declared inside the "source" conditional, so it is addressed as
    ${source.refFile}, the same way as speciesName, buildName and truncateSeqNames.
  -->
  <command interpreter="python">
    picard_wrapper.py
      --input="${inputFile}"
      #if $source.indexSource == "built-in"
        --ref="${source.ref.fields.path}"
      #else
        --ref-file="${source.refFile}"
        --species-name="${source.speciesName}"
        --build-name="${source.buildName}"
        --trunc-names="${source.truncateSeqNames}"
      #end if
      --allow-inc-dict-concord="${allowIncDictConcord}"
      --allow-contig-len-discord="${allowContigLenDiscord}"
      --output-format="${outputFormat}"
      --output="${outFile}"
      --tmpdir "${__new_file_path__}"
      -j "\$JAVA_JAR_PATH/ReorderSam.jar"
  </command>
  <inputs>
    <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to be reordered"
           help="If empty, upload or import a SAM/BAM dataset." />
    <conditional name="source">
      <param name="indexSource" type="select" label="Select Reference Genome"
             help="This tool will re-order SAM/BAM in the same order as reference selected below.">
        <option value="built-in">Locally cached</option>
        <option value="history">History</option>
      </param>
      <when value="built-in">
        <param name="ref" type="select" label="Select a reference genome">
          <options from_data_table="picard_indexes" />
        </param>
      </when>
      <when value="history">
        <param name="refFile" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" />
        <param name="speciesName" type="text" value="" label="Species name" />
        <param name="buildName" type="text" value="" label="Build name" />
        <param name="truncateSeqNames" type="boolean" checked="False" truevalue="true" falsevalue="false"
               label="Truncate sequence names after first whitespace" />
      </when>
    </conditional>
    <param name="allowIncDictConcord" type="boolean" checked="False" truevalue="true" falsevalue="false"
           label="Allow incomplete dict concordance?"
           help="Allows a partial overlap of the BAM contigs with the new reference sequence contigs." />
    <param name="allowContigLenDiscord" type="boolean" checked="False" truevalue="true" falsevalue="false"
           label="Allow contig length discordance?"
           help="This is dangerous--don't check it unless you know exactly what you're doing!" />
    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam"
           label="Output BAM instead of SAM" help="Uncheck for SAM output" />
  </inputs>
  <outputs>
    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: reordered ${outputFormat}">
      <change_format>
        <when input="outputFormat" value="sam" format="sam" />
      </change_format>
    </data>
  </outputs>
  <tests>
    <test>
      <!-- Commands:
      cp test-data/phiX.fasta .
      samtools faidx phiX.fasta
      java -jar CreateSequenceDictionary.jar R=phiX.fasta O=phiX.dict URI=phiX.fasta TRUNCATE_NAMES_AT_WHITESPACE=false SPECIES=phiX174
      java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input1.bam O=picard_RS_output1.bam REFERENCE=phiX.fasta ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false
      -->
      <param name="inputFile" value="picard_RS_input1.bam" />
      <param name="indexSource" value="history" />
      <param name="refFile" value="phiX.fasta" />
      <param name="speciesName" value="phiX174" />
      <param name="buildName" value="" />
      <param name="truncateSeqNames" value="false" />
      <param name="allowIncDictConcord" value="false" />
      <param name="allowContigLenDiscord" value="false" />
      <param name="outputFormat" value="True" />
      <output name="outFile" file="picard_RS_output1.bam" ftype="bam" lines_diff="4" compare="contains" />
    </test>
    <test>
      <!-- Command:
      java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input2.sam O=picard_RS_output2.sam REFERENCE=/path/to/phiX/picard_index/phiX.fa ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false
      /path/to/phiX/picard_index/phiX.fa is path to phiX.fa, phiX.fa.fai, and phiX.dict
      -->
      <param name="inputFile" value="picard_RS_input2.sam" />
      <param name="indexSource" value="built-in" />
      <param name="ref" value="phiX" />
      <param name="allowIncDictConcord" value="false" />
      <param name="allowContigLenDiscord" value="false" />
      <param name="outputFormat" value="False" />
      <output name="outFile" file="picard_RS_output2.sam" ftype="sam" lines_diff="4" sort="True" />
    </test>
    <test>
      <!-- Commands:
      cp test-data/picard_RS_input4.fasta .
      samtools faidx picard_RS_input4.fasta
      java -jar CreateSequenceDictionary.jar R=picard_RS_input4.fasta O=picard_RS_input4.dict URI=picard_RS_input4.fasta TRUNCATE_NAMES_AT_WHITESPACE=true SPECIES=phiX174 GENOME_ASSEMBLY=phiX_buildBlah1.1
      java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input3.bam O=picard_RS_output3.sam REFERENCE=picard_RS_input4.fasta ALLOW_INCOMPLETE_DICT_CONCORDANCE=true ALLOW_CONTIG_LENGTH_DISCORDANCE=false
      picard_RS_input3.bam can be made from picard_RS_input3.sam
      -->
      <param name="inputFile" value="picard_RS_input3.bam" />
      <param name="indexSource" value="history" />
      <param name="refFile" value="picard_RS_input4.fasta" />
      <param name="speciesName" value="phiX174" />
      <param name="buildName" value="phiX_buildBlah1.1" />
      <param name="truncateSeqNames" value="true" />
      <param name="allowIncDictConcord" value="true" />
      <param name="allowContigLenDiscord" value="false" />
      <param name="outputFormat" value="False" />
      <output name="outFile" file="picard_RS_output3.sam" ftype="sam" lines_diff="12" sort="True" />
    </test>
  </tests>
  <help>

.. class:: infomark

**Purpose**

Reorder SAM/BAM to match contig ordering in a particular reference file. Note that this is
not the same as sorting as done by the SortSam tool, which sorts by either coordinate
values or query name. The ordering in ReorderSam is based on exact name matching of
contigs/chromosomes. Reads that are mapped to a contig that is not in the new reference file are
not included in the output.

**Picard documentation**

This is a Galaxy wrapper for ReorderSam, a part of the external package Picard-tools_.

 .. _Picard-tools: http://www.google.com/search?q=picard+samtools

------

.. class:: infomark

**Inputs, outputs, and parameters**

For the file that needs to be reordered, either a sam file or a bam file must be supplied.
If a bam file is used, it must be coordinate-sorted. A reference file is also required,
so either a fasta file should be supplied or a built-in reference can be selected.

The output contains the same reads as the input file but the reads have been rearranged so
they appear in the same order as the provided reference file. The tool will output either
bam (the default) or sam, according to user selection. Bam is recommended since it is smaller.

The only extra parameters that can be set are flags for allowing incomplete dict concordance
and allowing contig length discordance. If incomplete dict concordance is allowed, only a
partial overlap of the bam contigs with the new reference sequence contigs is required. By
default it is off, requiring a corresponding contig in the new reference for each read contig.
If contig length discordance is allowed, contig names that are the same between a read and the
new reference contig are allowed even if they have different lengths. This is usually not a
good idea, unless you know exactly what you're doing. It's off by default.

.. class:: warningmark

**Warning on SAM/BAM quality**

Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
to be the only way to deal with SAM/BAM that cannot be parsed.

  </help>
</tool>
<!-- picard_ReplaceSamHeader.xml: Galaxy tool definition wrapping Picard ReplaceSamHeader via picard_wrapper.py. -->
<tool name="Replace SAM/BAM Header" id="picard_ReplaceSamHeader" version="1.56.0">
  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
  <!-- The command text is a template: ${...} values are filled in from the params and outputs declared below. -->
  <command interpreter="python">
    picard_wrapper.py
      --input "${inputFile}"
      -o "${outFile}"
      --header-file "${headerFile}"
      --output-format "${outputFormat}"
      -j "\$JAVA_JAR_PATH/ReplaceSamHeader.jar"
      --tmpdir "${__new_file_path__}"
  </command>
  <inputs>
    <!-- TARGET: the dataset whose header is replaced; SAM or BAM. -->
    <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to replace header in (TARGET)"
      help="If empty, upload or import a SAM/BAM dataset." />
    <!-- SOURCE: the dataset the new header is read from; SAM or BAM. -->
    <param format="bam,sam" name="headerFile" type="data" label="SAM/BAM to read header from (SOURCE)"
      help="If empty, upload or import a SAM/BAM dataset." />
    <!-- Checked yields "bam", unchecked yields "sam"; the value is passed as the output-format option and drives change_format below. -->
    <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output BAM instead of SAM" help="Uncheck for SAM output" />
  </inputs>
  <outputs>
    <!-- Declared as bam; switched to sam when outputFormat is "sam". -->
    <data name="outFile" format="bam" label="${tool.name} on ${on_string}: ${outputFormat} with replaced header">
      <change_format>
        <when input="outputFormat" value="sam" format="sam" />
      </change_format>
    </data>
  </outputs>
  <tests>
    <!-- BAM target, BAM header source, SAM output. -->
    <test>
      <!-- Command:
      java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.bam HEADER=test-data/picard_RSH_input1.bam O=picard_RSH_output1.sam
      picard_RSH_input1.bam can be made from picard_RSH_input1.sam
      -->
      <param name="inputFile" value="picard_input_tiny_coord.bam" ftype="bam" />
      <param name="headerFile" value="picard_RSH_input1.bam" ftype="bam" />
      <param name="outputFormat" value="False" />
      <output name="outFile" file="picard_RSH_output1.sam" ftype="sam" />
    </test>
    <!-- SAM target, BAM header source, SAM output. -->
    <test>
      <!-- Command:
      java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.sam HEADER=test-data/picard_RSH_input1.bam O=picard_RSH_output2.sam
      picard_RSH_input1.bam can be made from picard_RSH_input1.sam
      -->
      <param name="inputFile" value="picard_input_tiny_coord.sam" ftype="sam" />
      <param name="headerFile" value="picard_RSH_input1.bam" ftype="bam" />
      <param name="outputFormat" value="False" />
      <output name="outFile" file="picard_RSH_output2.sam" ftype="sam" />
    </test>
    <!-- SAM target, SAM header source, BAM output. -->
    <test>
      <!-- Command:
      java -jar ReplaceSamHeader.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_input_tiny_coord.sam HEADER=test-data/picard_RSH_input1.sam O=picard_RSH_output2.bam
      -->
      <param name="inputFile" value="picard_input_tiny_coord.sam" ftype="sam" />
      <param name="headerFile" value="picard_RSH_input1.sam" ftype="sam" />
      <param name="outputFormat" value="True" />
      <output name="outFile" file="picard_RSH_output2.bam" ftype="bam" />
    </test>
  </tests>
  <help>


.. class:: infomark

**Purpose**

Replace Sam Header with the header from another sam file. The tool does not do any
significant validation, so it's up to the user to make sure that the elements in
the header are relevant and that the new header has all the required things.

Replace the SAMFileHeader in a SAM file with the given header. Validation is
minimal. It is up to the user to ensure that all the elements referred to in the
SAMRecords are present in the new header. Sort order of the two input files must
be the same.

**Picard documentation**

This is a Galaxy wrapper for ReplaceSamHeader, a part of the external package Picard-tools_.

 .. _Picard-tools: http://picard.sourceforge.net/

------

.. class:: infomark

**Inputs and outputs**

Either a sam file or a bam file is required as the file whose header will be replaced.
The header file is also required and can also be either sam or bam (it does not have
to be the same type as the other file). In both cases, if a bam file is used, it must
be coordinate-sorted. Galaxy currently coordinate-sorts all bam files.

The tool will output either bam (the default) or sam. Bam is recommended since it is smaller.

.. class:: warningmark

**Warning on SAM/BAM quality**

Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
to be the only way to deal with SAM/BAM that cannot be parsed.



  </help>
</tool>
<!-- picard_SamToFastq.xml: Galaxy tool definition that runs Picard SamToFastq through picard_SamToFastq_wrapper.py. -->
<tool id="picard_SamToFastq" name="SAM to FASTQ" version="1.56.1" force_history_refresh="True">
  <description>creates a FASTQ file</description>
  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
  <!-- Dan Blankenberg -->
  <!--
    The Picard command line is handed to the wrapper as one or more single-quoted -p strings.
    Per-SAM-file mode writes FASTQ / SECOND_END_FASTQ straight to the output datasets.
    Per-read-group mode sets OUTPUT_PER_RG=true and additionally passes the read_group_file,
    file_id and new_files_path options to the wrapper.
  -->
  <command interpreter="python">picard_SamToFastq_wrapper.py
    -p '
    java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC
    -jar "\$JAVA_JAR_PATH/SamToFastq.jar"
    INPUT="${input_sam}"
    VALIDATION_STRINGENCY="LENIENT"
    RE_REVERSE="${re_reverse}"
    INCLUDE_NON_PF_READS="${include_non_pf_reads}"
    #if str( $clipping_attribute ):
      CLIPPING_ATTRIBUTE="${clipping_attribute}"
    #end if
    #if str( $clipping_action ):
      CLIPPING_ACTION="${clipping_action}"
    #end if
    #if str( $read1_trim ):
      READ1_TRIM="${read1_trim}"
    #end if
    #if str( $read1_max_bases_to_write ):
      READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
    #end if
    INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"

    #if str( $output_per_read_group_selector ) == 'per_sam_file':
      ##OUTPUT_PER_RG=false
      FASTQ="${output_fastq1}"

      #if str( $single_paired_end_type.single_paired_end_type_selector ) == 'paired':
        SECOND_END_FASTQ="${output_fastq2}"
        #if str( $single_paired_end_type.read2_trim ):
          READ2_TRIM="${single_paired_end_type.read2_trim}"
        #end if
        #if str( $single_paired_end_type.read2_max_bases_to_write ):
          READ2_MAX_BASES_TO_WRITE="${single_paired_end_type.read2_max_bases_to_write}"
        #end if
      #end if
      '
    #else:
      OUTPUT_PER_RG=true
      #if str( $single_paired_end_type.single_paired_end_type_selector ) == 'paired':
        '
        --read_group_file_2 "${output_fastq2}"
        --file_id_2 "${output_fastq2.id}"
        -p '
        #if str( $single_paired_end_type.read2_trim ):
          READ2_TRIM="${single_paired_end_type.read2_trim}"
        #end if
        #if str( $single_paired_end_type.read2_max_bases_to_write ):
          READ2_MAX_BASES_TO_WRITE="${single_paired_end_type.read2_max_bases_to_write}"
        #end if
      #end if
      '
      --read_group_file_1 "${output_fastq1}"
      --new_files_path "${__new_file_path__}"
      --file_id_1 "${output_fastq1.id}"
    #end if
  </command>
  <inputs>
    <param name="input_sam" type="data" format="sam,bam" label="BAM/SAM file" />
    <!-- Optional integers: when left blank the matching Picard option is omitted from the command. -->
    <param name="read1_trim" type="integer" value="" optional="True" label="The number of bases to trim from the beginning of read 1." />
    <param name="read1_max_bases_to_write" type="integer" optional="True" value="" label="The maximum number of bases to write from read 1 after trimming." />
    <param name="output_per_read_group_selector" type="select" label="Output per read group">
      <option value="per_sam_file" selected="True">Per BAM/SAM file</option>
      <option value="per_read_group">Per Read Group</option>
    </param>
    <!-- The read 2 options only exist in the paired branch; the command template only reads them there. -->
    <conditional name="single_paired_end_type">
      <param name="single_paired_end_type_selector" type="select" label="Single or Paired end">
        <option value="single" selected="True">Single</option>
        <option value="paired">Paired end</option>
      </param>
      <when value="single">
        <!-- nothing yet -->
      </when>
      <when value="paired">
        <param name="read2_trim" type="integer" value="" optional="True" label="The number of bases to trim from the beginning of read 2." />
        <param name="read2_max_bases_to_write" type="integer" optional="True" value="" label="The maximum number of bases to write from read 2 after trimming." />
      </when>
    </conditional>
    <param name="re_reverse" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Re-reverse bases and qualities of reads on negative strand"/>
    <param name="include_non_pf_reads" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Include non-PF reads from the SAM file into the output FASTQ files."/>
    <param name="clipping_attribute" type="text" value="" label="The attribute that stores the position at which the SAM record should be clipped" help="Leave blank for null" />
    <param name="clipping_action" type="text" value="" label="The action that should be taken with clipped reads" help="'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region. Leave blank for null" />
    <param name="include_non_primary_alignments" type="boolean" truevalue="true" falsevalue="false" checked="False" label="If true, include non-primary alignments in the output." help="Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments."/>

  </inputs>
  <outputs>
    <data format="fastqsanger" name="output_fastq1" label="${tool.name} on ${on_string}: FASTQ 1" />
    <!-- The second FASTQ output is only created for paired-end data. -->
    <data format="fastqsanger" name="output_fastq2" label="${tool.name} on ${on_string}: FASTQ 2" >
      <filter>single_paired_end_type['single_paired_end_type_selector'] == 'paired'</filter>
    </data>
  </outputs>
  <tests>
    <!-- Single end, one FASTQ per SAM file. -->
    <test>
      <param name="input_sam" value="bfast_out1.sam" ftype="sam" />
      <param name="output_per_read_group_selector" value="per_sam_file" />
      <param name="single_paired_end_type_selector" value="single" />
      <param name="read1_trim" value="" />
      <param name="read1_max_bases_to_write" value="" />
      <param name="re_reverse" value="True" />
      <param name="include_non_pf_reads" value="False" />
      <param name="clipping_action" value="" />
      <param name="clipping_attribute" value="" />
      <param name="include_non_primary_alignments" value="False" />
      <output name="output_fastq1" file="random_phiX_1.fastqsanger"/>
    </test>
    <!-- Paired end, one FASTQ pair per SAM file. -->
    <test>
      <param name="input_sam" value="bwa_wrapper_out3.sam" ftype="sam" />
      <param name="output_per_read_group_selector" value="per_sam_file" />
      <param name="single_paired_end_type_selector" value="paired" />
      <param name="read1_trim" value="" />
      <param name="read1_max_bases_to_write" value="" />
      <param name="read2_trim" value="" />
      <param name="read2_max_bases_to_write" value="" />
      <param name="re_reverse" value="True" />
      <param name="include_non_pf_reads" value="False" />
      <param name="clipping_action" value="" />
      <param name="clipping_attribute" value="" />
      <param name="include_non_primary_alignments" value="False" />
      <output name="output_fastq1" file="bwa_wrapper_in2.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file -->
      <output name="output_fastq2" file="bwa_wrapper_in3.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file -->
    </test>
    <!-- Paired end, output per read group. -->
    <test>
      <param name="input_sam" value="bwa_wrapper_out3.sam" ftype="sam" />
      <param name="output_per_read_group_selector" value="per_read_group" />
      <param name="single_paired_end_type_selector" value="paired" />
      <param name="read1_trim" value="" />
      <param name="read1_max_bases_to_write" value="" />
      <param name="read2_trim" value="" />
      <param name="read2_max_bases_to_write" value="" />
      <param name="re_reverse" value="True" />
      <param name="include_non_pf_reads" value="False" />
      <param name="clipping_action" value="" />
      <param name="clipping_attribute" value="" />
      <param name="include_non_primary_alignments" value="False" />
      <output name="output_fastq1" file="bwa_wrapper_in2.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file -->
      <output name="output_fastq2" file="bwa_wrapper_in3.fastqsanger" lines_diff="64"/> <!-- 16 unaligned fastq blocks not present in original sam file -->
    </test>
  </tests>
  <help>
**What it does**

Picard: SamToFastq converts SAM files to FASTQ files.

Extracts read sequences and qualities from the input SAM/BAM file and writes them into the output file in Sanger fastq format. In the RC mode (default is True), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from input SAM file will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.

------

Please cite the website "http://picard.sourceforge.net".

------


**Input formats**

SamToFastq accepts SAM or BAM input files, see http://samtools.sourceforge.net for more details.

------

**Outputs**

The output is in FASTQ format. If using Paired end data, 2 fastq files are created.

-------

**SamToFastq settings**

This is a list of SamToFastq options::

  INPUT=File Input SAM/BAM file to extract reads from Required.
  FASTQ=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq). Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
  SECOND_END_FASTQ=File Output fastq file (if paired, second end of the pair fastq). Default value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
  OUTPUT_PER_RG=Boolean Output a fastq file per read group (two fastq files per read group if the group is paired). Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false} Cannot be used in conjunction with option(s) SECOND_END_FASTQ (F2) FASTQ (F)
  OUTPUT_DIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true. Default value: null.
  RE_REVERSE=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}
  INCLUDE_NON_PF_READS=Boolean Include non-PF reads from the SAM file into the output FASTQ files. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}
  CLIPPING_ATTRIBUTE=String The attribute that stores the position at which the SAM record should be clipped Default value: null.
  CLIPPING_ACTION=String The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region. Default value: null.
  READ1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0. This option can be set to 'null' to clear the default value.
  READ1_MAX_BASES_TO_WRITE=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written. Default value: null.
  READ2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0. This option can be set to 'null' to clear the default value.
  READ2_MAX_BASES_TO_WRITE=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written. Default value: null.
  INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean If true, include non-primary alignments in the output. Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}


  </help>
</tool>
#!/usr/bin/env python
#Dan Blankenberg
#File: picard_SamToFastq_wrapper.py

"""
A wrapper script for running the Picard SamToFastq command. Allows parsing read groups into separate files.

The pass-through options (-p, repeatable) are joined with single spaces and run
as one shell command inside a private temporary directory. Whatever the command
writes to stderr is captured and replayed on this script's stderr when the
command exits non-zero, or on this script's stdout when it exits zero.

When --new_files_path is given (read group mode), OUTPUT_DIR is pointed at the
temporary directory and the '*_1.fastq' / '*_2.fastq' files found there
afterwards are moved to their final locations.
"""

import sys, optparse, os, tempfile, subprocess, shutil

CHUNK_SIZE = 2**20 #1mb


def cleanup_before_exit( tmp_dir ):
    """Remove tmp_dir and everything in it; a falsy or missing path is a no-op."""
    if tmp_dir and os.path.exists( tmp_dir ):
        shutil.rmtree( tmp_dir )

def open_file_from_option( filename, mode = 'rb' ):
    """Open filename with the given mode, or return None when filename is empty/None."""
    if filename:
        return open( filename, mode = mode )
    return None

def _build_parser():
    """Build the option parser for the wrapper's command line."""
    parser = optparse.OptionParser()
    parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to PICARD, without any modification.' )
    parser.add_option( '-1', '--read_group_file_1', dest='read_group_file_1', action='store', type="string", default=None, help='Read Group 1 output file, when using multiple readgroups' )
    parser.add_option( '-2', '--read_group_file_2', dest='read_group_file_2', action='store', type="string", default=None, help='Read Group 2 output file, when using multiple readgroups and paired end' )
    parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' )
    parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' )
    parser.add_option( '-n', '--new_files_path', dest='new_files_path', action='store', type="string", default=None, help='new_files_path')
    parser.add_option( '-i', '--file_id_1', dest='file_id_1', action='store', type="string", default=None, help='file_id_1')
    parser.add_option( '-f', '--file_id_2', dest='file_id_2', action='store', type="string", default=None, help='file_id_2')
    return parser

def _copy_stream( source, target ):
    """Copy source (from its current position to EOF) to target, CHUNK_SIZE bytes at a time."""
    # The captured output is read in binary mode; when the target is a text
    # stream exposing an underlying binary buffer, write through that buffer.
    target = getattr( target, 'buffer', target )
    while True:
        chunk = source.read( CHUNK_SIZE )
        if not chunk:
            break
        target.write( chunk )

def _move_read_group_files( options, tmp_dir ):
    """
    Move the per read group FASTQ files out of tmp_dir.

    Files are visited in sorted name order. The first '*_1.fastq' file goes to
    --read_group_file_1 and the first '*_2.fastq' file to --read_group_file_2
    (when given); every further file is moved into --new_files_path under a
    'primary_<file id>_<name> - <1|2>_visible_fastqsanger' name. When no
    --file_id_2 was supplied, --file_id_1 is used for the second-end files too.
    """
    fastq_1_name = options.read_group_file_1
    fastq_2_name = options.read_group_file_2
    file_id_1 = options.file_id_1
    file_id_2 = options.file_id_2
    if file_id_2 is None:
        file_id_2 = file_id_1
    for filename in sorted( os.listdir( tmp_dir ) ):
        if filename.endswith( '_1.fastq' ):
            if fastq_1_name:
                shutil.move( os.path.join( tmp_dir, filename ), fastq_1_name )
                fastq_1_name = None
            else:
                shutil.move( os.path.join( tmp_dir, filename ), os.path.join( options.new_files_path, 'primary_%s_%s - 1_visible_fastqsanger' % ( file_id_1, filename[:-len( '_1.fastq' )] ) ) )
        elif filename.endswith( '_2.fastq' ):
            if fastq_2_name:
                shutil.move( os.path.join( tmp_dir, filename ), fastq_2_name )
                fastq_2_name = None
            else:
                shutil.move( os.path.join( tmp_dir, filename ), os.path.join( options.new_files_path, 'primary_%s_%s - 2_visible_fastqsanger' % ( file_id_2, filename[:-len( '_2.fastq' )] ) ) )

def _run_picard( options, tmp_dir ):
    """
    Run the pass-through command in tmp_dir and relocate any read group output.

    Returns the exit status of the command.
    """
    if options.pass_through_options:
        cmd = ' '.join( options.pass_through_options )
    else:
        cmd = ''
    if options.new_files_path is not None:
        sys.stdout.write( 'Creating FASTQ files by Read Group\n' )
        sys.stdout.flush()
        assert None not in [ options.read_group_file_1, options.new_files_path, options.file_id_1 ], 'When using read group aware, you need to specify --read_group_file_1, --read_group_file_2 (when paired end), --new_files_path, and --file_id'
        cmd = '%s OUTPUT_DIR="%s"' % ( cmd, tmp_dir)
    #set up stdout and stderr output options
    stdout = open_file_from_option( options.stdout, mode = 'wb' )
    owns_stdout = stdout is not None
    if not owns_stdout:
        stdout = sys.stdout
    # 'w+b' rather than 'wb': the captured stderr is read back below, and a
    # write-only handle cannot be read.
    stderr = open_file_from_option( options.stderr, mode = 'w+b' )
    #if no stderr file is specified, we'll use our own
    if stderr is None:
        stderr = tempfile.NamedTemporaryFile( prefix="picard-stderr-", dir=tmp_dir )
    try:
        proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
        return_code = proc.wait()

        # Only surface the captured output on stderr when the command failed.
        if return_code:
            stderr_target = sys.stderr
        else:
            stderr_target = sys.stdout
        stderr.flush()
        stderr.seek(0)
        _copy_stream( stderr, stderr_target )
    finally:
        stderr.close()
        if owns_stdout:
            stdout.close()
    #if rg aware, put files where they belong
    if options.new_files_path is not None:
        _move_read_group_files( options, tmp_dir )
    return return_code

def __main__():
    """Parse the command line, run the command, and always remove the temporary directory."""
    #Parse Command Line
    parser = _build_parser()
    (options, args) = parser.parse_args()

    tmp_dir = tempfile.mkdtemp( prefix='tmp-picard-' )
    try:
        return _run_picard( options, tmp_dir )
    finally:
        # Runs on failure paths too, so the scratch directory is never leaked.
        cleanup_before_exit( tmp_dir )

# Propagate the command's exit status instead of always exiting 0.
if __name__=="__main__": sys.exit( __main__() )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/picard_wrapper.py Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,776 @@ +#!/usr/bin/env python +""" +Originally written by Kelly Vincent +pretty output and additional picard wrappers by Ross Lazarus for rgenetics +Runs all available wrapped Picard tools. +usage: picard_wrapper.py [options] +code Ross wrote licensed under the LGPL +see http://www.gnu.org/copyleft/lesser.html +""" + +import optparse, os, sys, subprocess, tempfile, shutil, time, logging + +galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +""" +galhtmlattr = """Galaxy tool %s run at %s</b><br/>""" +galhtmlpostfix = """</div></body></html>\n""" + + +def stop_err( msg ): + sys.stderr.write( '%s\n' % msg ) + sys.exit() + + +def timenow(): + """return current time as a string + """ + return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) + + +class PicardBase(): + """ + simple base class with some utilities for Picard + adapted and merged with Kelly Vincent's code april 2011 Ross + lots of changes... 
+ """ + + def __init__(self, opts=None,arg0=None): + """ common stuff needed at init for a picard tool + """ + assert opts <> None, 'PicardBase needs opts at init' + self.opts = opts + if self.opts.outdir == None: + self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir + assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in' + self.picname = self.baseName(opts.jar) + if self.picname.startswith('picard'): + self.picname = opts.picard_cmd # special case for some tools like replaceheader? + self.progname = self.baseName(arg0) + self.version = '0.002' + self.delme = [] # list of files to destroy + self.title = opts.title + self.inputfile = opts.input + try: + os.makedirs(opts.outdir) + except: + pass + try: + os.makedirs(opts.tmpdir) + except: + pass + self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname) + self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname) + self.setLogging(logfname=self.log_filename) + + def baseName(self,name=None): + return os.path.splitext(os.path.basename(name))[0] + + def setLogging(self,logfname="picard_wrapper.log"): + """setup a logger + """ + logging.basicConfig(level=logging.INFO, + filename=logfname, + filemode='a') + + + def readLarge(self,fname=None): + """ read a potentially huge file. 
+ """ + try: + # get stderr, allowing for case where it's very large + tmp = open( fname, 'rb' ) + s = '' + buffsize = 1048576 + try: + while True: + more = tmp.read( buffsize ) + if len(more) > 0: + s += more + else: + break + except OverflowError: + pass + tmp.close() + except Exception, e: + stop_err( 'Read Large Exception : %s' % str( e ) ) + return s + + def runCL(self,cl=None,output_dir=None): + """ construct and run a command line + we have galaxy's temp path as opt.temp_dir so don't really need isolation + sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts + """ + assert cl <> None, 'PicardBase runCL needs a command line as cl' + if output_dir == None: + output_dir = self.opts.outdir + if type(cl) == type([]): + cl = ' '.join(cl) + fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt') + tlf = open(templog,'wb') + fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt') + tef = open(temperr,'wb') + process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir) + rval = process.wait() + tlf.close() + tef.close() + stderrs = self.readLarge(temperr) + stdouts = self.readLarge(templog) + if rval > 0: + s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs) + stdouts = '%s\n%s' % (stdouts,stderrs) + else: + s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval) + logging.info(s) + os.unlink(templog) # always + os.unlink(temperr) # always + return s, stdouts, rval # sometimes s is an output + + def runPic(self, jar, cl): + """ + cl should be everything after the jar file name in the command + """ + runme = ['java -Xmx%s' % self.opts.maxjheap] + runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir) + runme.append('-jar %s' % jar) + runme += cl + s,stdouts,rval = self.runCL(cl=runme, output_dir=self.opts.outdir) + return stdouts,rval + + def samToBam(self,infile=None,outdir=None): + """ + use samtools view to convert sam to 
bam + """ + fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam') + cl = ['samtools view -h -b -S -o ',tempbam,infile] + tlog,stdouts,rval = self.runCL(cl,outdir) + return tlog,tempbam,rval + + def sortSam(self, infile=None,outfile=None,outdir=None): + """ + """ + print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir) + cl = ['samtools sort',infile,outfile] + tlog,stdouts,rval = self.runCL(cl,outdir) + return tlog + + def cleanup(self): + for fname in self.delme: + try: + os.unlink(fname) + except: + pass + + def prettyPicout(self,transpose,maxrows): + """organize picard outpouts into a report html page + """ + res = [] + try: + r = open(self.metricsOut,'r').readlines() + except: + r = [] + if len(r) > 0: + res.append('<b>Picard on line resources</b><ul>\n') + res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n') + res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n') + if transpose: + res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n') + else: + res.append('<b>Picard output</b><hr/>\n') + res.append('<table cellpadding="3" >\n') + dat = [] + heads = [] + lastr = len(r) - 1 + # special case for estimate library complexity hist + thist = False + for i,row in enumerate(r): + if row.strip() > '': + srow = row.split('\t') + if row.startswith('#'): + heads.append(row.strip()) # want strings + else: + dat.append(srow) # want lists + if row.startswith('## HISTOGRAM'): + thist = True + if len(heads) > 0: + hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)] + res += hres + heads = [] + if len(dat) > 0: + if transpose and not thist: + tdat = map(None,*dat) # transpose an arbitrary list of lists + tdat = ['<tr class="d%d"><td>%s</td><td>%s </td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in 
enumerate(tdat)] + else: + tdat = ['\t'.join(x).strip() for x in dat] # back to strings :( + tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)] + res += tdat + dat = [] + res.append('</table>\n') + return res + + def fixPicardOutputs(self,transpose,maxloglines): + """ + picard produces long hard to read tab header files + make them available but present them transposed for readability + """ + logging.shutdown() + self.cleanup() # remove temp files stored in delme + rstyle="""<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style>""" + res = [rstyle,] + res.append(galhtmlprefix % self.progname) + res.append(galhtmlattr % (self.picname,timenow())) + flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')] + pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'] + if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs + for p in pdflist: + pbase = os.path.splitext(p)[0] # removes .pdf + imghref = '%s.jpg' % pbase + mimghref = '%s-0.jpg' % pbase # multiple pages pdf -> multiple thumbnails without asking! 
+ if mimghref in flist: + imghref=mimghref # only one for thumbnail...it's a multi page pdf + res.append('<table cellpadding="10"><tr><td>\n') + res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref)) + res.append('</tr></td></table>\n') + if len(flist) > 0: + res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>') + res.append('<table>\n') + for i,f in enumerate(flist): + fn = os.path.split(f)[-1] + res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn)) + res.append('</table><p/>\n') + pres = self.prettyPicout(transpose,maxloglines) + if len(pres) > 0: + res += pres + l = open(self.log_filename,'r').readlines() + llen = len(l) + if llen > 0: + res.append('<b>Picard Tool Run Log</b><hr/>\n') + rlog = ['<pre>',] + if llen > maxloglines: + n = min(50,int(maxloglines/2)) + rlog += l[:n] + rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines)) + rlog += l[-n:] + else: + rlog += l + rlog.append('</pre>') + if llen > maxloglines: + rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename)) + res += rlog + else: + res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename) + res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n') + res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool') + res.append(galhtmlpostfix) + outf = open(self.opts.htmlout,'w') + outf.write(''.join(res)) + outf.write('\n') + outf.close() + + def makePicInterval(self,inbed=None,outf=None): + """ + picard wants bait and target files to have the same header length as the incoming bam/sam + a meaningful (ie accurate) representation will 
fail because of this - so this hack + it would be far better to be able to supply the original bed untouched + Additional checking added Ross Lazarus Dec 2011 to deal with two 'bug' reports on the list + """ + assert inbed <> None + bed = open(inbed,'r').readlines() + sbed = [x.split('\t') for x in bed] # lengths MUST be 5 + lens = [len(x) for x in sbed] + strands = [x[3] for x in sbed if not x[3] in ['+','-']] + maxl = max(lens) + minl = min(lens) + e = [] + if maxl <> minl: + e.append("## Input error: Inconsistent field count in %s - please read the documentation on bait/target format requirements, fix and try again" % inbed) + if maxl <> 5: + e.append("## Input error: %d fields found in %s, 5 required - please read the warning and documentation on bait/target format requirements, fix and try again" % (maxl,inbed)) + if len(strands) > 0: + e.append("## Input error: Fourth column in %s is not the required strand (+ or -) - please read the warning and documentation on bait/target format requirements, fix and try again" % (inbed)) + if len(e) > 0: # write to stderr and quit + print >> sys.stderr, '\n'.join(e) + sys.exit(1) + thead = os.path.join(self.opts.outdir,'tempSamHead.txt') + if self.opts.datatype == 'sam': + cl = ['samtools view -H -S',self.opts.input,'>',thead] + else: + cl = ['samtools view -H',self.opts.input,'>',thead] + self.runCL(cl=cl,output_dir=self.opts.outdir) + head = open(thead,'r').readlines() + s = '## got %d rows of header\n' % (len(head)) + logging.info(s) + o = open(outf,'w') + o.write(''.join(head)) + o.write(''.join(bed)) + o.close() + return outf + + def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None): + """ + interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs! 
+ Do the work of removing all the error sequences + pysam is cool + infile = pysam.Samfile( "-", "r" ) + outfile = pysam.Samfile( "-", "w", template = infile ) + for s in infile: outfile.write(s) + + errors from ValidateSameFile.jar look like + WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing + ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary. + ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041 + + """ + assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam + assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path' + removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2] + remDict = dict(zip(removeNames,range(len(removeNames)))) + infile = pysam.Samfile(insam,'rb') + info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict)) + if len(removeNames) > 0: + outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file + i = 0 + j = 0 + for row in infile: + dropme = remDict.get(row.qname,None) # keep if None + if not dropme: + outfile.write(row) + j += 1 + else: # discard + i += 1 + info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam)) + outfile.close() + infile.close() + else: # we really want a nullop or a simple pointer copy + infile.close() + if newsam: + shutil.copy(insam,newsam) + logging.info(info) + + + +def __main__(): + doFix = False # tools returning htmlfile don't need this + doTranspose = True # default + maxloglines = 100 # default + #Parse Command Line + op = optparse.OptionParser() + # All tools + op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' ) + op.add_option('-e', '--inputext', default=None) + op.add_option('-o', '--output', default=None) + op.add_option('-n', '--title', default="Pick a 
Picard Tool") + op.add_option('-t', '--htmlout', default=None) + op.add_option('-d', '--outdir', default=None) + op.add_option('-x', '--maxjheap', default='4g') + op.add_option('-b', '--bisulphite', default='false') + op.add_option('-s', '--sortorder', default='query') + op.add_option('','--tmpdir', default='/tmp') + op.add_option('-j','--jar',default='') + op.add_option('','--picard-cmd',default=None) + # Many tools + op.add_option( '', '--output-format', dest='output_format', help='Output format' ) + op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' ) + op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None ) + # CreateSequenceDictionary + op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None ) + op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' ) + op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' ) + op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' ) + # MarkDuplicates + op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' ) + op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' 
) + # CollectInsertSizeMetrics + op.add_option('', '--taillimit', default="0") + op.add_option('', '--histwidth', default="0") + op.add_option('', '--minpct', default="0.01") + op.add_option('', '--malevel', default='') + op.add_option('', '--deviations', default="0.0") + # CollectAlignmentSummaryMetrics + op.add_option('', '--maxinsert', default="20") + op.add_option('', '--adaptors', default='') + # FixMateInformation and validate + # CollectGcBiasMetrics + op.add_option('', '--windowsize', default='100') + op.add_option('', '--mingenomefrac', default='0.00001') + # AddOrReplaceReadGroups + op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' ) + op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' ) + op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' ) + op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' ) + op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' ) + op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' ) + op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' ) + op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' ) + # ReorderSam + op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' ) + op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' ) + # ReplaceSamHeader + op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' ) + + op.add_option('','--assumesorted', default='true') + op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*") + #estimatelibrarycomplexity + op.add_option('','--minid', default="5") + op.add_option('','--maxdiff', 
default="0.03") + op.add_option('','--minmeanq', default="20") + #hsmetrics + op.add_option('','--baitbed', default=None) + op.add_option('','--targetbed', default=None) + #validate + op.add_option('','--ignoreflags', action='append', type="string") + op.add_option('','--maxerrors', default=None) + op.add_option('','--datatype', default=None) + op.add_option('','--bamout', default=None) + op.add_option('','--samout', default=None) + + opts, args = op.parse_args() + opts.sortme = opts.assumesorted == 'false' + assert opts.input <> None + # need to add + # instance that does all the work + pic = PicardBase(opts,sys.argv[0]) + + tmp_dir = opts.outdir + haveTempout = False # we use this where sam output is an option + rval = 0 + stdouts = 'Not run yet' + # set ref and dict files to use (create if necessary) + ref_file_name = opts.ref + if opts.ref_file <> None: + csd = 'CreateSequenceDictionary' + realjarpath = os.path.split(opts.jar)[0] + jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq + tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname) + ref_file_name = '%s.fasta' % tmp_ref_name + # build dict + dict_file_name = '%s.dict' % tmp_ref_name + os.symlink( opts.ref_file, ref_file_name ) + cl = ['REFERENCE=%s' % ref_file_name] + cl.append('OUTPUT=%s' % dict_file_name) + cl.append('URI=%s' % os.path.basename( opts.ref_file )) + cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names) + if opts.species_name: + cl.append('SPECIES=%s' % opts.species_name) + if opts.build_name: + cl.append('GENOME_ASSEMBLY=%s' % opts.build_name) + pic.delme.append(dict_file_name) + pic.delme.append(ref_file_name) + pic.delme.append(tmp_ref_name) + stdouts,rval = pic.runPic(jarpath, cl) + # run relevant command(s) + + # define temporary output + # if output is sam, it must have that extension, otherwise bam will be produced + # specify sam or bam file with extension + if opts.output_format == 'sam': + suff = '.sam' + else: + suff = '' + 
tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff ) + + cl = ['VALIDATION_STRINGENCY=LENIENT',] + + if pic.picname == 'AddOrReplaceReadGroups': + # sort order to match Galaxy's default + cl.append('SORT_ORDER=coordinate') + # input + cl.append('INPUT=%s' % opts.input) + # outputs + cl.append('OUTPUT=%s' % tempout) + # required read groups + cl.append('RGLB="%s"' % opts.rg_library) + cl.append('RGPL="%s"' % opts.rg_platform) + cl.append('RGPU="%s"' % opts.rg_plat_unit) + cl.append('RGSM="%s"' % opts.rg_sample) + if opts.rg_id: + cl.append('RGID="%s"' % opts.rg_id) + # optional read groups + if opts.rg_seq_center: + cl.append('RGCN="%s"' % opts.rg_seq_center) + if opts.rg_desc: + cl.append('RGDS="%s"' % opts.rg_desc) + stdouts,rval = pic.runPic(opts.jar, cl) + haveTempout = True + + elif pic.picname == 'BamIndexStats': + tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir ) + tmp_bam_name = '%s.bam' % tmp_name + tmp_bai_name = '%s.bai' % tmp_bam_name + os.symlink( opts.input, tmp_bam_name ) + os.symlink( opts.bai_file, tmp_bai_name ) + cl.append('INPUT=%s' % ( tmp_bam_name )) + pic.delme.append(tmp_bam_name) + pic.delme.append(tmp_bai_name) + pic.delme.append(tmp_name) + stdouts,rval = pic.runPic( opts.jar, cl ) + f = open(pic.metricsOut,'a') + f.write(stdouts) # got this on stdout from runCl + f.write('\n') + f.close() + doTranspose = False # but not transposed + + elif pic.picname == 'EstimateLibraryComplexity': + cl.append('I=%s' % opts.input) + cl.append('O=%s' % pic.metricsOut) + if float(opts.minid) > 0: + cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid) + if float(opts.maxdiff) > 0.0: + cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff) + if float(opts.minmeanq) > 0: + cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq) + if opts.readregex > '': + cl.append('READ_NAME_REGEX="%s"' % opts.readregex) + if float(opts.optdupdist) > 0: + cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist) + stdouts,rval = pic.runPic(opts.jar, cl) + + elif 
pic.picname == 'CollectAlignmentSummaryMetrics': + # Why do we do this fakefasta thing? + # Because we need NO fai to be available or picard barfs unless it matches the input data. + # why? Dunno Seems to work without complaining if the .bai file is AWOL.... + fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name)) + try: + os.symlink(ref_file_name,fakefasta) + except: + s = '## unable to symlink %s to %s - different devices? Will shutil.copy' + info = s + shutil.copy(ref_file_name,fakefasta) + pic.delme.append(fakefasta) + cl.append('ASSUME_SORTED=true') + adaptlist = opts.adaptors.split(',') + adaptorseqs = ['ADAPTER_SEQUENCE=%s' % x for x in adaptlist] + cl += adaptorseqs + cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite) + cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert) + cl.append('OUTPUT=%s' % pic.metricsOut) + cl.append('R=%s' % fakefasta) + cl.append('TMP_DIR=%s' % opts.tmpdir) + if not opts.assumesorted.lower() == 'true': # we need to sort input + sortedfile = '%s.sorted' % os.path.basename(opts.input) + if opts.datatype == 'sam': # need to work with a bam + tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir) + pic.delme.append(tempbam) + try: + tlog = pic.sortSam(tempbam,sortedfile,opts.outdir) + except: + print '## exception on sorting sam file %s' % opts.input + else: # is already bam + try: + tlog = pic.sortSam(opts.input,sortedfile,opts.outdir) + except : # bug - [bam_sort_core] not being ignored - TODO fixme + print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input) + cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile))) + pic.delme.append(os.path.join(opts.outdir,sortedfile)) + else: + cl.append('INPUT=%s' % os.path.abspath(opts.input)) + stdouts,rval = pic.runPic(opts.jar, cl) + + + elif pic.picname == 'CollectGcBiasMetrics': + assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name + # sigh. 
Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data. + # why? Dunno + fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name)) + try: + os.symlink(ref_file_name,fakefasta) + except: + s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy' + info = s + shutil.copy(ref_file_name,fakefasta) + pic.delme.append(fakefasta) + x = 'rgPicardGCBiasMetrics' + pdfname = '%s.pdf' % x + jpgname = '%s.jpg' % x + tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out') + temppdf = os.path.join(opts.outdir,pdfname) + cl.append('R=%s' % fakefasta) + cl.append('WINDOW_SIZE=%s' % opts.windowsize) + cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac) + cl.append('INPUT=%s' % opts.input) + cl.append('OUTPUT=%s' % tempout) + cl.append('TMP_DIR=%s' % opts.tmpdir) + cl.append('CHART_OUTPUT=%s' % temppdf) + cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut) + stdouts,rval = pic.runPic(opts.jar, cl) + if os.path.isfile(temppdf): + cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find + s,stdouts,rval = pic.runCL(cl=cl2,output_dir=opts.outdir) + else: + s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf + lf = open(pic.log_filename,'a') + lf.write(s) + lf.write('\n') + lf.close() + + elif pic.picname == 'CollectInsertSizeMetrics': + """ <command interpreter="python"> + picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations" + --histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel" + -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file" + </command> + """ + isPDF = 'InsertSizeHist.pdf' + pdfpath = os.path.join(opts.outdir,isPDF) + histpdf = 'InsertSizeHist.pdf' + cl.append('I=%s' % 
opts.input) + cl.append('O=%s' % pic.metricsOut) + cl.append('HISTOGRAM_FILE=%s' % histpdf) + #if opts.taillimit <> '0': # this was deprecated although still mentioned in the docs at 1.56 + # cl.append('TAIL_LIMIT=%s' % opts.taillimit) + if opts.histwidth <> '0': + cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth) + if float( opts.minpct) > 0.0: + cl.append('MINIMUM_PCT=%s' % opts.minpct) + if float(opts.deviations) > 0.0: + cl.append('DEVIATIONS=%s' % opts.deviations) + if opts.malevel: + malists = opts.malevel.split(',') + malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists] + cl += malist + stdouts,rval = pic.runPic(opts.jar, cl) + if os.path.exists(pdfpath): # automake thumbnail - will be added to html + cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath] + pic.runCL(cl=cl2,output_dir=opts.outdir) + else: + s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath + s += 'This <b>always happens if single ended data was provided</b> to this tool,\n' + s += 'so please double check that your input data really is paired-end NGS data.<br/>\n' + s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>' + logging.info(s) + if len(stdouts) > 0: + logging.info(stdouts) + + elif pic.picname == 'MarkDuplicates': + # assume sorted even if header says otherwise + cl.append('ASSUME_SORTED=%s' % (opts.assumesorted)) + # input + cl.append('INPUT=%s' % opts.input) + # outputs + cl.append('OUTPUT=%s' % opts.output) + cl.append('METRICS_FILE=%s' % pic.metricsOut ) + # remove or mark duplicates + cl.append('REMOVE_DUPLICATES=%s' % opts.remdups) + # the regular expression to be used to parse reads in incoming SAM file + cl.append('READ_NAME_REGEX="%s"' % opts.readregex) + # maximum offset between two duplicate clusters + cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist) + stdouts,rval = pic.runPic(opts.jar, cl) + + elif pic.picname == 'FixMateInformation': + cl.append('I=%s' % opts.input) + 
cl.append('O=%s' % tempout) + cl.append('SORT_ORDER=%s' % opts.sortorder) + stdouts,rval = pic.runPic(opts.jar,cl) + haveTempout = True + + elif pic.picname == 'ReorderSam': + # input + cl.append('INPUT=%s' % opts.input) + # output + cl.append('OUTPUT=%s' % tempout) + # reference + cl.append('REFERENCE=%s' % ref_file_name) + # incomplete dict concordance + if opts.allow_inc_dict_concord == 'true': + cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true') + # contig length discordance + if opts.allow_contig_len_discord == 'true': + cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true') + stdouts,rval = pic.runPic(opts.jar, cl) + haveTempout = True + + elif pic.picname == 'ReplaceSamHeader': + cl.append('INPUT=%s' % opts.input) + cl.append('OUTPUT=%s' % tempout) + cl.append('HEADER=%s' % opts.header_file) + stdouts,rval = pic.runPic(opts.jar, cl) + haveTempout = True + + elif pic.picname == 'CalculateHsMetrics': + maxloglines = 100 + baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait') + targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target') + baitf = pic.makePicInterval(opts.baitbed,baitfname) + if opts.targetbed == opts.baitbed: # same file sometimes + targetf = baitf + else: + targetf = pic.makePicInterval(opts.targetbed,targetfname) + cl.append('BAIT_INTERVALS=%s' % baitf) + cl.append('TARGET_INTERVALS=%s' % targetf) + cl.append('INPUT=%s' % os.path.abspath(opts.input)) + cl.append('OUTPUT=%s' % pic.metricsOut) + cl.append('TMP_DIR=%s' % opts.tmpdir) + stdouts,rval = pic.runPic(opts.jar,cl) + + elif pic.picname == 'ValidateSamFile': + import pysam + doTranspose = False + sortedfile = os.path.join(opts.outdir,'rgValidate.sorted') + stf = open(pic.log_filename,'w') + tlog = None + if opts.datatype == 'sam': # need to work with a bam + tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir) + try: + tlog = pic.sortSam(tempbam,sortedfile,opts.outdir) + except: + print '## exception on sorting sam file %s' % opts.input + else: # is already bam + try: + 
tlog = pic.sortSam(opts.input,sortedfile,opts.outdir) + except: # bug - [bam_sort_core] not being ignored - TODO fixme + print '## exception on sorting bam file %s' % opts.input + if tlog: + print '##tlog=',tlog + stf.write(tlog) + stf.write('\n') + sortedfile = '%s.bam' % sortedfile # samtools does that + cl.append('O=%s' % pic.metricsOut) + cl.append('TMP_DIR=%s' % opts.tmpdir) + cl.append('I=%s' % sortedfile) + opts.maxerrors = '99999999' + cl.append('MAX_OUTPUT=%s' % opts.maxerrors) + if opts.ignoreflags[0] <> 'None': # picard error values to ignore + igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None'] + cl.append(' '.join(igs)) + if opts.bisulphite.lower() <> 'false': + cl.append('IS_BISULFITE_SEQUENCED=true') + if opts.ref <> None or opts.ref_file <> None: + cl.append('R=%s' % ref_file_name) + stdouts,rval = pic.runPic(opts.jar,cl) + if opts.datatype == 'sam': + pic.delme.append(tempbam) + newsam = opts.output + outformat = 'bam' + pe = open(pic.metricsOut,'r').readlines() + pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat) + pic.delme.append(sortedfile) # not wanted + stf.close() + pic.cleanup() + else: + print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname + sys.exit(1) + if haveTempout: + # Some Picard tools produced a potentially intermediate bam file. + # Either just move to final location or create sam + if os.path.exists(tempout): + shutil.move(tempout, os.path.abspath(opts.output)) + if opts.htmlout <> None or doFix: # return a pretty html page + pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines) + if rval <> 0: + print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts) + # signal failure +if __name__=="__main__": __main__() +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgPicardASMetrics.xml Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,162 @@ +<tool name="SAM/BAM Alignment Summary Metrics" id="PicardASMetrics" version="1.56.0"> + <command interpreter="python"> + picard_wrapper.py -i "${input_file}" -d "${html_file.files_path}" -t "${html_file}" + --assumesorted "${sorted}" -b "${bisulphite}" --adaptors "${adaptors}" --maxinsert "${maxinsert}" -n "${out_prefix}" --datatype "${input_file.ext}" + -j "\$JAVA_JAR_PATH/CollectAlignmentSummaryMetrics.jar" --tmpdir "${__new_file_path__}" +#if $genomeSource.refGenomeSource == "history": + --ref-file "${genomeSource.ownFile}" +#else + --ref "${genomeSource.index.fields.path}" +#end if + </command> + <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements> + <inputs> + <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for" + help="If empty, upload or import a SAM/BAM dataset."/> + <param name="out_prefix" value="Picard Alignment Summary Metrics" type="text" + label="Title for the output file" help="Use this to remind you what the job was for." size="80" /> + + <conditional name="genomeSource"> + + <param name="refGenomeSource" type="select" label="Select Reference Genome"> + <option value="default" selected="true">Use the assigned data genome/build</option> + <option value="indexed">Select a different built-in genome</option> + <option value="history">Use a genome (fasta format) from my history</option> + </param> + <when value="default"> + <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in your dataset were aligned against this reference. 
If this is not correct, use the 'Select a different built-in genome' option of the 'Select Reference Genome' dropdown to select the appropriate reference."> + <options from_data_table="all_fasta"> + <filter type="data_meta" ref="input_file" key="dbkey" column="dbkey" multiple="True" separator="," /> + <validator type="no_options" message="No reference build available for selected input" /> + </options> + </param> + </when> + <when value="indexed"> + <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here request it by using 'Help' link at the top of Galaxy interface or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown."> + <options from_data_table="all_fasta"> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. 
If you are working with large human-sized genomes, send request to Galaxy team for adding your reference to this Galaxy instance by using 'Help' link at the top of Galaxy interface."/> + </when> + </conditional> + <param name="sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false"/> + <param name="bisulphite" type="boolean" label="Input file contains Bisulphite sequenced reads" checked="false" falsevalue="false" truevalue="true" /> + <param name="adaptors" value="" type="text" area="true" label="Adapter sequences" help="Comma separated if multiple" size="5x120" /> + <param name="maxinsert" value="100000" type="integer" label="Larger paired end reads and inter-chromosomal pairs considered chimeric " size="20" /> + </inputs> + <outputs> + <data format="html" name="html_file" label="${out_prefix}.html" /> + </outputs> + <tests> + <test> + <param name="out_prefix" value="AsMetrics" /> + <param name="bisulphite" value="false" /> + <param name="sorted" value="true" /> + <param name="adaptors" value="" /> + <param name="maxinsert" value="100000" /> + <param name="refGenomeSource" value="history" /> + <param name="ownFile" value="picard_input_hg18.trimmed.fasta" /> + <param name="input_file" value="picard_input_tiny.sam" dbkey="hg18" /> + <output name="html_file" file="picard_output_alignment_summary_metrics.html" ftype="html" lines_diff="55"/> + </test> + <test> + <param name="out_prefix" value="AsMetricsIndexed" /> + <param name="bisulphite" value="false" /> + <param name="sorted" value="true" /> + <param name="adaptors" value="" /> + <param name="maxinsert" value="100000" /> + <param name="refGenomeSource" value="indexed" /> + <param name="index" value="hg19" /> + <param name="input_file" value="picard_input_sorted_pair.sam" dbkey="hg19" /> + <output name="html_file" file="picard_output_AsMetrics_indexed_hg18_sorted_pair.html" ftype="html" lines_diff="50"/> + </test> + </tests> + <help> + +.. 
class:: infomark + +**Summary** + +This Galaxy tool uses Picard to report high-level measures of alignment based on a provided sam or bam file. + +**Picard documentation** + +This is a Galaxy wrapper for CollectAlignmentSummaryMetrics, a part of the external package Picard-tools_. + + .. _Picard-tools: http://www.google.com/search?q=picard+samtools + +----- + +.. class:: infomark + +**Syntax** + +- **Input** - SAM/BAM format aligned short read data in your current history +- **Title** - the title to use for all output files from this job - use it for high level metadata +- **Reference Genome** - Galaxy (and Picard) needs to know which genomic reference was used to generate alignments within the input SAM/BAM dataset. Here you have three choices: + + - *Assigned data genome/build* - a genome specified for this dataset. If your SAM/BAM dataset has an assigned reference genome it will be displayed below this dropdown. If it does not -> use one of the following two options. + - *Select a different built-in genome* - this option will list all reference genomes presently cached at this instance of Galaxy. + - *Select a reference genome from history* - alternatively you can upload your own version of reference genome into your history and use it with this option. This is however not advisable with large human-sized genomes. If your genome is large contact Galaxy team using "Help" link at the top of the interface and provide exact details on where we can download sequences you would like to use as the reference. We will then install them as a part of locally cached genomic references. + +- **Assume Sorted** - saves sorting time - but only if true! +- **Bisulphite data** - see Picard documentation http://picard.sourceforge.net/command-line-overview.shtml#CollectAlignmentSummaryMetrics +- **Maximum acceptable insertion length** - see Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectAlignmentSummaryMetrics + +----- + +.. 
class:: infomark + +**Inputs, outputs, and parameters** + +The Picard documentation (reformatted for Galaxy) says: + +.. csv-table:: + :header-rows: 1 + + Option,Description + "INPUT=File","SAM or BAM file Required." + "OUTPUT=File","File to write alignment summary metrics to Required." + "REFERENCE_SEQUENCE=File","Reference sequence file Required." + "ASSUME_SORTED=Boolean","If true (default), unsorted SAM/BAM files will be considered coordinate sorted " + "MAX_INSERT_SIZE=Integer","Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000." + "ADAPTER_SEQUENCE=String","This option may be specified 0 or more times. " + "IS_BISULFITE_SEQUENCED=Boolean","Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. " + "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created." + +The output produced by the tool has the following columns:: + + 1. CATEGORY: One of either UNPAIRED (for a fragment run), FIRST_OF_PAIR when metrics are for only the first read in a paired run, SECOND_OF_PAIR when the metrics are for only the second read in a paired run or PAIR when the metrics are aggregated for both first and second reads in a pair. + 2. TOTAL_READS: The total number of reads including all PF and non-PF reads. When CATEGORY equals PAIR this value will be 2x the number of clusters. + 3. PF_READS: The number of PF reads where PF is defined as passing Illumina's filter. + 4. PCT_PF_READS: The percentage of reads that are PF (PF_READS / TOTAL_READS) + 5. PF_NOISE_READS: The number of PF reads that are marked as noise reads. A noise read is one which is composed entirely of A bases and/or N bases. These reads are marked as they are usually artifactual and are of no use in downstream analysis. + 6. PF_READS_ALIGNED: The number of PF reads that were aligned to the reference sequence. This includes reads that aligned with low quality (i.e. 
their alignments are ambiguous). + 7. PCT_PF_READS_ALIGNED: The percentage of PF reads that aligned to the reference sequence. PF_READS_ALIGNED / PF_READS + 8. PF_HQ_ALIGNED_READS: The number of PF reads that were aligned to the reference sequence with a mapping quality of Q20 or higher signifying that the aligner estimates a 1/100 (or smaller) chance that the alignment is wrong. + 9. PF_HQ_ALIGNED_BASES: The number of bases aligned to the reference sequence in reads that were mapped at high quality. Will usually approximate PF_HQ_ALIGNED_READS * READ_LENGTH but may differ when either mixed read lengths are present or many reads are aligned with gaps. + 10. PF_HQ_ALIGNED_Q20_BASES: The subset of PF_HQ_ALIGNED_BASES where the base call quality was Q20 or higher. + 11. PF_HQ_MEDIAN_MISMATCHES: The median number of mismatches versus the reference sequence in reads that were aligned to the reference at high quality (i.e. PF_HQ_ALIGNED READS). + 12. PF_HQ_ERROR_RATE: The percentage of bases that mismatch the reference in PF HQ aligned reads. + 13. MEAN_READ_LENGTH: The mean read length of the set of reads examined. When looking at the data for a single lane with equal length reads this number is just the read length. When looking at data for merged lanes with differing read lengths this is the mean read length of all reads. + 14. READS_ALIGNED_IN_PAIRS: The number of aligned reads whose mate pair was also aligned to the reference. + 15. PCT_READS_ALIGNED_IN_PAIRS: The percentage of reads whose mate pair was also aligned to the reference. READS_ALIGNED_IN_PAIRS / PF_READS_ALIGNED + 16. BAD_CYCLES: The number of instrument cycles in which 80% or more of base calls were no-calls. + 17. STRAND_BALANCE: The number of PF reads aligned to the positive strand of the genome divided by the number of PF reads aligned to the genome. + 18. 
PCT_CHIMERAS: The percentage of reads that map outside of a maximum insert size (usually 100kb) or that have the two ends mapping to different chromosomes. + 19. PCT_ADAPTER: The percentage of PF reads that are unaligned and match to a known adapter sequence right from the start of the read. + +.. class:: warningmark + +**Warning on SAM/BAM quality** + +Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT** +flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears +to be the only way to deal with SAM/BAM that cannot be parsed. + + + </help> +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgPicardFixMate.xml Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,107 @@ +<tool name="Paired Read Mate Fixer" id="rgPicFixMate" version="1.56.0"> + <description>for paired data</description> + <command interpreter="python"> + picard_wrapper.py -i "${input_file}" -o "${out_file}" --tmpdir "${__new_file_path__}" -n "${out_prefix}" + --output-format "${outputFormat}" -j "\$JAVA_JAR_PATH/FixMateInformation.jar" --sortorder "${sortOrder}" + </command> + <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements> + <inputs> + <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to fix" + help="If empty, upload or import a SAM/BAM dataset."/> + <param name="sortOrder" type="select" help="If in doubt, leave as default and read Picard/Samtools documentation" + label="Sort order"> + <option value="coordinate" selected="true">Coordinate sort</option> + <option value="queryname">Query name sort</option> + <option value="unsorted">Unsorted - docs not clear if this means unchanged or not</option> + </param> + <param name="out_prefix" value="Fix Mate" type="text" + label="Title for the output file" help="Use this to remind you what the job was for." 
size="80" /> + <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output BAM instead of SAM" help="Uncheck for SAM output" /> + </inputs> + <outputs> + <data format="bam" name="out_file" label="${tool.name} on ${on_string}: ${outputFormat} with fixed mates"> + <change_format> + <when input="outputFormat" value="sam" format="sam" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <param name="input_file" value="picard_input_sorted_pair.sam" /> + <param name="sortOrder" value="coordinate" /> + <param name="outputFormat" value="True" /> + <param name="out_prefix" value="Test FixMate" /> + <output name="out_file" file="picard_output_fixmate_sorted_pair.bam" ftype="bam" /> + </test> + <test> + <param name="input_file" value="picard_input_sorted_pair.sam" /> + <param name="sortOrder" value="coordinate" /> + <param name="outputFormat" value="False" /> + <param name="out_prefix" value="Test FixMate" /> + <output name="out_file" file="picard_output_fixmate_sorted_pair.sam" ftype="sam" /> + </test> + </tests> + <help> + + +.. class:: infomark + +**Purpose** + +Ensure that all mate-pair information is in sync between each read and its mate pair. + +**Picard documentation** + +This is a Galaxy wrapper for FixMateInformation, a part of the external package Picard-tools_. + + .. _Picard-tools: http://www.google.com/search?q=picard+samtools + +.. class:: warningmark + +**Useful for paired data only** + +Likely won't do anything helpful for single end sequence data. +Currently, Galaxy doesn't distinguish paired from single ended SAM/BAM so make sure +the data you choose are valid (paired end) SAM or BAM data - unless you trust this +tool not to harm your data. + +----- + +.. 
class:: infomark + +**Syntax** + +- **Input** - a paired read sam/bam format aligned short read data in your current history +- **Sort order** - can be used to adjust the ordering of reads +- **Title** - the title to use for all output files from this job - use it for high level metadata +- **Output Format** - either SAM or compressed as BAM + +----- + +.. class:: infomark + +**Inputs, outputs, and parameters** + +.. csv-table:: + :header-rows: 1 + + + Option,Description + "INPUT=File","The input file to fix. This option may be specified 0 or more times." + "OUTPUT=File","The output file to write to" + "SORT_ORDER=SortOrder","Optional sort order if the OUTPUT file should be sorted differently than the INPUT file. Default value: null. Possible values: {unsorted, queryname, coordinate}" + "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false" + +.. class:: warningmark + +**Warning on SAM/BAM quality** + +Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT** +flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears +to be the only way to deal with SAM/BAM that cannot be parsed. + + + </help> +</tool> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgPicardGCBiasMetrics.xml Tue Aug 13 12:09:14 2013 -0400
@@ -0,0 +1,157 @@
+<tool name="SAM/BAM GC Bias Metrics" id="PicardGCBiasMetrics" version="1.56.0">
+  <!--
+    Galaxy wrapper for Picard's CollectGcBiasMetrics.jar, run through picard_wrapper.py.
+    The only declared output is an HTML report (html_file); the wrapper is also handed
+    html_file.files_path as its -d argument.
+    The reference is passed either as a history FASTA dataset or as the path of an
+    all_fasta data table entry, depending on genomeSource.refGenomeSource.
+  -->
+  <command interpreter="python">
+    picard_wrapper.py -i "${input_file}" -d "${html_file.files_path}" -t "${html_file}"
+    --windowsize "${windowsize}" --mingenomefrac "${mingenomefrac}" -n "${out_prefix}" --tmpdir "${__new_file_path__}"
+    -j "\$JAVA_JAR_PATH/CollectGcBiasMetrics.jar"
+#if $genomeSource.refGenomeSource == "history":
+    --ref-file "${genomeSource.ownFile}"
+#else:
+    --ref "${genomeSource.index.fields.path}"
+#end if
+  </command>
+  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
+  <inputs>
+    <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate GC bias metrics"
+      help="If empty, upload or import a SAM/BAM dataset."/>
+    <param name="out_prefix" value="Short Read GC Bias Metrics" type="text"
+      label="Title for the output file" help="Use this to remind you what the job was for." size="80" />
+    <conditional name="genomeSource">
+      <param name="refGenomeSource" type="select" label="Select Reference Genome">
+        <option value="default" selected="true">Use the assigned data genome/build</option>
+        <option value="indexed">Select a different built-in genome</option>
+        <option value="history">Use a genome (fasta format) from my history</option>
+      </param>
+      <when value="default">
+        <param name="index" type="select" label="Check the assigned reference genome" help="Galaxy thinks that the reads in your dataset were aligned against this reference. If this is not correct, use the 'Select a different built-in genome' option of the 'Select Reference Genome' dropdown to select the appropriate reference.">
+          <options from_data_table="all_fasta">
+            <filter type="data_meta" ref="input_file" key="dbkey" column="dbkey" multiple="True" separator=","/>
+            <validator type="no_options" message="No reference build available for the selected input data" />
+          </options>
+        </param>
+      </when>
+      <when value="indexed">
+        <param name="index" type="select" label="Select a built-in reference genome" help="This list contains genomes cached at this Galaxy instance. If your genome of interest is not present here request it by using 'Help' link at the top of Galaxy interface or use the 'Use a genome (fasta format) from my history' option of the 'Select Reference Genome' dropdown.">
+          <options from_data_table="all_fasta"/>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome from history" help="This option works best for relatively small genomes. If you are working with large human-sized genomes, send request to Galaxy team for adding your reference to this Galaxy instance by using 'Help' link at the top of Galaxy interface."/>
+      </when>
+    </conditional>
+    <param name="windowsize" type="integer" label="GC minimum window size" value="100"
+      help="The size of windows on the genome that are used to bin reads. Default value: 100."/>
+    <param name="mingenomefrac" value="0.00001" type="float" label="Minimum Genome Fraction"
+      help="For summary metrics, exclude GC windows that include less than this fraction of the genome. Default value: 1.0E-5." />
+    <!--
+
+    Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
+    If commented out the heapsize defaults to the value specified within picard_wrapper.py
+
+    <param name="maxheap" type="select" help="If in doubt, choose 8G and read Picard documentation please"
+      label="Java heap size">
+      <option value="1G">1GB: very small data</option>
+      <option value="2G" selected="true">2GB</option>
+      <option value="4G">4GB for larger datasets </option>
+      <option value="8G" >8GB use if 4GB fails</option>
+      <option value="16G">16GB - try this if 8GB fails</option>
+    </param>
+
+    -->
+
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file" label="${out_prefix}.html"/>
+  </outputs>
+  <tests>
+    <test>
+      <!-- Uncomment this if maxheap is enabled above
+      <param name="maxheap" value="8G" />
+      -->
+      <param name="out_prefix" value="CollectGCBias" />
+      <param name="windowsize" value="100" />
+      <param name="mingenomefrac" value="0.00001" />
+      <param name="refGenomeSource" value="history" />
+      <param name="ownFile" value="picard_input_hg18.trimmed.fasta" dbkey="hg18" />
+      <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" dbkey="hg18"/>
+      <output name="html_file" file="picard_output_GcBias_uploaded_hg18_summary_alignment_stats.html" ftype="html" lines_diff="50"/>
+    </test>
+  </tests>
+  <help>
+
+
+.. class:: infomark
+
+**Summary**
+
+This Galaxy tool uses Picard to report detailed metrics about reads that fall within windows of a certain GC bin on the reference genome.
+
+**Picard documentation**
+
+This is a Galaxy wrapper for CollectGcBiasMetrics, a part of the external package Picard-tools_.
+
+ .. _Picard-tools: http://www.google.com/search?q=picard+samtools
+
+-----
+
+.. class:: infomark
+
+**Syntax**
+
+- **Input** - SAM/BAM format aligned short read data in your current history
+- **Title** - the title to use for all output files from this job - use it for high level metadata
+- **Reference Genome** - Galaxy (and Picard) needs to know which genomic reference was used to generate alignments within the input SAM/BAM dataset. Here you have three choices:
+
+    - *Assigned data genome/build* - a genome specified for this dataset. If your SAM/BAM dataset has an assigned reference genome it will be displayed below this dropdown. If it does not -> use one of the following two options.
+    - *Select a different built-in genome* - this option will list all reference genomes presently cached at this instance of Galaxy.
+    - *Select a reference genome from history* - alternatively you can upload your own version of reference genome into your history and use it with this option. This is however not advisable with large human-sized genomes. If your genome is large contact Galaxy team using "Help" link at the top of the interface and provide exact details on where we can download sequences you would like to use as the reference. We will then install them as a part of locally cached genomic references.
+
+- **Window Size** see Picard documentation http://picard.sourceforge.net/command-line-overview.shtml#CollectGCBiasMetrics
+- **Minimum Genome Fraction** See Picard documentation at http://picard.sourceforge.net/command-line-overview.shtml#CollectGCBiasMetrics
+
+-----
+
+.. class:: infomark
+
+**Inputs, outputs, and parameters**
+
+The Picard documentation (reformatted for Galaxy) says:
+
+.. csv-table::
+   :header-rows: 1
+
+   Option,Description
+   "REFERENCE_SEQUENCE=File","The reference sequence fasta file. Required."
+   "INPUT=File","The BAM or SAM file containing aligned reads. Required."
+   "OUTPUT=File","The text file to write the metrics table to. Required."
+   "CHART_OUTPUT=File","The PDF file to render the chart to. Required."
+   "SUMMARY_OUTPUT=File","The text file to write summary metrics to. Default value: null."
+   "WINDOW_SIZE=Integer","The size of windows on the genome that are used to bin reads. Default value: 100."
+   "MINIMUM_GENOME_FRACTION=Double","For summary metrics, exclude GC windows that include less than this fraction of the genome. Default value: 1.0E-5."
+   "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false."
+
+The output produced by the tool has the following columns::
+
+  1. GC: The G+C content of the reference sequence represented by this bin. Values are from 0% to 100%
+  2. WINDOWS: The number of windows on the reference genome that have this G+C content.
+  3. READ_STARTS: The number of reads whose start position is at the start of a window of this GC.
+  4. MEAN_BASE_QUALITY: The mean quality (determined via the error rate) of all bases of all reads that are assigned to windows of this GC.
+  5. NORMALIZED_COVERAGE: The ratio of "coverage" in this GC bin vs. the mean coverage of all GC bins. A number of 1 represents mean coverage, a number less than one represents lower than mean coverage (e.g. 0.5 means half as much coverage as average) while a number greater than one represents higher than mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average).
+  6. ERROR_BAR_WIDTH: The radius of error bars in this bin based on the number of observations made. For example if the normalized coverage is 0.75 and the error bar width is 0.1 then the error bars would be drawn from 0.65 to 0.85.
+
+.. class:: warningmark
+
+**Warning on SAM/BAM quality**
+
+Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
+flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
+to be the only way to deal with SAM/BAM that cannot be parsed.
+
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgPicardHsMetrics.xml Tue Aug 13 12:09:14 2013 -0400
@@ -0,0 +1,161 @@
+<tool name="SAM/BAM Hybrid Selection Metrics" id="PicardHsMetrics" version="1.56.0">
+  <!--
+    Galaxy wrapper for Picard's CalculateHsMetrics.jar, run through picard_wrapper.py.
+    Takes a SAM/BAM dataset plus bait and target interval datasets and declares a
+    single HTML report (html_file) as output.
+  -->
+  <description>for targeted resequencing data</description>
+  <command interpreter="python">
+
+    picard_wrapper.py -i "${input_file}" -d "${html_file.files_path}" -t "${html_file}" --datatype "${input_file.ext}"
+    --baitbed "${bait_bed}" --targetbed "${target_bed}" -n "${out_prefix}" --tmpdir "${__new_file_path__}"
+    -j "\$JAVA_JAR_PATH/CalculateHsMetrics.jar"
+
+  </command>
+  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
+  <inputs>
+    <param format="sam,bam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for" />
+    <param name="out_prefix" value="Picard HS Metrics" type="text" label="Title for the output file" help="Use to remind you what the job was for." size="80" />
+    <param name="bait_bed" type="data" format="bed,interval" label="Bait intervals: Sequences for bait in the design" help="Note specific format requirements below!" />
+    <param name="target_bed" type="data" format="bed,interval" label="Target intervals: Sequences for targets in the design" help="Note specific format requirements below!" />
+    <!--
+
+    Users can be enabled to set Java heap size by uncommenting this option and adding '-x "$maxheap"' to the <command> tag.
+    If commented out the heapsize defaults to the value specified within picard_wrapper.py
+
+    <param name="maxheap" type="select"
+      help="If in doubt, try the default. If it fails with a complaint about java heap size, try increasing it please - larger jobs will require your own hardware."
+      label="Java heap size">
+      <option value="4G" selected = "true">4GB default </option>
+      <option value="8G" >8GB use if 4GB fails</option>
+      <option value="16G">16GB - try this if 8GB fails</option>
+    </param>
+
+    -->
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file" label="${out_prefix}.html" />
+  </outputs>
+  <tests>
+    <test>
+      <!-- Uncomment this if maxheap parameter is enabled
+      <param name="maxheap" value="8G" />
+      -->
+      <param name="out_prefix" value="HSMetrics" />
+      <param name="input_file" value="picard_input_summary_alignment_stats.sam" ftype="sam" />
+      <param name="bait_bed" value="picard_input_bait.bed" />
+      <param name="target_bed" value="picard_input_bait.bed" />
+      <output name="html_file" file="picard_output_hs_transposed_summary_alignment_stats.html" ftype="html" lines_diff="212"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**Summary**
+
+Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
+
+.. class:: warningmark
+
+**WARNING about bait and target files**
+
+Picard is very fussy about the bait and target file format. If these are not exactly right, it will fail with an error something like::
+
+    Exception in thread "main" net.sf.picard.PicardException: Invalid interval record contains 6 fields: chr1 45787123 45787316 CASO_22G_25063 1000 +
+
+If you see an error like that from this tool, please do NOT report it to any of the Galaxy mailing lists as it is not a bug!
+It means you must reformat your bait and target files. Galaxy cannot do that for you automatically unfortunately.
+
+The required definition is described in the documentation at http://www.broadinstitute.org/gsa/wiki/index.php/Built-in_command-line_arguments
+and the sample provided looks like this::
+
+    chr1 1104841 1104940 + target_1
+    chr1 1105283 1105599 + target_2
+    chr1 1105712 1105860 + target_3
+    chr1 1105960 1106119 + target_4
+
+So your bait and target files MUST have 5 columns with chr, start, end, strand and name tab delimited and in exactly that order.
+Note that the Picard mandated sam header described in the documentation linked above is automagically added by the tool in Galaxy.
+
+.. class:: infomark
+
+**Picard documentation**
+
+This is a Galaxy wrapper for CalculateHsMetrics.jar, a part of the external package Picard-tools_.
+
+ .. _Picard-tools: http://www.google.com/search?q=picard+samtools
+
+-----
+
+.. class:: infomark
+
+**Inputs, outputs, and parameters**
+
+Picard documentation says (reformatted for Galaxy):
+
+Calculates a set of Hybrid Selection specific metrics from an aligned SAM or BAM file.
+
+.. csv-table::
+   :header-rows: 1
+
+   "Option", "Description"
+   "BAIT_INTERVALS=File","An interval list file that contains the locations of the baits used. Required."
+   "TARGET_INTERVALS=File","An interval list file that contains the locations of the targets. Required."
+   "INPUT=File","An aligned SAM or BAM file. Required."
+   "OUTPUT=File","The output file to write the metrics to. Required. Cannot be used in conjunction with option(s) METRICS_FILE (M)"
+   "METRICS_FILE=File","Legacy synonym for OUTPUT, should not be used. Required. Cannot be used in conjunction with option(s) OUTPUT (O)"
+   "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false"
+
+HsMetrics
+
+  The set of metrics captured that are specific to a hybrid selection analysis.
+
+Output Column Definitions::
+
+  1. BAIT_SET: The name of the bait set used in the hybrid selection.
+  2. GENOME_SIZE: The number of bases in the reference genome used for alignment.
+  3. BAIT_TERRITORY: The number of bases which have one or more baits on top of them.
+  4. TARGET_TERRITORY: The unique number of target bases in the experiment where target is usually exons etc.
+  5. BAIT_DESIGN_EFFICIENCY: Target territory / bait territory. 1 == perfectly efficient, 0.5 = half of baited bases are not target.
+  6. TOTAL_READS: The total number of reads in the SAM or BAM file examined.
+  7. PF_READS: The number of reads that pass the vendor's filter.
+  8. PF_UNIQUE_READS: The number of PF reads that are not marked as duplicates.
+  9. PCT_PF_READS: PF reads / total reads. The percent of reads passing filter.
+  10. PCT_PF_UQ_READS: PF Unique Reads / Total Reads.
+  11. PF_UQ_READS_ALIGNED: The number of PF unique reads that are aligned with mapping score > 0 to the reference genome.
+  12. PCT_PF_UQ_READS_ALIGNED: PF Reads Aligned / PF Reads.
+  13. PF_UQ_BASES_ALIGNED: The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps.
+  14. ON_BAIT_BASES: The number of PF aligned bases that mapped to a baited region of the genome.
+  15. NEAR_BAIT_BASES: The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region.
+  16. OFF_BAIT_BASES: The number of PF aligned bases that mapped to neither on or near a bait.
+  17. ON_TARGET_BASES: The number of PF aligned bases that mapped to a targeted region of the genome.
+  18. PCT_SELECTED_BASES: On+Near Bait Bases / PF Bases Aligned.
+  19. PCT_OFF_BAIT: The percentage of aligned PF bases that mapped neither on or near a bait.
+  20. ON_BAIT_VS_SELECTED: The percentage of on+near bait bases that are on as opposed to near.
+  21. MEAN_BAIT_COVERAGE: The mean coverage of all baits in the experiment.
+  22. MEAN_TARGET_COVERAGE: The mean coverage of targets that received at least coverage depth = 2 at one base.
+  23. PCT_USABLE_BASES_ON_BAIT: The number of aligned, de-duped, on-bait bases out of the PF bases available.
+  24. PCT_USABLE_BASES_ON_TARGET: The number of aligned, de-duped, on-target bases out of the PF bases available.
+  25. FOLD_ENRICHMENT: The fold by which the baited region has been amplified above genomic background.
+  26. ZERO_CVG_TARGETS_PCT: The number of targets that did not reach coverage=2 over any base.
+  27. FOLD_80_BASE_PENALTY: The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to the mean coverage level in those targets.
+  28. PCT_TARGET_BASES_2X: The percentage of ALL target bases achieving 2X or greater coverage.
+  29. PCT_TARGET_BASES_10X: The percentage of ALL target bases achieving 10X or greater coverage.
+  30. PCT_TARGET_BASES_20X: The percentage of ALL target bases achieving 20X or greater coverage.
+  31. PCT_TARGET_BASES_30X: The percentage of ALL target bases achieving 30X or greater coverage.
+  32. HS_LIBRARY_SIZE: The estimated number of unique molecules in the selected part of the library.
+  33. HS_PENALTY_10X: The "hybrid selection penalty" incurred to get 80% of target bases to 10X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 10X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 10 * HS_PENALTY_10X.
+  34. HS_PENALTY_20X: The "hybrid selection penalty" incurred to get 80% of target bases to 20X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 20X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 20 * HS_PENALTY_20X.
+  35. HS_PENALTY_30X: The "hybrid selection penalty" incurred to get 80% of target bases to 30X. This metric should be interpreted as: if I have a design with 10 megabases of target, and want to get 30X coverage I need to sequence until PF_ALIGNED_BASES = 10^6 * 30 * HS_PENALTY_30X.
+
+.. class:: warningmark
+
+**Warning on SAM/BAM quality**
+
+Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
+flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears to be the only way to deal with SAM/BAM that cannot be parsed.
+
+
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgPicardInsertSize.xml Tue Aug 13 12:09:14 2013 -0400
@@ -0,0 +1,102 @@
+<tool name="Insertion size metrics" id="PicardInsertSize" version="1.56.0">
+  <!--
+    Galaxy wrapper for Picard's CollectInsertSizeMetrics.jar, run through picard_wrapper.py.
+    Declares a single HTML report (html_file) as output.
+  -->
+  <description>for PAIRED data</description>
+  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
+  <command interpreter="python">
+    picard_wrapper.py -i "${input_file}" -n "${out_prefix}" --tmpdir "${__new_file_path__}" --deviations "${deviations}"
+    --histwidth "${histWidth}" --minpct "${minPct}" --malevel "${malevel}"
+    -j "\$JAVA_JAR_PATH/CollectInsertSizeMetrics.jar" -d "${html_file.files_path}" -t "${html_file}"
+  </command>
+  <inputs>
+    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to generate statistics for"
+      help="If empty, upload or import a SAM/BAM dataset."/>
+    <param name="out_prefix" value="Insertion size metrics" type="text"
+      label="Title for the output file" help="Use this to remind you what the job was for" size="120" />
+    <param name="deviations" value="10.0" type="float"
+      label="Deviations" size="5"
+      help="See Picard documentation: Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION" />
+    <param name="histWidth" value="0" type="integer"
+      label="Histogram width" size="5"
+      help="Explicitly sets the histogram width option - leave 0 to ignore" />
+    <param name="minPct" value="0.05" type="float"
+      label="Minimum percentage" size="5"
+      help="Discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads" />
+    <!-- Only ALL_READS is pre-selected; "selected" is the attribute used to pre-select an option. -->
+    <param name="malevel" type="select" multiple="true" label="Metric Accumulation Level"
+      help="Level(s) at which metrics will be accumulated">
+      <option value="ALL_READS" selected="true">All reads (default)</option>
+      <option value="SAMPLE">Sample</option>
+      <option value="LIBRARY">Library</option>
+      <option value="READ_GROUP">Read group</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file" label="InsertSize_${out_prefix}.html"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input_file" value="picard_input_tiny.sam" />
+      <param name="out_prefix" value="Insertion size metrics" />
+      <param name="deviations" value="10.0" />
+      <param name="histWidth" value="0" />
+      <param name="minPct" value="0.01" />
+      <param name="malevel" value="ALL_READS" />
+      <output name="html_file" file="picard_output_insertsize_tinysam.html" ftype="html" compare="contains" lines_diff="40" />
+    </test>
+  </tests>
+  <help>
+
+
+.. class:: infomark
+
+**Purpose**
+
+Reads a SAM or BAM file and describes the distribution
+of insert size (excluding duplicates) with metrics and a histogram plot.
+
+**Picard documentation**
+
+This is a Galaxy wrapper for CollectInsertSizeMetrics, a part of the external package Picard-tools_.
+
+ .. _Picard-tools: http://www.google.com/search?q=picard+samtools
+
+.. class:: warningmark
+
+**Useful for paired data only**
+
+This tool works for paired data only and can be expected to fail for single end data.
+
+-----
+
+.. class:: infomark
+
+**Inputs, outputs, and parameters**
+
+Picard documentation says (reformatted for Galaxy):
+
+.. csv-table::
+   :header-rows: 1
+
+   Option,Description
+   "INPUT=File","SAM or BAM file. Required."
+   "OUTPUT=File","File to write insert size metrics to. Required."
+   "HISTOGRAM_FILE=File","File to write insert size histogram chart to. Required."
+   "TAIL_LIMIT=Integer","When calculating mean and stdev stop when the bins in the tail of the distribution contain fewer than mode/TAIL_LIMIT items. This also limits how much data goes into each data category of the histogram."
+   "HISTOGRAM_WIDTH=Integer","Explicitly sets the histogram width, overriding the TAIL_LIMIT option. Also, when calculating mean and stdev, only bins LE HISTOGRAM_WIDTH will be included. "
+   "MINIMUM_PCT=Float","When generating the histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1) Default value: 0.01."
+   "STOP_AFTER=Integer","Stop after processing N reads, mainly for debugging. Default value: 0."
+   "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false."
+
+.. class:: warningmark
+
+**Warning on SAM/BAM quality**
+
+Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
+flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
+to be the only way to deal with SAM/BAM that cannot be parsed.
+
+  </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgPicardLibComplexity.xml Tue Aug 13 12:09:14 2013 -0400
@@ -0,0 +1,127 @@
+<tool name="Estimate Library Complexity" id="rgEstLibComp" version="1.56.0">
+  <!--
+    Galaxy wrapper for Picard's EstimateLibraryComplexity.jar, run through picard_wrapper.py.
+    Declares a single HTML report (html_file) as output.
+  -->
+  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
+  <command interpreter="python">
+    picard_wrapper.py -i "${input_file}" -n "${out_prefix}" --tmpdir "${__new_file_path__}" --minid "${minIDbases}"
+    --maxdiff "${maxDiff}" --minmeanq "${minMeanQ}" --readregex "${readRegex}" --optdupdist "${optDupeDist}"
+    -j "\$JAVA_JAR_PATH/EstimateLibraryComplexity.jar" -d "${html_file.files_path}" -t "${html_file}"
+  </command>
+  <inputs>
+    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset"
+      help="If empty, upload or import a SAM/BAM dataset."/>
+    <param name="out_prefix" value="Library Complexity" type="text"
+      label="Title for the output file" help="Use this to remind you what the job was for." size="80" />
+    <param name="minIDbases" value="5" type="integer" label="Minimum identical bases at starts of reads for grouping" size="5"
+      help="Total_reads / 4^max_id_bases reads will be compared at a time. Lower numbers = more accurate results and exponentially more time/memory." />
+    <param name="maxDiff" value="0.03" type="float"
+      label="Maximum difference rate for identical reads" size="5"
+      help="The maximum rate of differences between two reads to call them identical" />
+    <param name="minMeanQ" value="20" type="integer"
+      label="Minimum mean base quality" size="5"
+      help="The minimum mean quality of bases in a read pair. Lower average quality reads filtered out from all calculations" />
+    <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="120"
+      label="Regular expression that can be used to parse read names in the incoming SAM file"
+      help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
+      <sanitizer>
+        <valid initial="string.printable">
+          <remove value="'"/>
+        </valid>
+        <mapping initial="none">
+          <add source="'" target="__sq__"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="optDupeDist" value="100" type="integer"
+      label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
+      help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100" />
+
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file" label="${out_prefix}_lib_complexity.html"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input_file" value="picard_input_tiny.sam" />
+      <param name="out_prefix" value="Library Complexity" />
+      <param name="minIDbases" value="5" />
+      <param name="maxDiff" value="0.03" />
+      <param name="minMeanQ" value="20" />
+      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
+      <param name="optDupeDist" value="100" />
+      <output name="html_file" file="picard_output_estlibcomplexity_tinysam.html" ftype="html" lines_diff="30" />
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+Attempts to estimate library complexity from sequence alone.
+Does so by sorting all reads by the first N bases (5 by default) of each read and then
+comparing reads with the first N bases identical to each other for duplicates. Reads are considered to be
+duplicates if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE (0.03 by default).
+
+Reads of poor quality are filtered out so as to provide a more accurate estimate.
+The filtering removes reads with any no-calls in the first N bases or with a mean base quality lower than
+MIN_MEAN_QUALITY across either the first or second read.
+
+The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the
+calculation of library size. Also, since there is no alignment to screen out technical reads one
+further filter is applied on the data. After examining all reads a histogram is built of
+[#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are
+then removed from the histogram as outliers before library size is estimated.
+
+**Picard documentation**
+
+This is a Galaxy wrapper for EstimateLibraryComplexity, a part of the external package Picard-tools_.
+
+ .. _Picard-tools: http://www.google.com/search?q=picard+samtools
+
+-----
+
+.. class:: infomark
+
+**Inputs, outputs, and parameters**
+
+Picard documentation says (reformatted for Galaxy):
+
+.. csv-table::
+   :header-rows: 1
+
+   Option,Description
+   "INPUT=File","One or more files to combine and estimate library complexity from. Reads can be mapped or unmapped. This option may be specified 0 or more times."
+   "OUTPUT=File","Output file to write per-library metrics to. Required."
+   "MIN_IDENTICAL_BASES=Integer","The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection. In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU. Default value: 5."
+   "MAX_DIFF_RATE=Double","The maximum rate of differences between two reads to call them identical. Default value: 0.03. "
+   "MIN_MEAN_QUALITY=Integer","The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads with lower average quality are filtered out and not considered in any calculations. Default value: 20."
+   "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. This option can be set to 'null' to clear the default value."
+   "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
+   "CREATE_MD5_FILE=Boolean","Whether to create an MD5 digest for any BAM files created. Default value: false. This option can be set to 'null' to clear the default value. "
+
+.. class:: warningmark
+
+**Warning on SAM/BAM quality**
+
+Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
+flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
+to be the only way to deal with SAM/BAM that cannot be parsed.
+
+.. class:: infomark
+
+**Note on the Regular Expression**
+
+(from the Picard docs)
+This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file.
+These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size.
+The regular expression should contain three capture groups for the three variables, in order.
+Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
+
+
+  </help>
+</tool>
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgPicardMarkDups.xml Tue Aug 13 12:09:14 2013 -0400
@@ -0,0 +1,142 @@
+<!--
+  Galaxy tool wrapper for Picard MarkDuplicates (invoked through picard_wrapper.py).
+  Input: one SAM or BAM dataset. Outputs: a BAM dataset (out_file) and an HTML dataset (html_file).
+-->
+<tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="1.56.0">
+  <!-- Command line: each param declared under inputs is forwarded to picard_wrapper.py as a quoted option value. -->
+  <command interpreter="python">
+    picard_wrapper.py -i "${input_file}" -n "${out_prefix}" --tmpdir "${__new_file_path__}" -o "${out_file}"
+    --remdups "${remDups}" --assumesorted "${assumeSorted}" --readregex "${readRegex}" --optdupdist "${optDupeDist}"
+    -j "\$JAVA_JAR_PATH/MarkDuplicates.jar" -d "${html_file.files_path}" -t "${html_file}" -e "${input_file.ext}"
+  </command>
+  <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
+  <inputs>
+    <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
+           help="If empty, upload or import a SAM/BAM dataset."/>
+    <param name="out_prefix" value="Dupes Marked" type="text"
+           label="Title for the output file" help="Use this to remind you what the job was for" size="80" />
+    <!-- Unchecked by default so duplicates are flagged, not removed; this matches value="false",
+         the documented REMOVE_DUPLICATES default and the advice at the end of the help text. -->
+    <param name="remDups" value="false" type="boolean" label="Remove duplicates from output file"
+           truevalue="true" falsevalue="false" checked="false"
+           help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
+    <param name="assumeSorted" value="true" type="boolean" label="Assume reads are already ordered"
+           truevalue="true" falsevalue="false" checked="true"
+           help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
+    <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
+           label="Regular expression that can be used to parse read names in the incoming SAM file"
+           help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
+      <!-- Single quotes are taken out of the accepted character set and mapped to __sq__ instead. -->
+      <sanitizer>
+        <valid initial="string.printable">
+          <remove value="'"/>
+        </valid>
+        <mapping initial="none">
+          <add source="'" target="__sq__"/>
+        </mapping>
+      </sanitizer>
+    </param>
+    <param name="optDupeDist" value="100" type="integer"
+           label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
+           help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
+      <!-- min="0" accepts zero, so the message says "not negative" rather than "positive". -->
+      <validator type="in_range" message="Optical duplicate pixel distance must not be negative" min="0" />
+    </param>
+
+  </inputs>
+  <outputs>
+    <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
+    <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
+  </outputs>
+  <tests>
+    <!-- BAM input; duplicates are flagged and kept (remDups=false). -->
+    <test>
+      <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
+      <param name="out_prefix" value="Dupes Marked" />
+      <param name="remDups" value="false" />
+      <param name="assumeSorted" value="true" />
+      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
+      <param name="optDupeDist" value="100" />
+      <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
+      <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
+    </test>
+    <!-- SAM input; duplicates are removed (remDups=true). -->
+    <test>
+      <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
+      <param name="out_prefix" value="Dupes Marked" />
+      <param name="remDups" value="true" />
+      <param name="assumeSorted" value="true" />
+      <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
+      <param name="optDupeDist" value="100" />
+      <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
+      <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
+    </test>
+  </tests>
+
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
+
+**Picard documentation**
+
+This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
+
+ .. _Picard-tools: http://www.google.com/search?q=picard+samtools
+
+-----
+
+.. class:: infomark
+
+**Inputs, outputs, and parameters**
+
+Picard documentation says (reformatted for Galaxy):
+
+.. csv-table:: Mark Duplicates docs
+   :header-rows: 1
+
+   Option,Description
+   "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
+   "OUTPUT=File","The output file to write marked records to. Required."
+   "METRICS_FILE=File","File to write duplication metrics to. Required."
+   "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
+   "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
+   "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
+   "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
+   "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
+   "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
+
+.. class:: warningmark
+
+**Warning on SAM/BAM quality**
+
+Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
+flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
+to be the only way to deal with SAM/BAM that cannot be parsed.
+
+.. class:: infomark
+
+**Note on the Regular Expression**
+
+(from the Picard docs)
+This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
+
+Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged unless the remove duplicates option is selected. In some cases you may want to do this, but please only do this if you really understand what you are doing.
+
+  </help>
+</tool>
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bfast_out1.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,23 @@ +@HD VN:0.1.2 SO:unsorted GO:none +@SQ SN:phiX174 LN:5386 +@PG ID:bfast VN:0.6.4d +random_phiX_region_0 0 phiX174 553 255 50M * 0 0 TTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_1 0 phiX174 3693 255 50M * 0 0 GTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_2 0 phiX174 375 255 50M * 0 0 AATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_3 0 phiX174 3168 255 50M * 0 0 GGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_4 0 phiX174 5254 255 50M * 0 0 ACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGAC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_5 0 phiX174 5066 255 50M * 0 0 AGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_6 0 phiX174 1226 255 50M * 0 0 CACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_7 0 phiX174 1096 255 50M * 0 0 AACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCG ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_8 0 phiX174 535 255 50M * 0 0 
CTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_9 0 phiX174 3669 255 50M * 0 0 CAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_10 0 phiX174 4887 255 50M * 0 0 TACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_11 0 phiX174 1849 255 50M * 0 0 TATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_12 0 phiX174 4145 255 50M * 0 0 AGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_13 0 phiX174 1853 255 50M * 0 0 TTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_14 0 phiX174 2800 255 50M * 0 0 CCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2300 NM:i:1 NH:i:1 IH:i:1 HI:i:1 MD:Z:11T38 XA:i:0 +random_phiX_region_15 0 phiX174 1910 255 50M * 0 0 AACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_16 0 phiX174 3366 255 50M * 0 0 GCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_17 0 phiX174 2165 255 50M * 0 0 
CATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAG ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_18 0 phiX174 2051 255 50M * 0 0 TGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0 +random_phiX_region_19 0 phiX174 5099 255 50M * 0 0 GCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PG:Z:bfast AS:i:2500 NM:i:0 NH:i:1 IH:i:1 HI:i:1 MD:Z:50 XA:i:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa_wrapper_in2.fastqsanger Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,120 @@ +@seq1/1 +GGACTCAGATAGTAATCC ++ +II#IIIIIII$5+.(9II +@seq2/1 +ATTCGACCTATCCTTGCG ++ +IIIIIIIIIIIIIIIIII +@seq3/1 +GTAACAAAGTTTGGATTG ++ +IIIIIIIIIIIIIIIIII +@seq4/1 +AGCCGCTCGTCTTTTATG ++ +IIIIIIIIIIIIIIIIII +@seq5/1 +CAGTTATATGGCTTTTGG ++ +IIIIIIIIIIIIIIIIII +@seq6/1 +AGGCGCTCGTCTTGGTAT ++ +IIIIIIIIIIIIIIIIII +@seq7/1 +TGTAGGTGGTCAACCAAT ++ +IIIIIIIIIIIIIIIIII +@seq8/1 +ACACCCGTCCTTTACGTC ++ +IIIIIIIIIIIIIIIIII +@seq9/1 +GCCGCTATTCAGGTTGTT ++ +IIIIIIIIIIIIIIIIII +@seq10/1 +ATTCTTTCTTTTCGTATC ++ +IIIIIIIIIIIIIIIIII +@seq11/1 +GCATTTCTACTCCTTCTC ++ +II#IIIIIII$5+.(9II +@seq12/1 +CGCGCTTCGATAAAAATG ++ +IIIIIIIIIIIIIIIIII +@seq13/1 +ATTTCTACTCTTTCTCAT ++ +IIIIIIIIIIIIIIIIII +@seq14/1 +CCCTTTTGAATGTCACGC ++ +IIIIIIIIIIIIIIIIII +@seq15/1 +CCAACTTACCAAGGTGGG ++ +IIIIIIIIIIIIIIIIII +@seq16/1 +TCAGGGTATTAAAAGAGA ++ +IIIIIIIIIIIIIIIIII +@seq17/1 +GTGATGTGCTTGCTACCG ++ +IIIIIIIIIIIIIIIIII +@seq18/1 +TCAATCCCCCATGCTTGG ++ +IIIIIIIIIIIIIIIIII +@seq19/1 +TTCCTGCGCTTAATGCTT ++ +IIIIIIIIIIIIIIIIII +@seq20/1 +CTTATTACCATTTCAACT ++ +IIIIIIIIIIIIIIIIII +@seq21/1 +CTGATACCAATAAAACCC ++ +II#IIIIIII$5+.(9II +@seq22/1 +AATCAAACTTACCAAGGG ++ +IIIIIIIIIIIIIIIIII +@seq23/1 +TGTGCTTCCCCAACTTGA ++ +IIIIIIIIIIIIIIIIII +@seq24/1 +TTTCTCAATCCCCAATGC ++ +IIIIIIIIIIIIIIIIII +@seq25/1 +TTGCTACTGACCGCTCTT ++ +IIIIIIIIIIIIIIIIII +@seq26/1 +CCGCGTGAAATTTCTATG ++ +IIIIIIIIIIIIIIIIII +@seq27/1 +CGCTAATCAAGTTGTTTC ++ +IIIIIIIIIIIIIIIIII +@seq28/1 +AAAGAGATTATTTGTCGG ++ +IIIIIIIIIIIIIIIIII +@seq29/1 +CAAATTAATGCGCGCTTC ++ +IIIIIIIIIIIIIIIIII +@seq30/1 +ATCCCCTATGCTTGGCTT ++ +IIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa_wrapper_in3.fastqsanger Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,120 @@ +@seq1/2 +ACGCTCCTTTAAAATATC ++ +IIIII$%*$G$A31I&&B +@seq2/2 +CAGCTCGAGAAGCTCTTA ++ +IIIIIIIIIIIIIIIIII +@seq3/2 +CTACTGACCGCTCTCGTG ++ +IIIIIIIIIIIIIIIIII +@seq4/2 +TAGGTGGTCAACCATTTT ++ +IIIIIIIIIIIIIIIIII +@seq5/2 +TTTCTATGTGGCTTAATA ++ +IIIIIIIIIIIIIIIIII +@seq6/2 +GTAGGTGGTCAACAATTT ++ +IIIIIIIIIIIIIIIIII +@seq7/2 +TTTAATTGCAGGGGCTTC ++ +IIIIIIIIIIIIIIIIII +@seq8/2 +ATGCGCTCTATTCTCTGG ++ +IIIIIIIIIIIIIIIIII +@seq9/2 +TTCTGTTGGTGCTGATAT ++ +IIIIIIIIIIIIIIIIII +@seq10/2 +AGGGCGTTGAGTTCGATA ++ +IIIIIIIIIIIIIIIIII +@seq11/2 +ATCCCCAATGCTTGGCTT ++ +IIIII$%*$G$A31I&&B +@seq12/2 +GGATTGGCGTTTCCAACC ++ +IIIIIIIIIIIIIIIIII +@seq13/2 +CCCCAATCCTTGCCTTCC ++ +IIIAAIIIIIIIIIIIII +@seq14/2 +TGATATTTTGACTTTGAG ++ +IIIIIIIIIIIIIIIIII +@seq15/2 +TTACGAAACGCGACGCCG ++ +IIIIIIIIIIIIIIIIII +@seq16/2 +TTATTTTTCTCCAGCCAC ++ +IIIIIIIIIIIIIIIIII +@seq17/2 +AAACAATACTTTAGGCAT ++ +IIIIIIIIIIIIIIIIII +@seq18/2 +CCGTTCCATAAGCAGATG ++ +IIIIIIIIIIIIIIIIII +@seq19/2 +GAGCGTCCTGGTGCTGAT ++ +IIIIIIIIIIIIIIIIII +@seq20/2 +ACTCCGGTTATCGCTGGC ++ +IIIIIIIIIIIIIIIIII +@seq21/2 +TAAGCATTTGGTTCAGGG ++ +IIIII$%*$G$A31I&&B +@seq22/2 +GTTACGACGCGACGCCGT ++ +IIIIIIIIIIIIIIIIII +@seq23/2 +TTTAATAACCCTATAGAC ++ +IIIIIIIIIIIIIIIIII +@seq24/2 +CTTGGCTTCCCTAAGCAG ++ +IIIIIIIIIIIIIIIIII +@seq25/2 +CGTGCTCGTTGCTGCGTT ++ +IIIIIIIIIIIIIIIIII +@seq26/2 +AAGGATGTTTTCCGTTCT ++ +IIIIIIIIIIIIIIIIII +@seq27/2 +TGTTTGGTGCTGATATTG ++ +IIIIIIIIIIIIIIIIII +@seq28/2 +TCCAGCCACTAAAGTGAG ++ +IIIIIIIIIIIIIIIIII +@seq29/2 +GATAATGATTGGGGTATC ++ +IIIIIIIIIIIIIIIIII +@seq30/2 +ACCATAAGCAGATGGATA ++ +IIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa_wrapper_out3.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,63 @@ +@SQ SN:phiX174 LN:5386 +@RG ID:abcdefg LB:lib-mom-A PL:ILLUMINA SM:mom DS:descrip DT:2010-11-01 PI:400 +@PG ID:bwa PN:bwa VN:0.5.9-r16 +seq1 113 phiX174 340 37 18M = 322 -18 GGATTACTATCTGAGTCC II9(.+5$IIIIIII#II RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq1 177 phiX174 322 25 18M = 340 18 GATATTTTAAAGGAGCGT B&&I13A$G$*%$IIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:2C8A6 +seq2 65 phiX174 141 37 18M = 159 18 ATTCGACCTATCCTTGCG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq2 129 phiX174 159 37 18M = 141 -18 CAGCTCGAGAAGCTCTTA IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq3 65 phiX174 505 37 18M = 523 18 GTAACAAAGTTTGGATTG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq3 129 phiX174 523 37 18M = 505 -18 CTACTGACCGCTCTCGTG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq4 69 phiX174 945 0 * = 945 0 AGCCGCTCGTCTTTTATG IIIIIIIIIIIIIIIIII RG:Z:abcdefg +seq4 137 phiX174 945 23 18M = 945 0 TAGGTGGTCAACCATTTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:23 AM:i:0 X0:i:1 X1:i:1 XM:i:1 XO:i:0 XG:i:0 MD:Z:12A5 XA:Z:phiX174,+945,17M1S,2; +seq5 65 phiX174 4985 37 18M = 5003 18 CAGTTATATGGCTTTTGG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:13G4 +seq5 129 phiX174 5003 37 18M = 4985 -18 TTTCTATGTGGCTTAATA IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:13A4 +seq6 65 phiX174 925 37 11M1D7M = 944 19 AGGCGCTCGTCTTGGTAT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:1 
XG:i:1 MD:Z:11^T7 +seq6 129 phiX174 944 37 18M = 925 -19 GTAGGTGGTCAACAATTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq7 65 phiX174 943 25 18M = 960 17 TGTAGGTGGTCAACCAAT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:1 XM:i:2 XO:i:0 XG:i:0 MD:Z:14A1T1 XA:Z:phiX174,+943,13M1I4M,2; +seq7 129 phiX174 960 37 18M = 943 -17 TTTAATTGCAGGGGCTTC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq8 69 phiX174 1715 0 * = 1715 0 ACACCCGTCCTTTACGTC IIIIIIIIIIIIIIIIII RG:Z:abcdefg +seq8 137 phiX174 1715 37 18M = 1715 0 ATGCGCTCTATTCTCTGG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:10A7 +seq9 65 phiX174 2596 37 18M = 2613 17 GCCGCTATTCAGGTTGTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:7A10 +seq9 129 phiX174 2613 37 18M = 2596 -17 TTCTGTTGGTGCTGATAT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq10 65 phiX174 4149 25 18M = 4168 19 ATTCTTTCTTTTCGTATC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:5G11G0 +seq10 129 phiX174 4168 37 18M = 4149 -19 AGGGCGTTGAGTTCGATA IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq11 65 phiX174 4072 37 18M = 4091 19 GCATTTCTACTCCTTCTC II#IIIIIII$5+.(9II RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:12T5 +seq11 129 phiX174 4091 37 18M = 4072 -19 ATCCCCAATGCTTGGCTT IIIII$%*$G$A31I&&B RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq12 65 phiX174 5349 37 18M = 5365 16 CGCGCTTCGATAAAAATG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq12 129 phiX174 5365 25 18M = 5349 -16 
GGATTGGCGTTTCCAACC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:0T9A7 +seq13 65 phiX174 4074 37 18M = 4093 19 ATTTCTACTCTTTCTCAT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:17A0 +seq13 129 phiX174 4093 25 18M = 4074 -19 CCCCAATCCTTGCCTTCC IIIAAIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:7G4G5 +seq14 65 phiX174 3998 37 18M = 4016 18 CCCTTTTGAATGTCACGC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:5C12 +seq14 129 phiX174 4016 37 3M1D15M = 3998 -18 TGATATTTTGACTTTGAG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:3^T15 +seq15 65 phiX174 5198 37 18M = 5216 18 CCAACTTACCAAGGTGGG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:13C4 +seq15 129 phiX174 5216 37 5M2I11M = 5198 -18 TTACGAAACGCGACGCCG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:16 +seq16 65 phiX174 2880 37 10M1I7M = 2897 17 TCAGGGTATTAAAAGAGA IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:1 MD:Z:5T11 +seq16 129 phiX174 2897 37 18M = 2880 -17 TTATTTTTCTCCAGCCAC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:6G11 +seq17 65 phiX174 3034 37 18M = 3053 19 GTGATGTGCTTGCTACCG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq17 129 phiX174 3053 25 18M = 3034 -19 AAACAATACTTTAGGCAT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:0T9G7 +seq18 73 phiX174 4088 37 18M = 4088 0 TCAATCCCCCATGCTTGG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:9A8 +seq18 133 phiX174 4088 0 * 
= 4088 0 CCGTTCCATAAGCAGATG IIIIIIIIIIIIIIIIII RG:Z:abcdefg +seq19 65 phiX174 3304 37 18M = 3324 20 TTCCTGCGCTTAATGCTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:6A11 +seq19 129 phiX174 3324 37 18M = 3304 -20 GAGCGTCCTGGTGCTGAT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:6G11 +seq20 65 phiX174 1082 37 18M = 1100 18 CTTATTACCATTTCAACT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq20 129 phiX174 1100 37 18M = 1082 -18 ACTCCGGTTATCGCTGGC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq21 65 phiX174 1344 23 18M = 1363 19 CTGATACCAATAAAACCC II#IIIIIII$5+.(9II RG:Z:abcdefg XT:A:U NM:i:1 SM:i:23 AM:i:23 X0:i:1 X1:i:1 XM:i:1 XO:i:0 XG:i:0 MD:Z:15T2 XA:Z:phiX174,+1344,15M1D3M,2; +seq21 129 phiX174 1363 37 18M = 1344 -19 TAAGCATTTGGTTCAGGG IIIII$%*$G$A31I&&B RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:23 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:10T7 +seq22 69 phiX174 5215 0 * = 5215 0 AATCAAACTTACCAAGGG IIIIIIIIIIIIIIIIII RG:Z:abcdefg +seq22 137 phiX174 5215 37 18M = 5215 0 GTTACGACGCGACGCCGT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq23 65 phiX174 4289 37 18M = 4308 19 TGTGCTTCCCCAACTTGA IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:6C11 +seq23 129 phiX174 4308 25 18M = 4289 -19 TTTAATAACCCTATAGAC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:0A8A8 +seq24 65 phiX174 4084 37 18M = 4101 17 TTTCTCAATCCCCAATGC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq24 129 phiX174 4101 37 18M = 4084 -17 CTTGGCTTCCCTAAGCAG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 
MD:Z:10A7 +seq25 65 phiX174 520 37 18M = 537 17 TTGCTACTGACCGCTCTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:17C0 +seq25 129 phiX174 537 37 18M = 520 -17 CGTGCTCGTTGCTGCGTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:9C8 +seq26 65 phiX174 1976 37 18M = 1994 18 CCGCGTGAAATTTCTATG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq26 129 phiX174 1994 37 18M = 1976 -18 AAGGATGTTTTCCGTTCT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:18 +seq27 65 phiX174 2598 37 18M = 2614 16 CGCTAATCAAGTTGTTTC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:9G8 +seq27 129 phiX174 2614 37 3M1D15M = 2598 -16 TGTTTGGTGCTGATATTG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:1C1^G15 +seq28 65 phiX174 2890 25 18M = 2906 16 AAAGAGATTATTTGTCGG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:2 SM:i:25 AM:i:25 X0:i:1 X1:i:0 XM:i:2 XO:i:0 XG:i:0 MD:Z:16T0C0 +seq28 129 phiX174 2906 37 18M = 2890 -16 TCCAGCCACTAAAGTGAG IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:25 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:10T7 +seq29 73 phiX174 5339 37 18M = 5339 0 CAAATTAATGCGCGCTTC IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:6T11 +seq29 133 phiX174 5339 0 * = 5339 0 GATAATGATTGGGGTATC IIIIIIIIIIIIIIIIII RG:Z:abcdefg +seq30 65 phiX174 4091 37 18M = 4108 17 ATCCCCTATGCTTGGCTT IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:6A11 +seq30 129 phiX174 4108 37 18M = 4091 -17 ACCATAAGCAGATGGATA IIIIIIIIIIIIIIIIII RG:Z:abcdefg XT:A:U NM:i:1 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:0T17
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX.fasta Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_ARRG_input1.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,25 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:100001 +@SQ SN:chr3 LN:10001 +@SQ SN:chr4 LN:1001 +@RG ID:rg1 SM:s1 +@RG ID:rg2 SM:s3 +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg2 +bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg2 +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_ARRG_input2.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,23 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:100001 +@SQ SN:chr3 LN:10001 +@SQ SN:chr4 LN:1001 +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 +bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 +bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_ARRG_output1.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,24 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:100001 +@SQ SN:chr3 LN:10001 +@SQ SN:chr4 LN:1001 +@RG ID:one PL:illumina PU:peaewe LB:lib SM:sam1 +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:one +bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one +bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:one
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_ARRG_output2.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,24 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:100001 +@SQ SN:chr3 LN:10001 +@SQ SN:chr4 LN:1001 +@RG ID:M5 PL:IL PU:PLAT LB:LIB DS:description with spaces SM:smp CN:FamousCenter +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M5 +bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5 +bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_ARRG_output3.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,24 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:100001 +@SQ SN:chr3 LN:10001 +@SQ SN:chr4 LN:1001 +@RG ID:M6 PL:IL PU:PLAT LB:LIB SM:smp1 +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:6 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:7 77 chr1 20 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:8 77 chr1 30 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:6 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:8 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:9 77 chr4 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:M6 +bar:record:7 141 chr4 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6 +bar:record:9 141 chr4 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:M6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_BIS_input1.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,18 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@SQ SN:chr10 LN:303 +@SQ SN:chr14 LN:505 +@RG ID:0 SM:Hi,Mom! +@RG ID:1 SM:samplesample DS:ClearDescription +@PG ID:1 PN:Hey! VN:2.0 +@CO Just a generic comment to make the header longer +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA 
&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_BIS_output1.txt Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,39 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool BamIndexStats run at 12/05/2011 14:18:06</b><br/><b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="BamIndexStats.log">BamIndexStats.log</a></td></tr> +<tr><td><a href="BamIndexStats.metrics.txt">BamIndexStats.metrics.txt</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td colspan="2">chr1 length= 101 Aligned= 0 Unaligned= 0</td></tr> +<tr class="d1"><td colspan="2">chr7 length= 404 Aligned= 7 Unaligned= 0</td></tr> +<tr class="d0"><td colspan="2">chr8 length= 202 Aligned= 0 Unaligned= 0</td></tr> +<tr class="d1"><td colspan="2">NoCoordinateCount= 1</td></tr> +</table> +<b>Picard Tool Run Log</b><hr/> +<pre>Thu, 12 May 2011 14:18:06 INFO + ## executing java -Xmx2g -jar /udd/rerla/galaxy-central/tool-data/shared/jars/BamIndexStats.jar 
VALIDATION_STRINGENCY=LENIENT INPUT=/udd/rerla/galaxy-central/database/job_working_directory/955/dataset_1015_files/tmp45bd_D.bam returned status 0 and stderr: +[Thu May 12 14:18:06 EDT 2011] net.sf.picard.sam.BamIndexStats INPUT=/udd/rerla/galaxy-central/database/job_working_directory/955/dataset_1015_files/tmp45bd_D.bam VALIDATION_STRINGENCY=LENIENT TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +[Thu May 12 14:18:06 EDT 2011] net.sf.picard.sam.BamIndexStats done. +Runtime.totalMemory()=9109504 + + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_BIS_output2.txt Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,41 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool BamIndexStats run at 12/05/2011 14:18:22</b><br/><b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="BamIndexStats.log">BamIndexStats.log</a></td></tr> +<tr><td><a href="BamIndexStats.metrics.txt">BamIndexStats.metrics.txt</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td colspan="2">chr1 length= 101 Aligned= 0 Unaligned= 0</td></tr> +<tr class="d1"><td colspan="2">chr7 length= 404 Aligned= 7 Unaligned= 0</td></tr> +<tr class="d0"><td colspan="2">chr8 length= 202 Aligned= 0 Unaligned= 0</td></tr> +<tr class="d1"><td colspan="2">chr10 length= 303 Aligned= 0 Unaligned= 0</td></tr> +<tr class="d0"><td colspan="2">chr14 length= 505 Aligned= 0 Unaligned= 0</td></tr> +<tr class="d1"><td colspan="2">NoCoordinateCount= 1</td></tr> +</table> +<b>Picard Tool Run 
Log</b><hr/> +<pre>Thu, 12 May 2011 14:18:22 INFO + ## executing java -Xmx2g -jar /udd/rerla/galaxy-central/tool-data/shared/jars/BamIndexStats.jar VALIDATION_STRINGENCY=LENIENT INPUT=/udd/rerla/galaxy-central/database/job_working_directory/957/dataset_1017_files/tmpqXGksN.bam returned status 0 and stderr: +[Thu May 12 14:18:21 EDT 2011] net.sf.picard.sam.BamIndexStats INPUT=/udd/rerla/galaxy-central/database/job_working_directory/957/dataset_1017_files/tmpqXGksN.bam VALIDATION_STRINGENCY=LENIENT TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +[Thu May 12 14:18:22 EDT 2011] net.sf.picard.sam.BamIndexStats done. +Runtime.totalMemory()=9109504 + + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_MD_output1.txt Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,112 @@ +## net.sf.picard.metrics.StringHeader +# net.sf.picard.sam.MarkDuplicates INPUT=picard_input_tiny_coord.bam OUTPUT=picard_MD_output2.bam METRICS_FILE=picard_MD_output1.txt REMOVE_DUPLICATES=false ASSUME_SORTED=true READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VALIDATION_STRINGENCY=LENIENT MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 TMP_DIR=/tmp/kpvincent VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +## net.sf.picard.metrics.StringHeader +# Started on: Tue Apr 19 15:28:21 EDT 2011 + +## METRICS CLASS net.sf.picard.sam.DuplicationMetrics +LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATES PERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + 1 3 1 1 1 0 0.428571 3 + +## HISTOGRAM java.lang.Double +BIN VALUE +1.0 0.948181 +2.0 1.296997 +3.0 1.425319 +4.0 1.472527 +5.0 1.489893 +6.0 1.496282 +7.0 1.498632 +8.0 1.499497 +9.0 1.499815 +10.0 1.499932 +11.0 1.499975 +12.0 1.499991 +13.0 1.499997 +14.0 1.499999 +15.0 1.5 +16.0 1.5 +17.0 1.5 +18.0 1.5 +19.0 1.5 +20.0 1.5 +21.0 1.5 +22.0 1.5 +23.0 1.5 +24.0 1.5 +25.0 1.5 +26.0 1.5 +27.0 1.5 +28.0 1.5 +29.0 1.5 +30.0 1.5 +31.0 1.5 +32.0 1.5 +33.0 1.5 +34.0 1.5 +35.0 1.5 +36.0 1.5 +37.0 1.5 +38.0 1.5 +39.0 1.5 +40.0 1.5 +41.0 1.5 +42.0 1.5 +43.0 1.5 +44.0 1.5 +45.0 1.5 +46.0 1.5 +47.0 1.5 +48.0 1.5 +49.0 1.5 +50.0 1.5 +51.0 1.5 +52.0 1.5 +53.0 1.5 +54.0 1.5 +55.0 1.5 +56.0 1.5 +57.0 1.5 +58.0 1.5 +59.0 1.5 +60.0 1.5 +61.0 1.5 +62.0 1.5 +63.0 1.5 +64.0 1.5 +65.0 1.5 +66.0 1.5 +67.0 1.5 +68.0 1.5 +69.0 1.5 +70.0 1.5 +71.0 1.5 +72.0 1.5 +73.0 1.5 +74.0 1.5 +75.0 1.5 +76.0 1.5 +77.0 1.5 +78.0 1.5 +79.0 1.5 +80.0 1.5 +81.0 1.5 +82.0 1.5 +83.0 1.5 +84.0 1.5 
+85.0 1.5 +86.0 1.5 +87.0 1.5 +88.0 1.5 +89.0 1.5 +90.0 1.5 +91.0 1.5 +92.0 1.5 +93.0 1.5 +94.0 1.5 +95.0 1.5 +96.0 1.5 +97.0 1.5 +98.0 1.5 +99.0 1.5 +100.0 1.5 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_MD_output3.txt Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,112 @@ +## net.sf.picard.metrics.StringHeader +# net.sf.picard.sam.MarkDuplicates INPUT=picard_input_tiny_coord.sam OUTPUT=picard_MD_output4_.sam METRICS_FILE=picard_MD_output3_.txt REMOVE_DUPLICATES=true ASSUME_SORTED=false READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VALIDATION_STRINGENCY=LENIENT MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 TMP_DIR=/tmp/kpvincent VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +## net.sf.picard.metrics.StringHeader +# Started on: Tue Apr 19 15:32:38 EDT 2011 + +## METRICS CLASS net.sf.picard.sam.DuplicationMetrics +LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATES PERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + 1 3 1 1 1 0 0.428571 3 + +## HISTOGRAM java.lang.Double +BIN VALUE +1.0 0.948181 +2.0 1.296997 +3.0 1.425319 +4.0 1.472527 +5.0 1.489893 +6.0 1.496282 +7.0 1.498632 +8.0 1.499497 +9.0 1.499815 +10.0 1.499932 +11.0 1.499975 +12.0 1.499991 +13.0 1.499997 +14.0 1.499999 +15.0 1.5 +16.0 1.5 +17.0 1.5 +18.0 1.5 +19.0 1.5 +20.0 1.5 +21.0 1.5 +22.0 1.5 +23.0 1.5 +24.0 1.5 +25.0 1.5 +26.0 1.5 +27.0 1.5 +28.0 1.5 +29.0 1.5 +30.0 1.5 +31.0 1.5 +32.0 1.5 +33.0 1.5 +34.0 1.5 +35.0 1.5 +36.0 1.5 +37.0 1.5 +38.0 1.5 +39.0 1.5 +40.0 1.5 +41.0 1.5 +42.0 1.5 +43.0 1.5 +44.0 1.5 +45.0 1.5 +46.0 1.5 +47.0 1.5 +48.0 1.5 +49.0 1.5 +50.0 1.5 +51.0 1.5 +52.0 1.5 +53.0 1.5 +54.0 1.5 +55.0 1.5 +56.0 1.5 +57.0 1.5 +58.0 1.5 +59.0 1.5 +60.0 1.5 +61.0 1.5 +62.0 1.5 +63.0 1.5 +64.0 1.5 +65.0 1.5 +66.0 1.5 +67.0 1.5 +68.0 1.5 +69.0 1.5 +70.0 1.5 +71.0 1.5 +72.0 1.5 +73.0 1.5 +74.0 1.5 +75.0 1.5 +76.0 1.5 +77.0 1.5 +78.0 1.5 +79.0 1.5 +80.0 1.5 +81.0 1.5 +82.0 1.5 +83.0 1.5 +84.0 1.5 
+85.0 1.5 +86.0 1.5 +87.0 1.5 +88.0 1.5 +89.0 1.5 +90.0 1.5 +91.0 1.5 +92.0 1.5 +93.0 1.5 +94.0 1.5 +95.0 1.5 +96.0 1.5 +97.0 1.5 +98.0 1.5 +99.0 1.5 +100.0 1.5 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_MD_output4.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,11 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RSH_input1.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,14 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:What_is_that_sound? +@PG ID:1 PN:Greetings! VN:2.0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 
NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RSH_output1.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,14 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:What_is_that_sound? +@PG ID:1 PN:Greetings! VN:2.0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 
NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RSH_output2.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,14 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:What_is_that_sound? +@PG ID:1 PN:Greetings! VN:2.0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 
NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RS_input2.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,22 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:phiX174 LN:5386 +@PG ID:Bowtie VN:0.12.7 CL:"bowtie -q -p 4 -S --phred33-quals /genome/phiX/bowtie_index/phiX dataset_437.dat" +HWI-EAS91_1_30788AAXX:1:1:1761:343/1 0 phiX174 5 255 50M * 0 0 TTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGA IIIIIII""IIIIIIIIIII?I0IIIIHIIIGIIIII0II?I""IIIIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1647:512/1 0 phiX174 401 255 50M * 0 0 TACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1578:331/1 0 phiX174 209 255 50M * 0 0 TGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1746:351/1 0 phiX174 1218 255 50M * 0 0 CTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1582:633/1 0 phiX174 1517 255 50M * 0 0 CCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCAC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1599:330/1 0 phiX174 803 255 50M * 0 0 AGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTC IIIIIII""IIIIIIIIIIIIIIIIIII<III@II"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1598:534/1 0 phiX174 1717 255 50M * 0 0 GCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1749:466/1 0 phiX174 2201 255 50M * 0 0 TTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1570:620/1 4 * 0 0 * * 0 0 GAGTAACAAAGTAAAGTTTGGACCGTTTTTGTCTCGTGCTCGTCGCTGCG IIIIIII""IIIIIIIIIIIIIBIIIIIIIIIIII"IIIIIII000IIII XM:i:0 
+HWI-EAS91_1_30788AAXX:1:1:1629:446/1 0 phiX174 2301 255 50M * 0 0 AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1763:587/1 0 phiX174 2501 255 50M * 0 0 AAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1616:363/1 0 phiX174 2718 255 50M * 0 0 TGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGG IIIIIII""IIIIIIIIIIIIIIIIIIIIIIDIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:270/1 0 phiX174 3518 255 50M * 0 0 TGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1768:715/1 0 phiX174 3118 255 50M * 0 0 ACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCT IIIIIII""IIIIIIIIIIIIIDIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1785:1272/1 0 phiX174 3818 255 50M * 0 0 TATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCT III""""""IIIIIIIII""FI"IIII""II+ICI"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1673:514/1 0 phiX174 4018 255 50M * 0 0 ATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIII1"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1652:270/1 4 * 0 0 * * 0 0 AGCGTAAAGGCGCTCGTCTTTGGTATGTAGGACTTTGCATTGTTTAATTG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII6II"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:324/1 4 * 0 0 * * 0 0 AAGGTGCTTAAATTCGTGGGTCCTGAGCTGGCGACCCTGTTTTGTATGGC IIIIIII""IIIIIIIIIIIIIIIIIIII+7I05I"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1634:330/1 0 phiX174 4418 255 50M * 0 0 GGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII8II"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RS_input3.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,26 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:phiX1 LN:1120 +@SQ SN:phiX2 LN:1190 +@SQ SN:phiX3 LN:1330 +@SQ SN:phiX4 LN:910 +@SQ SN:phiX5 LN:828 +@PG ID:Bowtie VN:0.12.7 CL:"bowtie -q -p 4 -S --phred33-quals /tmp/tmp1E6jpl/tmpDg42o9 dataset_437.dat" +HWI-EAS91_1_30788AAXX:1:1:1578:331/1 0 phiX1 209 255 50M * 0 0 TGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1761:343/1 0 phiX1 5 255 50M * 0 0 TTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGA IIIIIII""IIIIIIIIIII?I0IIIIHIIIGIIIII0II?I""IIIIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1647:512/1 0 phiX1 401 255 50M * 0 0 TACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1746:351/1 0 phiX2 98 255 50M * 0 0 CTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1582:633/1 0 phiX2 397 255 50M * 0 0 CCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCAC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1570:620/1 4 * 0 0 * * 0 0 GAGTAACAAAGTAAAGTTTGGACCGTTTTTGTCTCGTGCTCGTCGCTGCG IIIIIII""IIIIIIIIIIIIIBIIIIIIIIIIII"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1598:534/1 0 phiX2 597 255 50M * 0 0 GCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1599:330/1 0 phiX1 803 255 50M * 0 0 AGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTC IIIIIII""IIIIIIIIIIIIIIIIIII<III@II"IIIIIII000IIII XA:i:0 MD:Z:30G19 NM:i:1 +HWI-EAS91_1_30788AAXX:1:1:1749:466/1 0 phiX2 1081 255 50M * 0 0 TTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCT 
IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:324/1 4 * 0 0 * * 0 0 AAGGTGCTTAAATTCGTGGGTCCTGAGCTGGCGACCCTGTTTTGTATGGC IIIIIII""IIIIIIIIIIIIIIIIIIII+7I05I"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1763:587/1 0 phiX3 191 255 50M * 0 0 AAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1616:363/1 0 phiX3 408 255 50M * 0 0 TGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGG IIIIIII""IIIIIIIIIIIIIIIIIIIIIIDIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1652:270/1 4 * 0 0 * * 0 0 AGCGTAAAGGCGCTCGTCTTTGGTATGTAGGACTTTGCATTGTTTAATTG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII6II"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1768:715/1 0 phiX3 808 255 50M * 0 0 ACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCT IIIIIII""IIIIIIIIIIIIIDIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:270/1 0 phiX3 1208 255 50M * 0 0 TGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1629:446/1 4 * 0 0 * * 0 0 AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1673:514/1 0 phiX4 378 255 50M * 0 0 ATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIII1"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1785:1272/1 0 phiX4 178 255 50M * 0 0 TATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCT III""""""IIIIIIIII""FI"IIII""II+ICI"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1634:330/1 0 phiX4 778 255 50M * 0 0 GGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII8II"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RS_input4.fasta Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,82 @@ +>phiX1 length=1120 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +>phiX2 length=1190 +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +>phiX3 length=1330 +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +>phiX4 length=910 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAG +>phiX5 length=828 +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RS_output2.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,22 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:phiX174 LN:5386 UR:file:/afs/bx.psu.edu/user/kpvincent/working/phiX.fa M5:f479307bca04825e98008f37e4f6251a +@PG ID:Bowtie VN:0.12.7 CL:"bowtie -q -p 4 -S --phred33-quals /genome/phiX/bowtie_index/phiX dataset_437.dat" +HWI-EAS91_1_30788AAXX:1:1:1761:343/1 0 phiX174 5 255 50M * 0 0 TTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGA IIIIIII""IIIIIIIIIII?I0IIIIHIIIGIIIII0II?I""IIIIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1647:512/1 0 phiX174 401 255 50M * 0 0 TACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1578:331/1 0 phiX174 209 255 50M * 0 0 TGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1746:351/1 0 phiX174 1218 255 50M * 0 0 CTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1582:633/1 0 phiX174 1517 255 50M * 0 0 CCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCAC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1599:330/1 0 phiX174 803 255 50M * 0 0 AGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTC IIIIIII""IIIIIIIIIIIIIIIIIII<III@II"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1598:534/1 0 phiX174 1717 255 50M * 0 0 GCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1749:466/1 0 phiX174 2201 255 50M * 0 0 TTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1570:620/1 4 * 0 0 * * 0 0 
GAGTAACAAAGTAAAGTTTGGACCGTTTTTGTCTCGTGCTCGTCGCTGCG IIIIIII""IIIIIIIIIIIIIBIIIIIIIIIIII"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1629:446/1 0 phiX174 2301 255 50M * 0 0 AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1763:587/1 0 phiX174 2501 255 50M * 0 0 AAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1616:363/1 0 phiX174 2718 255 50M * 0 0 TGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGG IIIIIII""IIIIIIIIIIIIIIIIIIIIIIDIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:270/1 0 phiX174 3518 255 50M * 0 0 TGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1768:715/1 0 phiX174 3118 255 50M * 0 0 ACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCT IIIIIII""IIIIIIIIIIIIIDIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1785:1272/1 0 phiX174 3818 255 50M * 0 0 TATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCT III""""""IIIIIIIII""FI"IIII""II+ICI"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1673:514/1 0 phiX174 4018 255 50M * 0 0 ATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIII1"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1652:270/1 4 * 0 0 * * 0 0 AGCGTAAAGGCGCTCGTCTTTGGTATGTAGGACTTTGCATTGTTTAATTG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII6II"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:324/1 4 * 0 0 * * 0 0 AAGGTGCTTAAATTCGTGGGTCCTGAGCTGGCGACCCTGTTTTGTATGGC IIIIIII""IIIIIIIIIIIIIIIIIIII+7I05I"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1634:330/1 0 phiX174 4418 255 50M * 0 0 GGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII8II"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_RS_output3.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,26 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:phiX1 LN:1120 UR:picard_RS_input4.fasta AS:phiX_buildBlah1.1 M5:40156fecb557ec9a4e0e7d0d5379d346 SP:phiX174 +@SQ SN:phiX2 LN:1190 UR:picard_RS_input4.fasta AS:phiX_buildBlah1.1 M5:3ecdb3921cbd184296cefdc675595fc1 SP:phiX174 +@SQ SN:phiX3 LN:1330 UR:picard_RS_input4.fasta AS:phiX_buildBlah1.1 M5:60435b7625ee8862e4af3e839b195198 SP:phiX174 +@SQ SN:phiX4 LN:910 UR:picard_RS_input4.fasta AS:phiX_buildBlah1.1 M5:ae8509d7b91ed64a44dd718513b1fd06 SP:phiX174 +@SQ SN:phiX5 LN:828 UR:picard_RS_input4.fasta AS:phiX_buildBlah1.1 M5:459c70115963cbce5021cac2bc9dfbd1 SP:phiX174 +@PG ID:Bowtie VN:0.12.7 CL:"bowtie -q -p 4 -S --phred33-quals /tmp/tmp1E6jpl/tmpDg42o9 dataset_437.dat" +HWI-EAS91_1_30788AAXX:1:1:1570:620/1 4 * 0 0 * * 0 0 GAGTAACAAAGTAAAGTTTGGACCGTTTTTGTCTCGTGCTCGTCGCTGCG IIIIIII""IIIIIIIIIIIIIBIIIIIIIIIIII"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:270/1 0 phiX3 1208 255 50M * 0 0 TGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1572:324/1 4 * 0 0 * * 0 0 AAGGTGCTTAAATTCGTGGGTCCTGAGCTGGCGACCCTGTTTTGTATGGC IIIIIII""IIIIIIIIIIIIIIIIIIII+7I05I"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1578:331/1 0 phiX1 209 255 50M * 0 0 TGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1582:633/1 0 phiX2 397 255 50M * 0 0 CCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCAC IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1598:534/1 0 phiX2 597 255 50M * 0 0 GCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1599:330/1 0 phiX1 803 255 50M * 0 0 
AGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTC IIIIIII""IIIIIIIIIIIIIIIIIII<III@II"IIIIIII000IIII XA:i:0 MD:Z:30G19 NM:i:1 +HWI-EAS91_1_30788AAXX:1:1:1616:363/1 0 phiX3 408 255 50M * 0 0 TGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGG IIIIIII""IIIIIIIIIIIIIIIIIIIIIIDIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1629:446/1 4 * 0 0 * * 0 0 AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1634:330/1 0 phiX4 778 255 50M * 0 0 GGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII8II"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1647:512/1 0 phiX1 401 255 50M * 0 0 TACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCA IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1652:270/1 4 * 0 0 * * 0 0 AGCGTAAAGGCGCTCGTCTTTGGTATGTAGGACTTTGCATTGTTTAATTG IIIIIII""IIIIIIIIIIIIIIIIIIIIIII6II"IIIIIII000IIII XM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1673:514/1 0 phiX4 378 255 50M * 0 0 ATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIII1"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1746:351/1 0 phiX2 98 255 50M * 0 0 CTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1749:466/1 0 phiX2 1081 255 50M * 0 0 TTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1761:343/1 0 phiX1 5 255 50M * 0 0 TTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGA IIIIIII""IIIIIIIIIII?I0IIIIHIIIGIIIII0II?I""IIIIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1763:587/1 0 phiX3 191 255 50M * 0 0 AAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGT IIIIIII""IIIIIIIIIIIIIIIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1768:715/1 0 
phiX3 808 255 50M * 0 0 ACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCT IIIIIII""IIIIIIIIIIIIIDIIIIIIIIIIII"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0 +HWI-EAS91_1_30788AAXX:1:1:1785:1272/1 0 phiX4 178 255 50M * 0 0 TATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCT III""""""IIIIIIIII""FI"IIII""II+ICI"IIIIIII000IIII XA:i:0 MD:Z:50 NM:i:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_bait.bed Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,8 @@ +chr1 1 300 - CCDS635.1_cds_0_0_chr1_67052401_r +chr2 1 300 - CCDS635.1_cds_1_0_chr1_67060632_r +chr3 1 300 - CCDS635.1_cds_2_0_chr1_67065091_r +chr4 1 300 - CCDS635.1_cds_3_0_chr1_67066083_r +chr5 1 300 - CCDS635.1_cds_4_0_chr1_67071856_r +chr6 1 300 - CCDS635.1_cds_5_0_chr1_67072262_r +chr7 1 300 - CCDS635.1_cds_6_0_chr1_67073897_r +chr8 1 300 - CCDS635.1_cds_7_0_chr1_67075981_r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_hg18.trimmed.fasta Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,302 @@ +>chrM +GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTT +CGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTC +GCAGTATCTGTCTTTGATTCCTGCCTCATTCTATTATTTATCGCACCTACGTTCAATATT +ACAGGCGAACATACCTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATA +ACAATTGAATGTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA +AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCAAACCCCAA +AAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTAGGCGGTATGC +ACTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTA +ATCTCATCAATACAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCA +TACCCCGAACCAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA +AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAAATAGGTTTGG +TCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGA +GTTCACCCTCTAAATCACCACGATCAAAAGGGACAAGCATCAAGCACGCAGCAATGCAGC +TCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAAT +>chr1 +AAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC +GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCC +CCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAG +ACTACGAAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTA +GATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAG +AACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA +GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATA +TACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAA +AGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCC +AGAAAACTACGATAGCCCTTATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTG +AGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC +>chr2 +TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAA +GTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACA +CAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACC +TAGCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAAT 
+AAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA +TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAATG +AATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAG +CTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTA +TAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTT +>chr3 +AGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG +TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATT +TAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACC +CACTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGACCAATCTA +TCACCCTATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAA +GCCTGCGTCAGATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA +ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAA +AAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTTTACCAAAAACATCACCTCTA +GCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACC +CTAACCGTGCAaaggtagcataatcacttgttccttaaatagggacctgtatgaatggct +ccacgagggttcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg +cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTAATGCAAACAG +TACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGC +GACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAGTCAAAGCG +AACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAA +CAGCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT +CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCT +ACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTTCTATCTACTTCAAATTCCTC +CCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAAAGCGCCTTCCCCCGTAAATG +ATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGAACAGGGTTTgttaagatgg +cagagcccggtaatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct +taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGC +ATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAA +CGTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAA +AGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGC +TCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT +CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTG +>chr4 
+ATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCAGTAGCCCA +AACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGG +CTCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATC +ATGACCCTTGGCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTT +CGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGG +CCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATTATTATAATAAACACCCTCAC +CACTACAATCTTCCTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATA +TTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCC +CCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCT +AGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAAC +CTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACC +CCCTTATTTctaggactatgagaatcgaacccatccctgagaatccaaaattctccgtgc +cacctatcacaccccatcctaAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCC +GAAAATGTTGGTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTAC +TCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTTTTACCTGA +GTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCT +CGTTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTT +CTAATAGCTATCCTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAATACTACC +AATCAATACTCATCATTAATAATCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCC +TTTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGGCCTGCTTCTT +CTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAAATCTCTCCCTCACTAAAC +GTAAGCCTTCTCCTCACTCTCTCAATCTTATCCATCATAGCAGGCAGTTGAGGTGGATTA +AACCAAACCCAGCTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATA +ATAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATC +CTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTA +TCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCC +CTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTC +ACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTC +TACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAAC +>chr5 +GTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATC +GCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATACTAATAATCTTATAGAAATTT +AGGTTAAATACAGACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTG 
+CAACAGCTAAGGACTGCAAAACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTA +ATTAAGCTAAGCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAACAGC +TAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAAAAAGGCGGGA +GAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCT +CGGAGCTGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTC +AGCCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCAC +AAAGACATTGGAACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCT +CTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCAC +ATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCC +ATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATAATCGGTGCCCCCGATATG +GCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTG +CTCGCATCTGCTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTA +GCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTACACCTA +GCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAA +CCCCCTGCCATAACCCAATACCAAACGCCCCTCTTCGTCTGATCCGTCCTAATCACAGCA +GTCCTACTTCTCCTATCTCTCCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGAC +CGCAACCTCAACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCTATACCAA +CACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACCAGGCTTCGGA +ATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGT +ATGGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATA +>chr6 +TTTACAGTAGGAATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATC +GCTATCCCCACCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATG +AAATGATCTGCTGCAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCACCGTAGGTGGC +CTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTACTACACGACACGTACTAC +GTTGTAGCTCACTTCCACTATGTCCTATCAATAGGAGCTGTATTTGCCATCATAGGAGGC +TTCATTCACTGATTTCCCCTATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATC +CATTTCACTATCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGC +CTATCCGGAATGCCCCGACGTTACTCGGACTACCCCGATGCATACACCACATGAAACATC +CTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAGTAATATTAATAATTTTCATGATT +TGAGAAGCCTTCGCTTCGAAGCGAAAAGTCCTAATAGTAGAAGAACCCTCCATAAACCTG +GAGTGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAA +TCTAGACAaaaaaggaaggaatcgaaccccccaaagctggtttcaagccaaccccatggc 
+ctccatgactttttcAAAAAGGTATTAGAAAAACCATTTCATAACTTTGTCAAAGTTAAA +TTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGCGCAAGTAGGTCTACAAGACG +CTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTT +TCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTA +ATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCA +TCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACG +ATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCG +ACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCG +>chr7 +ACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTA +TAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAA +CAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTAT +ACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAG +AATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCT +ACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAA +GAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCA +TAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAA +ACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAG +AACCAAAATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAGGCCTACC +CGCCGCAGTACTGATCATTCTATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCAT +>chr8 +CAACAACCGACTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGAT +AGCCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCTTAATCATTTT +TATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCATTTACACCAACCACCCAACT +ATCTATAAACCTAGCCATGGCCATCCCCTTATGAGCGGGCGCAGTGATTATAGGCTTTCG +CTCTAAGATTAAAAATGCCCTAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTAT +CCCCATACTAGTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGT +ACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTGGAAGCGCCAC +CCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATCATCTTCACAATTCTAATTCT +ACTGACTATCCTAGAAATCGCTGTCGCCTTAATCCAAGCCTACGTTTTCACACTTCTAGT +AAGCCTCTACCTGCACGACAACACATAATGACCCACCAATCACATGCCTATCATATAGTA +AAACCCAGCCCATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTA +GCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACTAACCAACACA +CTAACCATATACCAATGGTGGCGCGATGTAACACGAGAAAGCACATACCAAGGCCACCAC 
+ACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTTATTACCTCAGAAGTT +TTTTTCTTCGCAGGATTTTTCTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAA +CTAGGAGGGCACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTC +CTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCACCATAGTCTA +ATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTATTACAATTTTACTGGGTCTC +TATTTTACCCTCCTACAAGCCTCAGAGTACTTCGAGTCTCCCTTCACCATTTCCGACGGC +>chr9 +ATCTACGGCTCAACATTTTTTGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGC +TCAACTTTCCTCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACAT +CACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTT +CTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGTACCGTTAAC +TTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTA +ATAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTC +AACGGCTACATAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCC +CGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTA +GAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAACAACTAACCTGCCACTAATA +GTTATGTCATCCCTCTTATTAATCATCATCCTAGCCCTAAGTCTGGCCTATGAGTGACTA +CAAAAAGGATTAGACTGAGCCGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGAC +TCATTAAATTATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTA +GCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCATATCCTCCCTA +CTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAGCTACTCTCATAACCCTCAAC +ACCCACTCCCTCTTAGCCAATATTGTGCCTATTGCCATACTAGTCTTTGCCGCCTGCGAA +GCAGCGGTGGGCCTAGCCCTACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTA +CATAACCTAAACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCAC +TGACATGACTTTCCAAAAAGCACATAATTTGAATCAACACAACCACCCACAGCCTAATTA +>chr10 +TTAGCATCATCCCCCTACTATTTTTTAACCAAATCAACAACAACCTATTTAGCTGTTCCC +CAACCTTTTCCTCCGACCCCCTAACAACCCCCCTCCTAATACTAACTACCTGACTCCTAC +CCCTCACAATCATGGCAAGCCAACGCCACTTATCCAGCGAACCACTATCACGAAAAAAAC +TCTACCTCTCTATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAG +AACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTGGCTATCATCA +CCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCACATACTTCCTATTCTACACCC +TAGTAGGCTCCCTTCCCCTACTCATCGCACTAATTTACACTCACAACACCCTAGGCTCAC +TAAACATTCTACTACTCACTCTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACT 
+TAATATGACTAGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACT +TATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTACTTGCCGCAG +TACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCACACTCATTCTCAACCCCCTGA +CAAAACACATAGCCTACCCCTTCCTTGTACTATCCCTATGAGGCATAATTATAACAAGCT +CCATCTGCCTACGACAAACAGACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACA +TAGCCCTCGTAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCA +TTCTCATAATCGCCCACGGACTCACATCCTCATTACTATTCTGCCTAGCAAACTCAAACT +ACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCAC +TAATAGCTTTTTGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTACCCCCCACTATTA +ACCTACTGGGAGAACTCTCTGTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCC +TACTTACAGGACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAA +CACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTCACACGAGAAA +ACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCTATCCCTCAACCCCGACATCA +TTACCGGGTTTTCCTCTTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACA +ACAGAGGCTTACGACCCCTTATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCC +CCATGTCTAACAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTA +GGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACACTACTATAAC +CACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACCACCCTCGTTAACCCTAACAA +AAAAAACTCATACCCCCATTATGTAAAATCCATTGTCGCATCCACCTTTATTATCAGTCT +CTTCCCCACAACAATATTCATGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTG +AGCCACAACCCAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAAT +ATTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCACTGTGATATAT +AAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTACTCATTTTCCTAATTACCAT +>chr11 +ACTAATCTTAGTTACCGCTAACAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGG +AATTATATCCTTCTTGCTCATCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGC +CATTCAAGCAGTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG +ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAACGCTAATCC +AAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGG +TCTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCA +CTCAAGCACTATAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGA +AAATAGCCCACTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC +AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCTCCACTTCAAG 
+TCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAACCACACCTAGCATTCCTGCA +CATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTCCATCATCCA +CAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCT +CACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG +TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGC +CCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCT +TCTCACCCTAACAGGTCAACCTCGCTTCCCCACCCTTACTAACATTAACGAAAATAACCC +CACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCAT +TACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT +>chr12 +CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAAC +CAACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATT +CTACCCTAGCATCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCT +GCCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACA +GCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT +CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACCTATTCCCCC +GAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAACCACTACTA +ATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTG +ACCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCA +CCCCATCATACTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA +CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCG +CTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATAAATTAAAAAAACTATTAAAC +CCATATAACCTCCCCCAAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCA +GTACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTA +AACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA +CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATAC +GCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCAT +CCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAA +TCACCACAGGACTATTCCTAGCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCAT +CAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA +ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACG +GATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAG +CAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAG 
+TAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCT +GAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT +TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCACGAAACGGGAT +CAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACA +CAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACATTAACACTAT +TCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCC +CTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC +CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCC +CCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTT +ATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCT +>chr13 +ACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCC +TAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA +TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAGGACAAATCAG +AGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTC +TCTGTTCTTTCATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAAC +AACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAA +TACTTGACCACCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT +TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAACTCCAAAGCCA +CCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAA +GTCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCC +CCTCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTG +CTACTCTCCTCGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC +ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAG +ACATCACGATG +>chr20 +NNNNNNNNNNNNNNNNNNNNGATCCAgaggtggaagaggaaggaagcttggaaccctata +gagttgctgagtgccaggaccagatcctggccctaaacaggtggtaaggaaggagagagt +gaaggaactgccaggtgacacactcccaccatggacctctgggatcctagctttaagaga +tcccatcacccacatgaacgtttgaattgacagggggagctgcctggagagtaggcagat +gcagagctcaagcctgtgcagagcccaggttttgtgagtgggacagttgcagcaaaacac +aaccataggtgcccatccaccaaggcaggctctccatcttgctcagagtggctctagccc +ttgctgactgctgggcagggagagagcagagctaacttcctcatgggacctgggtgtgtc +tgatctgtgcacaccactatccaaccgatcccgaggctccaccctggccactcttgtgtg 
+cacacagcacagcctctactgctacacctgagtactttgccagtggcctggaagcacttt +gtcccccctggcacaaatggtgctggaccacgaggggccagagaacaaagccttgggcgt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_sorted_pair.bam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,10 @@ +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:2 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:2 141 chr2 30 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:3 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:3 141 chr3 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:4 77 chr1 1 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:4 141 chr1 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:5 141 chr3 40 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_sorted_pair.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,15 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:10001 +@SQ SN:chr3 LN:10001 +@RG ID:rg1 SM:Z +bar:record:1 77 chr1 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:1 141 chr1 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:2 77 chr1 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:2 141 chr1 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:3 77 chr2 10 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:3 141 chr2 20 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:4 77 chr2 50 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:4 141 chr2 60 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 77 chr3 40 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:5 141 chr3 50 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_summary_alignment_stats.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,28 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:255 +@SQ SN:chr2 LN:255 +@SQ SN:chr3 LN:255 +@SQ SN:chr4 LN:255 +@SQ SN:chr5 LN:255 +@SQ SN:chr6 LN:255 +@SQ SN:chr7 LN:255 +@SQ SN:chr8 LN:255 +@RG ID:0 SM:Hi,Mom! +SL-XAV:1:1:0:764#0/1 89 chr1 1 255 101M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCAN &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0& RG:Z:0 XN:i:1 +SL-XAV:1:1:0:1668#0/2 153 chr2 1 255 101M * 0 0 CATCTCTACANGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATACTTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTNGACACCTTTN (/,'-/'0////(1'&&1&&&&'2''-6/,/3-33653.6:1'.86/-++32.-4864653/5/583/346423203+28888644446688456/4880& RG:Z:0 XN:i:1 +SL-XAV:1:1:0:1914#0/2 153 chr3 1 255 101M * 0 0 CGTATGCGCTNTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAATAAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGNAATGTGCAAN (0,7&&*/*0*,)10/).-*&.&*/6669.&-337599;3,&,6/.,5::999987893+387020775777547999::668997448:::9;999::0& RG:Z:0 +SL-XAV:1:1:0:1639#0/2 153 chr4 1 255 101M * 0 0 CGTGATACCANCTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATATTTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGNTTTGCAGCCN '.&.&&'.0+01'2(1'(''-)','+0041/.+032;:867115/5267-.0/)-5.&-26200224,,0+0/0275/5605688::646875568882*& RG:Z:0 +SL-XAV:1:1:0:68#0/2 137 chr5 1 255 101M * 0 0 NTCTCATTTANAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTTCATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCANGACGTTATCT &1<<999;;;;<<<87579:556972789977444.'.023.&,7621/54.49.)/53055-22--''+(.'-))6-168/(3&&0(<).))*&&&&&'0 RG:Z:0 +SL-XAV:1:1:0:700#0/2 137 chr6 1 255 101M * 0 0 NAATTGTTCTNAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACAATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACNAGTGTCGATC &0::887::::6/646::838388811/679:87640&./2+/-4/28:3,536/4''&&.78/(/554/./02*)*',-(57()&.6(6:(0601'/(,* RG:Z:0 +SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 
CAACAGAAGGNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCGAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +SL-XAV:1:1:0:105#0/2 147 chr8 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0 +SL-XAV:1:1:0:105#0/1 99 chr8 102 255 101M = 1 -79 NCAGGTTCAANTGTGCAGCCCNTTTTGAGAGATNNNNNNNNTGNNCTGNAANANNGACACAGCTATTCCTAAGATGACAAGATCAGANAANAAGTCAAGCA &06665578::41.*/7577/&/77403-324.&&&&&&&&/.&&..&&.0&&&&',:9:/-/(55002020+3'12+2/&.2-&//&),&*&&&&&&&51 RG:Z:0 +SL-XAV:1:1:0:1300#0/1 77 * 0 0 * * 0 0 NAAACACAAGNNANAGTCTTANCNGCTATTCCNNNNNNNNNCTNNNCTNAGNANNACATACAACAGTATCCACACAAGTGTACTCGTNCANACATGTGAAC &*5535)*-,,&.&.*-1)*,&'&)&1&&.,)&&&&&&&&&)0&&&0'&&&&.&&*2'/4''0/**&)&,'-&*,&,&&&.0.&)&&&**&,.&&&')&&) RG:Z:0 +SL-XAV:1:1:0:1300#0/2 141 * 0 0 * * 0 0 NGATCATGGANGACTCTCCCCATCCCCCGCTCCAGCGCTCAGTTATATGCCTAGCCTCGGACACGTCACCAACATCTCACGCACTCTGCANAGTCTCTCAC &&'+''3*&-/)/1'26/*-2-/542-*&-&/'/*/&-'&)-')&.'-/&&2+122*'&+,(/-&)((,/-,,.'2(2'+)/&/&-66-&&/16&)&*&'3 RG:Z:0 +SL-XAV:1:1:0:1639#0/1 101 * 0 0 * chr1 1 0 NCCCTCTCAGNNTNTCTGCCANANCCTTAAGCNNNNNNNNNTANNNCTNAANCNNAAACTTTTGCCTCAGGCATCCGCAGAATGTTTNTCNGCCTATATCG &1::::::64/&/&0:3.280&/&087881,/&&&&&&&&&..&&&..&,,&-&&,265341-)/5680&-.5552-25/322/42/&)&&).421&-&-/ RG:Z:0 +SL-XAV:1:1:0:1668#0/1 101 * 0 0 * chr2 1 0 NATAGCATACNNTNCATTGGANTNCAGCACAANNNNNNNNNTGNNNCANTNNANNCCTTTGAGATCGGAAGAGCGGTTCAGCAGGAANNCNCAGACCGATC 
&1988998890&0&.8863//&.&.0-2875.&&&&&&&&&.)&&&..&.&&.&&.5782-2+262)&-0-0510*.332-2.-,0*&&*&'.&-2-)0., RG:Z:0 +SL-XAV:1:1:0:1914#0/1 101 * 0 0 * chr3 1 0 NTTTTTCTCCNNCNGTGCCTANTNTAGCCCCTNNNNNNNNNAANNNATNANNANNTTTACTTAAAAAACTGAAACTAGTAATGTGCANNANATCGNAAGAG &0::::<<;90&/&.244760&,&.414798/&&&&&&&&&00&&&0.&/&&-&&.4475687363504.&.557/.*)65.&/*./&&.&.+*)&..).& RG:Z:0 +SL-XAV:1:1:0:68#0/1 581 * 0 0 * chr4 1 0 NAATATTCATNNGNTCAGCCTNTNCATTAATTNNNNNNNNNTTNNNATNATNANNTTTTTTATAACCATTTATAAATGAGAGAGATCNTANCACAATATCA &0<<:::::</&&&.73'290&.&0;:::90&&&&&&&&&&..&&&0)&0-&0&&&.743799995253348597921.,.'050.*&.0&)*)&&&&*). RG:Z:0 +SL-XAV:1:1:0:700#0/1 581 * 0 0 * chr5 1 0 NGAAGCCCATNNTNGTGTTACNCNCCTGGAAGNNNNNNNNNACNNNGANACNTNNAACAATTCAGATCGGAAGAGCGGTTCAGCAGANNTNCCGAGACCGA &.88888:88/&0&,03189.&/&.8/))12/&&&&&&&&&./&&&&.&1.&)&&/35962/6432-3&),0&/2+0,),61&-6,&&&'&/,.0&...)0 RG:Z:0 +SL-XAV:1:1:0:764#0/2 165 * 0 0 * chr6 1 0 NACAGATGCANATATTAACAGGCTTTAAAGGACAGATGGACTGCAATACAATAATAGAGTACGTCAACACTCCACAGATCGCTAGAGCATNACATCGGTGT &/:5358::9999::99998255::7275,,/5567-'+387537857:54-4.51'31059547320;73/720+22.4(6.;((.;(;8()(''&&2&& RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_tiny.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,14 @@ +@HD VN:1.0 SO:queryname +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 
CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_input_tiny_coord.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,14 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr7 LN:404 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +@PG ID:1 PN:Hey! VN:2.0 +both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 
NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_AsMetrics_indexed_hg18_sorted_pair.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,57 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool CollectAlignmentSummaryMetrics run at 11/11/2011 08:07:27</b><br/><b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="CollectAlignmentSummaryMetrics.log">CollectAlignmentSummaryMetrics.log</a></td></tr> +<tr><td><a href="CollectAlignmentSummaryMetrics.metrics.txt">CollectAlignmentSummaryMetrics.metrics.txt</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output (transposed to make it easier to see)</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># net.sf.picard.analysis.CollectAlignmentSummaryMetrics MAX_INSERT_SIZE=100000 ADAPTER_SEQUENCE=[AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, 
AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG, IS_BISULFITE_SEQUENCED=false] INPUT=/data/tmp/tmpLLcl1w/database/files/000/dataset_4.dat OUTPUT=/data/home/rlazarus/galaxy/database/job_working_directory/5/dataset_5_files/CollectAlignmentSummaryMetrics.metrics.txt REFERENCE_SEQUENCE=/data/home/rlazarus/galaxy/database/job_working_directory/5/dataset_5_files/hg19.fa_fake.fasta ASSUME_SORTED=true TMP_DIR=[/tmp] VALIDATION_STRINGENCY=LENIENT METRIC_ACCUMULATION_LEVEL=[ALL_READS] IS_BISULFITE_SEQUENCED=false STOP_AFTER=0 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false</td></tr><tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># Started on: Fri Nov 11 08:07:22 EST 2011</td></tr><tr class="d0"><td colspan="2">## METRICS CLASS net.sf.picard.analysis.AlignmentSummaryMetrics</td></tr><tr class="d0"><td>CATEGORY</td><td>FIRST_OF_PAIR </td></tr> +<tr class="d1"><td>TOTAL_READS</td><td>5 </td></tr> +<tr class="d0"><td>PF_READS</td><td>5 </td></tr> +<tr class="d1"><td>PCT_PF_READS</td><td>1 </td></tr> +<tr class="d0"><td>PF_NOISE_READS</td><td>0 </td></tr> +<tr class="d1"><td>PF_READS_ALIGNED</td><td>0 </td></tr> +<tr class="d0"><td>PCT_PF_READS_ALIGNED</td><td>0 </td></tr> +<tr class="d1"><td>PF_ALIGNED_BASES</td><td>0 </td></tr> +<tr class="d0"><td>PF_HQ_ALIGNED_READS</td><td>0 </td></tr> +<tr class="d1"><td>PF_HQ_ALIGNED_BASES</td><td>0 </td></tr> +<tr class="d0"><td>PF_HQ_ALIGNED_Q20_BASES</td><td>0 </td></tr> +<tr class="d1"><td>PF_HQ_MEDIAN_MISMATCHES</td><td>0 </td></tr> +<tr class="d0"><td>PF_MISMATCH_RATE</td><td>0 </td></tr> +<tr class="d1"><td>PF_HQ_ERROR_RATE</td><td>0 </td></tr> +<tr class="d0"><td>PF_INDEL_RATE</td><td>0 </td></tr> +<tr class="d1"><td>MEAN_READ_LENGTH</td><td>13 </td></tr> +<tr 
class="d0"><td>READS_ALIGNED_IN_PAIRS</td><td>0 </td></tr> +<tr class="d1"><td>PCT_READS_ALIGNED_IN_PAIRS</td><td>0 </td></tr> +<tr class="d0"><td>BAD_CYCLES</td><td>0 </td></tr> +<tr class="d1"><td>STRAND_BALANCE</td><td>0 </td></tr> +<tr class="d0"><td>PCT_CHIMERAS</td><td>0 </td></tr> +<tr class="d1"><td>PCT_ADAPTER</td><td>0 </td></tr> +<tr class="d0"><td>SAMPLE</td><td> </td></tr> +<tr class="d1"><td>LIBRARY</td><td> </td></tr> +<tr class="d0"><td>READ_GROUP +</td><td> + </td></tr> +</table> +<b>Picard Tool Run Log</b><hr/> +<pre>INFO:root:## executing java -Xmx4g -jar /data/home/rlazarus/galaxy/tool-data/shared/jars/picard/CollectAlignmentSummaryMetrics.jar VALIDATION_STRINGENCY=LENIENT ASSUME_SORTED=true ADAPTER_SEQUENCE= IS_BISULFITE_SEQUENCED=false MAX_INSERT_SIZE=100000 OUTPUT=/data/home/rlazarus/galaxy/database/job_working_directory/5/dataset_5_files/CollectAlignmentSummaryMetrics.metrics.txt R=/data/home/rlazarus/galaxy/database/job_working_directory/5/dataset_5_files/hg19.fa_fake.fasta TMP_DIR=/tmp INPUT=/data/tmp/tmpLLcl1w/database/files/000/dataset_4.dat returned status 0 and nothing on stderr + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_GcBias_uploaded_hg18_summary_alignment_stats.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,28 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:255 +@SQ SN:chr2 LN:255 +@SQ SN:chr3 LN:255 +@SQ SN:chr4 LN:255 +@SQ SN:chr5 LN:255 +@SQ SN:chr6 LN:255 +@SQ SN:chr7 LN:255 +@SQ SN:chr8 LN:255 +@RG ID:0 SM:Hi,Mom! +SL-XAV:1:1:0:764#0/1 89 chr1 1 255 101M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCAN &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0& RG:Z:0 XN:i:1 +SL-XAV:1:1:0:1668#0/2 153 chr2 1 255 101M * 0 0 CATCTCTACANGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATACTTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTNGACACCTTTN (/,'-/'0////(1'&&1&&&&'2''-6/,/3-33653.6:1'.86/-++32.-4864653/5/583/346423203+28888644446688456/4880& RG:Z:0 XN:i:1 +SL-XAV:1:1:0:1914#0/2 153 chr3 1 255 101M * 0 0 CGTATGCGCTNTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAATAAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGNAATGTGCAAN (0,7&&*/*0*,)10/).-*&.&*/6669.&-337599;3,&,6/.,5::999987893+387020775777547999::668997448:::9;999::0& RG:Z:0 +SL-XAV:1:1:0:1639#0/2 153 chr4 1 255 101M * 0 0 CGTGATACCANCTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATATTTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGNTTTGCAGCCN '.&.&&'.0+01'2(1'(''-)','+0041/.+032;:867115/5267-.0/)-5.&-26200224,,0+0/0275/5605688::646875568882*& RG:Z:0 +SL-XAV:1:1:0:68#0/2 137 chr5 1 255 101M * 0 0 NTCTCATTTANAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTTCATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCANGACGTTATCT &1<<999;;;;<<<87579:556972789977444.'.023.&,7621/54.49.)/53055-22--''+(.'-))6-168/(3&&0(<).))*&&&&&'0 RG:Z:0 +SL-XAV:1:1:0:700#0/2 137 chr6 1 255 101M * 0 0 NAATTGTTCTNAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACAATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACNAGTGTCGATC &0::887::::6/646::838388811/679:87640&./2+/-4/28:3,536/4''&&.78/(/554/./02*)*',-(57()&.6(6:(0601'/(,* RG:Z:0 +SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 
CAACAGAAGGNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCGAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +SL-XAV:1:1:0:105#0/2 147 chr8 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0 +SL-XAV:1:1:0:105#0/1 99 chr8 102 255 101M = 1 -79 NCAGGTTCAANTGTGCAGCCCNTTTTGAGAGATNNNNNNNNTGNNCTGNAANANNGACACAGCTATTCCTAAGATGACAAGATCAGANAANAAGTCAAGCA &06665578::41.*/7577/&/77403-324.&&&&&&&&/.&&..&&.0&&&&',:9:/-/(55002020+3'12+2/&.2-&//&),&*&&&&&&&51 RG:Z:0 +SL-XAV:1:1:0:1300#0/1 77 * 0 0 * * 0 0 NAAACACAAGNNANAGTCTTANCNGCTATTCCNNNNNNNNNCTNNNCTNAGNANNACATACAACAGTATCCACACAAGTGTACTCGTNCANACATGTGAAC &*5535)*-,,&.&.*-1)*,&'&)&1&&.,)&&&&&&&&&)0&&&0'&&&&.&&*2'/4''0/**&)&,'-&*,&,&&&.0.&)&&&**&,.&&&')&&) RG:Z:0 +SL-XAV:1:1:0:1300#0/2 141 * 0 0 * * 0 0 NGATCATGGANGACTCTCCCCATCCCCCGCTCCAGCGCTCAGTTATATGCCTAGCCTCGGACACGTCACCAACATCTCACGCACTCTGCANAGTCTCTCAC &&'+''3*&-/)/1'26/*-2-/542-*&-&/'/*/&-'&)-')&.'-/&&2+122*'&+,(/-&)((,/-,,.'2(2'+)/&/&-66-&&/16&)&*&'3 RG:Z:0 +SL-XAV:1:1:0:1639#0/1 101 * 0 0 * chr1 1 0 NCCCTCTCAGNNTNTCTGCCANANCCTTAAGCNNNNNNNNNTANNNCTNAANCNNAAACTTTTGCCTCAGGCATCCGCAGAATGTTTNTCNGCCTATATCG &1::::::64/&/&0:3.280&/&087881,/&&&&&&&&&..&&&..&,,&-&&,265341-)/5680&-.5552-25/322/42/&)&&).421&-&-/ RG:Z:0 +SL-XAV:1:1:0:1668#0/1 101 * 0 0 * chr2 1 0 NATAGCATACNNTNCATTGGANTNCAGCACAANNNNNNNNNTGNNNCANTNNANNCCTTTGAGATCGGAAGAGCGGTTCAGCAGGAANNCNCAGACCGATC 
&1988998890&0&.8863//&.&.0-2875.&&&&&&&&&.)&&&..&.&&.&&.5782-2+262)&-0-0510*.332-2.-,0*&&*&'.&-2-)0., RG:Z:0 +SL-XAV:1:1:0:1914#0/1 101 * 0 0 * chr3 1 0 NTTTTTCTCCNNCNGTGCCTANTNTAGCCCCTNNNNNNNNNAANNNATNANNANNTTTACTTAAAAAACTGAAACTAGTAATGTGCANNANATCGNAAGAG &0::::<<;90&/&.244760&,&.414798/&&&&&&&&&00&&&0.&/&&-&&.4475687363504.&.557/.*)65.&/*./&&.&.+*)&..).& RG:Z:0 +SL-XAV:1:1:0:68#0/1 581 * 0 0 * chr4 1 0 NAATATTCATNNGNTCAGCCTNTNCATTAATTNNNNNNNNNTTNNNATNATNANNTTTTTTATAACCATTTATAAATGAGAGAGATCNTANCACAATATCA &0<<:::::</&&&.73'290&.&0;:::90&&&&&&&&&&..&&&0)&0-&0&&&.743799995253348597921.,.'050.*&.0&)*)&&&&*). RG:Z:0 +SL-XAV:1:1:0:700#0/1 581 * 0 0 * chr5 1 0 NGAAGCCCATNNTNGTGTTACNCNCCTGGAAGNNNNNNNNNACNNNGANACNTNNAACAATTCAGATCGGAAGAGCGGTTCAGCAGANNTNCCGAGACCGA &.88888:88/&0&,03189.&/&.8/))12/&&&&&&&&&./&&&&.&1.&)&&/35962/6432-3&),0&/2+0,),61&-6,&&&'&/,.0&...)0 RG:Z:0 +SL-XAV:1:1:0:764#0/2 165 * 0 0 * chr6 1 0 NACAGATGCANATATTAACAGGCTTTAAAGGACAGATGGACTGCAATACAATAATAGAGTACGTCAACACTCCACAGATCGCTAGAGCATNACATCGGTGT &/:5358::9999::99998255::7275,,/5567-'+387537857:54-4.51'31059547320;73/720+22.4(6.;((.;(;8()(''&&2&& RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_alignment_summary_metrics.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,59 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool CollectAlignmentSummaryMetrics run at 11/11/2011 08:07:10</b><br/><b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="CollectAlignmentSummaryMetrics.log">CollectAlignmentSummaryMetrics.log</a></td></tr> +<tr><td><a href="CollectAlignmentSummaryMetrics.metrics.txt">CollectAlignmentSummaryMetrics.metrics.txt</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output (transposed to make it easier to see)</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># net.sf.picard.analysis.CollectAlignmentSummaryMetrics MAX_INSERT_SIZE=100000 ADAPTER_SEQUENCE=[AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, 
AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG, IS_BISULFITE_SEQUENCED=false] INPUT=/data/tmp/tmpLLcl1w/database/files/000/dataset_2.dat OUTPUT=/data/home/rlazarus/galaxy/database/job_working_directory/3/dataset_3_files/CollectAlignmentSummaryMetrics.metrics.txt REFERENCE_SEQUENCE=/data/home/rlazarus/galaxy/database/job_working_directory/3/dataset_3_files/CollectAlignmentSummaryMetricsfq2hit.fasta_fake.fasta ASSUME_SORTED=true TMP_DIR=[/tmp] VALIDATION_STRINGENCY=LENIENT METRIC_ACCUMULATION_LEVEL=[ALL_READS] IS_BISULFITE_SEQUENCED=false STOP_AFTER=0 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false</td></tr><tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># Started on: Fri Nov 11 08:07:10 EST 2011</td></tr><tr class="d0"><td colspan="2">## METRICS CLASS net.sf.picard.analysis.AlignmentSummaryMetrics</td></tr><tr class="d0"><td>CATEGORY</td><td>FIRST_OF_PAIR </td></tr> +<tr class="d1"><td>TOTAL_READS</td><td>4 </td></tr> +<tr class="d0"><td>PF_READS</td><td>4 </td></tr> +<tr class="d1"><td>PCT_PF_READS</td><td>1 </td></tr> +<tr class="d0"><td>PF_NOISE_READS</td><td>0 </td></tr> +<tr class="d1"><td>PF_READS_ALIGNED</td><td>4 </td></tr> +<tr class="d0"><td>PCT_PF_READS_ALIGNED</td><td>1 </td></tr> +<tr class="d1"><td>PF_ALIGNED_BASES</td><td>404 </td></tr> +<tr class="d0"><td>PF_HQ_ALIGNED_READS</td><td>4 </td></tr> +<tr class="d1"><td>PF_HQ_ALIGNED_BASES</td><td>404 </td></tr> +<tr class="d0"><td>PF_HQ_ALIGNED_Q20_BASES</td><td>28 </td></tr> +<tr class="d1"><td>PF_HQ_MEDIAN_MISMATCHES</td><td>78 </td></tr> +<tr class="d0"><td>PF_MISMATCH_RATE</td><td>0.777228 </td></tr> +<tr class="d1"><td>PF_HQ_ERROR_RATE</td><td>0.777228 </td></tr> +<tr class="d0"><td>PF_INDEL_RATE</td><td>0 </td></tr> +<tr 
class="d1"><td>MEAN_READ_LENGTH</td><td>101 </td></tr> +<tr class="d0"><td>READS_ALIGNED_IN_PAIRS</td><td>3 </td></tr> +<tr class="d1"><td>PCT_READS_ALIGNED_IN_PAIRS</td><td>0.75 </td></tr> +<tr class="d0"><td>BAD_CYCLES</td><td>63 </td></tr> +<tr class="d1"><td>STRAND_BALANCE</td><td>0.25 </td></tr> +<tr class="d0"><td>PCT_CHIMERAS</td><td>0 </td></tr> +<tr class="d1"><td>PCT_ADAPTER</td><td>0 </td></tr> +<tr class="d0"><td>SAMPLE</td><td> </td></tr> +<tr class="d1"><td>LIBRARY</td><td> </td></tr> +<tr class="d0"><td>READ_GROUP +</td><td> + </td></tr> +</table> +<b>Picard Tool Run Log</b><hr/> +<pre>INFO:root:## executing java -Xmx4g -jar /data/home/rlazarus/galaxy/tool-data/shared/jars/picard/CreateSequenceDictionary.jar REFERENCE=/tmp/CollectAlignmentSummaryMetricsfq2hit.fasta OUTPUT=/tmp/CollectAlignmentSummaryMetricsfq2hit.dict URI=dataset_1.dat TRUNCATE_NAMES_AT_WHITESPACE=None returned status 0 and nothing on stderr + +INFO:root:## executing java -Xmx4g -jar /data/home/rlazarus/galaxy/tool-data/shared/jars/picard/CollectAlignmentSummaryMetrics.jar VALIDATION_STRINGENCY=LENIENT ASSUME_SORTED=true ADAPTER_SEQUENCE= IS_BISULFITE_SEQUENCED=false MAX_INSERT_SIZE=100000 OUTPUT=/data/home/rlazarus/galaxy/database/job_working_directory/3/dataset_3_files/CollectAlignmentSummaryMetrics.metrics.txt R=/data/home/rlazarus/galaxy/database/job_working_directory/3/dataset_3_files/CollectAlignmentSummaryMetricsfq2hit.fasta_fake.fasta TMP_DIR=/tmp INPUT=/data/tmp/tmpLLcl1w/database/files/000/dataset_2.dat returned status 0 and nothing on stderr + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_collect_AS_sorted_pair.txt Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,12 @@ +## net.sf.picard.metrics.StringHeader +# net.sf.picard.analysis.CollectAlignmentSummaryMetrics INPUT=testdata/picard/sam/bam2fastq/paired/ok/sorted-pair.sam OUTPUT=output_test_as/test_as_sorted-pair.txt REFERENCE_SEQUENCE=testdata/picard/reference/Homo_sapiens_assembly18.trimmed.fasta ASSUME_SORTED=true MAX_INSERT_SIZE=100000 ADAPTER_SEQUENCE=[AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG] IS_BISULFITE_SEQUENCED=false TMP_DIR=/tmp/raphael VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +## net.sf.picard.metrics.StringHeader +# Started on: Tue Oct 26 14:06:32 EDT 2010 + +## METRICS CLASS net.sf.picard.analysis.AlignmentSummaryMetrics +CATEGORY TOTAL_READS PF_READS PCT_PF_READS PF_NOISE_READS PF_READS_ALIGNED PCT_PF_READS_ALIGNED PF_HQ_ALIGNED_READS PF_HQ_ALIGNED_BASES PF_HQ_ALIGNED_Q20_BASES PF_HQ_MEDIAN_MISMATCHES PF_HQ_ERROR_RATE MEAN_READ_LENGTH READS_ALIGNED_IN_PAIRS PCT_READS_ALIGNED_IN_PAIRS BAD_CYCLES STRAND_BALANCE PCT_CHIMERAS PCT_ADAPTER +FIRST_OF_PAIR 5 5 1 0 0 0 0 0 0 0 ? 13 0 ? 0 ? ? 0 +SECOND_OF_PAIR 5 5 1 0 0 0 0 0 0 0 ? 13 0 ? 0 ? ? 0 +PAIR 10 10 1 0 0 0 0 0 0 0 ? 13 0 ? 0 ? ? 0 + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_estlibcomplexity_tinysam.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,38 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy rgEstLibComplexity.py tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output rgEstLibComplexity.py run at 10/04/2011 14:52:24</b><b>Your job produced the following output files.</b><hr/> +<table> +<tr><td><a href="estlibcompout.txt">estlibcompout.txt</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output (transposed to make it easier to see)</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td>## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td># net.sf.picard.sam.EstimateLibraryComplexity INPUT=[/export/tmp/tmpBfU1Um/database/files/000/dataset_111.dat] OUTPUT=estlibcompout.txt MIN_IDENTICAL_BASES=5 MAX_DIFF_RATE=0.03 MIN_MEAN_QUALITY=20 READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VALIDATION_STRINGENCY=LENIENT TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 
CREATE_INDEX=false CREATE_MD5_FILE=false</td></tr><tr class="d0"><td>## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td># Started on: Sun Apr 10 14:52:24 EDT 2011</td></tr></table> +<b>Picard log</b><hr/> +<pre>## executing java -Xmx2g -jar /udd/rerla/rgalaxy/tool-data/shared/jars/EstimateLibraryComplexity.jar MIN_IDENTICAL_BASES=5 O=estlibcompout.txt VALIDATION_STRINGENCY=LENIENT MIN_MEAN_QUALITY=20 READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 I=/export/tmp/tmpBfU1Um/database/files/000/dataset_111.dat MAX_DIFF_RATE=0.03 returned status 0 and log (stdout/stderr) records: +[Sun Apr 10 14:52:24 EDT 2011] net.sf.picard.sam.EstimateLibraryComplexity INPUT=[/export/tmp/tmpBfU1Um/database/files/000/dataset_111.dat] OUTPUT=estlibcompout.txt MIN_IDENTICAL_BASES=5 MAX_DIFF_RATE=0.03 MIN_MEAN_QUALITY=20 READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VALIDATION_STRINGENCY=LENIENT TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +INFO 2011-04-10 14:52:24 EstimateLibraryComplexity Will store 3098916 read pairs in memory before sorting. +INFO 2011-04-10 14:52:24 EstimateLibraryComplexity Finished reading - moving on to scanning for duplicates. +[Sun Apr 10 14:52:24 EDT 2011] net.sf.picard.sam.EstimateLibraryComplexity done. 
+Runtime.totalMemory()=33947648 + + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here, using this command line:<br/> +<pre>java -Xmx2g -jar /udd/rerla/rgalaxy/tool-data/shared/jars/EstimateLibraryComplexity.jar MIN_IDENTICAL_BASES=5 O=estlibcompout.txt VALIDATION_STRINGENCY=LENIENT MIN_MEAN_QUALITY=20 READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 I=/export/tmp/tmpBfU1Um/database/files/000/dataset_111.dat MAX_DIFF_RATE=0.03</pre> +</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_fixmate_sorted_pair.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,15 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:10001 +@SQ SN:chr2 LN:10001 +@SQ SN:chr3 LN:10001 +@RG ID:rg1 SM:Z +bar:record:1 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:1 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:2 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:2 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:3 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:3 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:4 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:4 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1 +bar:record:5 77 * 0 0 * * 0 0 AAAAAAAAAAAAA 1111111111111 RG:Z:rg1 +bar:record:5 141 * 0 0 * * 0 0 CCCCCCCCCCCCC 2222222222222 RG:Z:rg1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_gc_summary_alignment_stats.pdf Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,829 @@ +%PDF-1.4 +%âãÏÓ\r +1 0 obj +<< +/CreationDate (D:20101026135453) +/ModDate (D:20101026135453) +/Title (R Graphics Output) +/Producer (R 2.10.1) +/Creator (R) +>> +endobj +2 0 obj +<< +/Type /Catalog +/Pages 3 0 R +>> +endobj +5 0 obj +<< +/Type /Page +/Parent 3 0 R +/Contents 6 0 R +/Resources 4 0 R +>> +endobj +6 0 obj +<< +/Length 7 0 R +>> +stream +q +Q q 57.60 72.00 388.80 374.40 re W n +0.255 0.412 0.882 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 90.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 94.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 97.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 101.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 105.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 108.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 112.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 115.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 119.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 123.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 126.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 130.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 133.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 137.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 141.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 144.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 148.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 151.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 155.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 159.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 162.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 
7.48 166.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 169.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 173.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 177.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 180.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 184.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 187.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 191.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 195.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 198.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 202.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 205.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 209.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 213.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 216.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 220.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 223.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 227.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 231.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 234.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 238.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 241.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 245.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 249.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 252.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 256.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 263.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 267.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 270.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 274.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 277.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 
Tf 1 Tr 7.48 0 0 7.48 281.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 285.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 288.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 292.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 295.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 299.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 303.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 306.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 310.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 313.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 317.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 321.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 324.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 328.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 331.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 335.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 339.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 342.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 346.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 349.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 353.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 357.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 360.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 364.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 367.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 371.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 375.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 378.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 382.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 385.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 389.44 83.27 Tm (l) Tj 0 Tr 
+ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 393.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 396.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 400.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 403.84 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 407.44 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 411.04 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 414.64 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 418.24 83.27 Tm (l) Tj 0 Tr +ET +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 421.84 83.27 Tm (l) Tj 0 Tr +ET +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +72.00 72.00 m 432.00 72.00 l S +72.00 72.00 m 72.00 64.80 l S +144.00 72.00 m 144.00 64.80 l S +216.00 72.00 m 216.00 64.80 l S +288.00 72.00 m 288.00 64.80 l S +360.00 72.00 m 360.00 64.80 l S +432.00 72.00 m 432.00 64.80 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 68.66 46.08 Tm (0) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 137.33 46.08 Tm (20) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 209.33 46.08 Tm (40) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 281.33 46.08 Tm (60) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 353.33 46.08 Tm (80) Tj +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 421.99 46.08 Tm (100) Tj +ET +57.60 85.87 m 57.60 432.53 l S +57.60 85.87 m 50.40 85.87 l S +57.60 172.53 m 50.40 172.53 l S +57.60 259.20 m 50.40 259.20 l S +57.60 345.87 m 50.40 345.87 l S +57.60 432.53 m 50.40 432.53 l S +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 40.32 77.53 Tm (0.0) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 40.32 164.19 Tm (0.5) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 40.32 250.86 Tm (1.0) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 40.32 337.53 Tm (1.5) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 40.32 424.19 Tm (2.0) Tj +ET +57.60 72.00 m +446.40 72.00 l +446.40 446.40 l +57.60 446.40 l +57.60 72.00 l +S +Q q +BT +0.000 0.000 0.000 rg +/F3 1 Tf 14.00 0.00 -0.00 14.00 85.04 478.81 Tm [(summar) -10 
(y_alignment_stats_test.sam GC Bias Plot )] TJ +ET +BT +/F3 1 Tf 14.00 0.00 -0.00 14.00 138.82 461.53 Tm [( T) 80 (otal c) 20 (luster) 15 (s: 9, Aligned reads: 2)] TJ +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 180.06 17.28 Tm [(GC% of 100 base windo) 15 (ws)] TJ +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 11.52 173.80 Tm [(Fr) 10 (action of nor) -25 (maliz) 15 (ed co) 15 (v) 25 (er) 10 (age)] TJ +ET +Q q 57.60 72.00 388.80 374.40 re W n +0.827 0.827 0.827 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +57.60 259.20 m 446.40 259.20 l S +252.00 72.00 m 252.00 446.40 l S +0.745 0.745 0.745 RG +93.60 85.87 m 93.60 85.87 l S +97.20 85.87 m 97.20 85.87 l S +100.80 85.87 m 100.80 85.87 l S +104.40 85.87 m 104.40 85.87 l S +108.00 85.87 m 108.00 85.87 l S +111.60 85.87 m 111.60 85.87 l S +115.20 85.87 m 115.20 85.87 l S +118.80 85.87 m 118.80 85.87 l S +122.40 85.87 m 122.40 85.87 l S +126.00 85.87 m 126.00 85.87 l S +129.60 85.87 m 129.60 85.87 l S +133.20 85.87 m 133.20 85.87 l S +136.80 85.87 m 136.80 85.87 l S +140.40 85.87 m 140.40 85.87 l S +144.00 85.87 m 144.00 85.87 l S +147.60 85.87 m 147.60 85.87 l S +151.20 85.87 m 151.20 85.87 l S +154.80 85.87 m 154.80 85.87 l S +158.40 85.87 m 158.40 85.87 l S +162.00 85.87 m 162.00 85.87 l S +165.60 85.87 m 165.60 85.87 l S +169.20 85.87 m 169.20 85.87 l S +172.80 85.87 m 172.80 85.87 l S +176.40 85.87 m 176.40 85.87 l S +180.00 85.87 m 180.00 85.87 l S +183.60 85.87 m 183.60 85.87 l S +187.20 85.87 m 187.20 85.87 l S +190.80 85.87 m 190.80 85.87 l S +194.40 85.87 m 194.40 85.87 l S +198.00 85.87 m 198.00 85.87 l S +201.60 85.87 m 201.60 85.87 l S +205.20 85.87 m 205.20 85.87 l S +208.80 85.87 m 208.80 85.87 l S +212.40 85.87 m 212.40 85.87 l S +216.00 85.87 m 216.00 85.87 l S +219.60 85.87 m 219.60 85.87 l S +223.20 85.87 m 223.20 85.87 l S +226.80 85.87 m 226.80 85.87 l S +230.40 85.87 m 230.40 85.87 l S +234.00 85.87 m 234.00 85.87 l S +237.60 85.87 m 237.60 85.87 l S +241.20 85.87 m 241.20 85.87 l S +244.80 85.87 m 244.80 85.87 l S 
+248.40 85.87 m 248.40 85.87 l S +252.00 85.87 m 252.00 85.87 l S +255.60 85.87 m 255.60 85.87 l S +259.20 85.87 m 259.20 85.87 l S +262.80 85.87 m 262.80 504.00 l S +259.20 85.87 m +262.80 85.87 l +266.40 85.87 l +S +266.40 85.87 m 266.40 85.87 l S +270.00 85.87 m 270.00 85.87 l S +273.60 85.87 m 273.60 85.87 l S +277.20 85.87 m 277.20 85.87 l S +280.80 85.87 m 280.80 85.87 l S +284.40 85.87 m 284.40 85.87 l S +288.00 85.87 m 288.00 85.87 l S +291.60 85.87 m 291.60 85.87 l S +295.20 85.87 m 295.20 85.87 l S +298.80 85.87 m 298.80 85.87 l S +302.40 85.87 m 302.40 85.87 l S +306.00 85.87 m 306.00 85.87 l S +309.60 85.87 m 309.60 85.87 l S +313.20 85.87 m 313.20 85.87 l S +316.80 85.87 m 316.80 85.87 l S +320.40 85.87 m 320.40 85.87 l S +324.00 85.87 m 324.00 85.87 l S +327.60 85.87 m 327.60 85.87 l S +331.20 85.87 m 331.20 85.87 l S +334.80 85.87 m 334.80 85.87 l S +338.40 85.87 m 338.40 85.87 l S +342.00 85.87 m 342.00 85.87 l S +345.60 85.87 m 345.60 85.87 l S +349.20 85.87 m 349.20 85.87 l S +352.80 85.87 m 352.80 85.87 l S +356.40 85.87 m 356.40 85.87 l S +360.00 85.87 m 360.00 85.87 l S +363.60 85.87 m 363.60 85.87 l S +367.20 85.87 m 367.20 85.87 l S +370.80 85.87 m 370.80 85.87 l S +374.40 85.87 m 374.40 85.87 l S +378.00 85.87 m 378.00 85.87 l S +381.60 85.87 m 381.60 85.87 l S +385.20 85.87 m 385.20 85.87 l S +388.80 85.87 m 388.80 85.87 l S +392.40 85.87 m 392.40 85.87 l S +396.00 85.87 m 396.00 85.87 l S +399.60 85.87 m 399.60 85.87 l S +403.20 85.87 m 403.20 85.87 l S +406.80 85.87 m 406.80 85.87 l S +410.40 85.87 m 410.40 85.87 l S +414.00 85.87 m 414.00 85.87 l S +417.60 85.87 m 417.60 85.87 l S +421.20 85.87 m 421.20 85.87 l S +424.80 85.87 m 424.80 85.87 l S +1.000 0.667 0.667 RG +2.25 w +[] 0 d +93.60 85.87 m 93.60 85.87 l S +97.20 85.87 m 97.20 85.88 l S +100.80 85.87 m 100.80 85.88 l S +104.40 85.87 m 104.40 85.97 l S +108.00 85.87 m 108.00 85.99 l S +111.60 85.87 m 111.60 86.06 l S +115.20 85.87 m 115.20 86.12 l S +118.80 85.87 m 118.80 86.06 l S 
+122.40 85.87 m 122.40 86.17 l S +126.00 85.87 m 126.00 86.28 l S +129.60 85.87 m 129.60 86.51 l S +133.20 85.87 m 133.20 86.69 l S +136.80 85.87 m 136.80 87.09 l S +140.40 85.87 m 140.40 87.41 l S +144.00 85.87 m 144.00 88.39 l S +147.60 85.87 m 147.60 89.49 l S +151.20 85.87 m 151.20 90.62 l S +154.80 85.87 m 154.80 92.04 l S +158.40 85.87 m 158.40 94.69 l S +162.00 85.87 m 162.00 97.81 l S +165.60 85.87 m 165.60 101.23 l S +169.20 85.87 m 169.20 104.69 l S +172.80 85.87 m 172.80 109.93 l S +176.40 85.87 m 176.40 114.60 l S +180.00 85.87 m 180.00 119.18 l S +183.60 85.87 m 183.60 124.12 l S +187.20 85.87 m 187.20 128.11 l S +190.80 85.87 m 190.80 133.13 l S +194.40 85.87 m 194.40 137.97 l S +198.00 85.87 m 198.00 141.85 l S +201.60 85.87 m 201.60 145.39 l S +205.20 85.87 m 205.20 149.98 l S +208.80 85.87 m 208.80 152.24 l S +212.40 85.87 m 212.40 153.52 l S +216.00 85.87 m 216.00 153.49 l S +219.60 85.87 m 219.60 155.48 l S +223.20 85.87 m 223.20 158.11 l S +226.80 85.87 m 226.80 159.15 l S +230.40 85.87 m 230.40 160.39 l S +234.00 85.87 m 234.00 162.70 l S +237.60 85.87 m 237.60 165.88 l S +241.20 85.87 m 241.20 168.75 l S +244.80 85.87 m 244.80 172.15 l S +248.40 85.87 m 248.40 172.53 l S +252.00 85.87 m 252.00 171.70 l S +255.60 85.87 m 255.60 171.32 l S +259.20 85.87 m 259.20 169.62 l S +262.80 85.87 m 262.80 166.52 l S +266.40 85.87 m 266.40 161.93 l S +270.00 85.87 m 270.00 157.27 l S +273.60 85.87 m 273.60 152.98 l S +277.20 85.87 m 277.20 149.45 l S +280.80 85.87 m 280.80 142.42 l S +284.40 85.87 m 284.40 135.15 l S +288.00 85.87 m 288.00 127.42 l S +291.60 85.87 m 291.60 119.32 l S +295.20 85.87 m 295.20 113.09 l S +298.80 85.87 m 298.80 107.18 l S +302.40 85.87 m 302.40 101.98 l S +306.00 85.87 m 306.00 98.53 l S +309.60 85.87 m 309.60 95.97 l S +313.20 85.87 m 313.20 94.20 l S +316.80 85.87 m 316.80 92.38 l S +320.40 85.87 m 320.40 91.32 l S +324.00 85.87 m 324.00 90.56 l S +327.60 85.87 m 327.60 90.04 l S +331.20 85.87 m 331.20 89.26 l S +334.80 85.87 
m 334.80 89.08 l S +338.40 85.87 m 338.40 88.64 l S +342.00 85.87 m 342.00 88.16 l S +345.60 85.87 m 345.60 87.76 l S +349.20 85.87 m 349.20 87.87 l S +352.80 85.87 m 352.80 87.90 l S +356.40 85.87 m 356.40 88.04 l S +360.00 85.87 m 360.00 87.71 l S +363.60 85.87 m 363.60 87.66 l S +367.20 85.87 m 367.20 87.22 l S +370.80 85.87 m 370.80 86.98 l S +374.40 85.87 m 374.40 86.78 l S +378.00 85.87 m 378.00 86.38 l S +381.60 85.87 m 381.60 86.30 l S +385.20 85.87 m 385.20 86.04 l S +388.80 85.87 m 388.80 85.95 l S +392.40 85.87 m 392.40 85.95 l S +396.00 85.87 m 396.00 85.92 l S +399.60 85.87 m 399.60 85.96 l S +403.20 85.87 m 403.20 85.94 l S +406.80 85.87 m 406.80 85.89 l S +410.40 85.87 m 410.40 85.92 l S +414.00 85.87 m 414.00 85.89 l S +417.60 85.87 m 417.60 85.89 l S +421.20 85.87 m 421.20 85.89 l S +424.80 85.87 m 424.80 85.89 l S +0.486 0.804 0.486 RG +0.75 w +[] 0 d +93.60 85.87 m +97.20 85.87 l +100.80 85.87 l +104.40 85.87 l +108.00 85.87 l +111.60 85.87 l +115.20 85.87 l +118.80 85.87 l +122.40 85.87 l +126.00 85.87 l +129.60 85.87 l +133.20 85.87 l +136.80 85.87 l +140.40 85.87 l +144.00 85.87 l +147.60 85.87 l +151.20 85.87 l +154.80 85.87 l +158.40 85.87 l +162.00 85.87 l +165.60 85.87 l +169.20 85.87 l +172.80 85.87 l +176.40 85.87 l +180.00 85.87 l +183.60 85.87 l +187.20 85.87 l +190.80 85.87 l +194.40 85.87 l +198.00 85.87 l +201.60 85.87 l +205.20 85.87 l +208.80 85.87 l +212.40 85.87 l +216.00 85.87 l +219.60 85.87 l +223.20 85.87 l +226.80 85.87 l +230.40 85.87 l +234.00 85.87 l +237.60 85.87 l +241.20 85.87 l +244.80 85.87 l +248.40 85.87 l +252.00 85.87 l +255.60 85.87 l +259.20 85.87 l +262.80 94.53 l +266.40 85.87 l +270.00 85.87 l +273.60 85.87 l +277.20 85.87 l +280.80 85.87 l +284.40 85.87 l +288.00 85.87 l +291.60 85.87 l +295.20 85.87 l +298.80 85.87 l +302.40 85.87 l +306.00 85.87 l +309.60 85.87 l +313.20 85.87 l +316.80 85.87 l +320.40 85.87 l +324.00 85.87 l +327.60 85.87 l +331.20 85.87 l +334.80 85.87 l +338.40 85.87 l +342.00 85.87 l 
+345.60 85.87 l +349.20 85.87 l +352.80 85.87 l +356.40 85.87 l +360.00 85.87 l +363.60 85.87 l +367.20 85.87 l +370.80 85.87 l +374.40 85.87 l +378.00 85.87 l +381.60 85.87 l +385.20 85.87 l +388.80 85.87 l +392.40 85.87 l +396.00 85.87 l +399.60 85.87 l +403.20 85.87 l +406.80 85.87 l +410.40 85.87 l +414.00 85.87 l +417.60 85.87 l +421.20 85.87 l +424.80 85.87 l +S +Q q +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +446.40 85.87 m 446.40 432.53 l S +446.40 85.87 m 453.60 85.87 l S +446.40 172.53 m 453.60 172.53 l S +446.40 259.20 m 453.60 259.20 l S +446.40 345.87 m 453.60 345.87 l S +446.40 432.53 m 453.60 432.53 l S +BT +0.000 0.000 0.000 rg +/F2 1 Tf 0.00 12.00 -12.00 0.00 472.32 82.53 Tm (0) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 472.32 165.86 Tm (10) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 472.32 252.53 Tm (20) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 472.32 339.19 Tm (30) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 472.32 425.86 Tm (40) Tj +ET +BT +/F2 1 Tf 0.00 12.00 -12.00 0.00 493.92 210.51 Tm (Mean base quality) Tj +ET +Q q 57.60 72.00 388.80 374.40 re W n +0.000 0.000 0.000 RG +0.75 w +[] 0 d +1 J +1 j +10.00 M +57.60 446.40 142.55 -57.60 re S +0.255 0.412 0.882 RG +BT +/F1 1 Tf 1 Tr 7.48 0 0 7.48 65.44 429.40 Tm (l) Tj 0 Tr +ET +1.000 0.667 0.667 rg + 65.70 414.90 m + 71.10 414.90 l + 71.10 420.30 l + 65.70 420.30 l +h f +BT +0.486 0.804 0.486 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 64.90 400.17 Tm (-) Tj +ET +BT +0.000 0.000 0.000 rg +/F2 1 Tf 12.00 0.00 -0.00 12.00 79.20 427.69 Tm [(Nor) -25 (maliz) 15 (ed Co) 15 (v) 25 (er) 10 (age)] TJ +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 79.20 413.29 Tm [(Windo) 15 (ws at GC%)] TJ +ET +BT +/F2 1 Tf 12.00 0.00 -0.00 12.00 79.20 398.89 Tm (Base Quality at GC%) Tj +ET +Q +endstream +endobj +7 0 obj +16529 +endobj +3 0 obj +<< +/Type /Pages +/Kids [ +5 0 R +] +/Count 1 +/MediaBox [0 0 504 504] +>> +endobj +4 0 obj +<< +/ProcSet [/PDF /Text] +/Font << /F1 9 0 R /F2 10 0 R /F3 11 0 R >> 
+/ExtGState << >> +>> +endobj +8 0 obj +<< +/Type /Encoding +/BaseEncoding /WinAnsiEncoding +/Differences [ 45/minus 96/quoteleft +144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent +/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space] +>> +endobj +9 0 obj +<< +/Type /Font +/Subtype /Type1 +/Name /F1 +/BaseFont /ZapfDingbats +>> +endobj +10 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F2 +/BaseFont /Helvetica +/Encoding 8 0 R +>> endobj +11 0 obj << +/Type /Font +/Subtype /Type1 +/Name /F3 +/BaseFont /Helvetica-Bold +/Encoding 8 0 R +>> endobj +xref +0 12 +0000000000 65535 f +0000000021 00000 n +0000000164 00000 n +0000016896 00000 n +0000016979 00000 n +0000000213 00000 n +0000000293 00000 n +0000016875 00000 n +0000017083 00000 n +0000017340 00000 n +0000017423 00000 n +0000017520 00000 n +trailer +<< +/Size 12 +/Info 1 0 R +/Root 2 0 R +>> +startxref +17622 +%%EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_gc_summary_alignment_stats.txt Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,102 @@ +## net.sf.picard.metrics.StringHeader +# net.sf.picard.analysis.CollectGcBiasMetrics REFERENCE_SEQUENCE=testdata/picard/reference/Homo_sapiens_assembly18.trimmed.nodict.fasta INPUT=testdata/picard/sam/summary_alignment_stats_test.sam OUTPUT=output_test_gc_nodict/test_nodic_ref_gc_summary_alignment_stats_test.txt CHART_OUTPUT=output_test_gc_nodict/test_nodic_ref_gc_summary_alignment_stats_test.pdf WINDOW_SIZE=100 MINIMUM_GENOME_FRACTION=1.0E-5 TMP_DIR=/tmp/raphael VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +## net.sf.picard.metrics.StringHeader +# Started on: Tue Oct 26 13:54:53 EDT 2010 + +## METRICS CLASS net.sf.picard.analysis.GcBiasDetailMetrics +GC WINDOWS READ_STARTS MEAN_BASE_QUALITY NORMALIZED_COVERAGE ERROR_BAR_WIDTH +6 3 0 0 0 0 +7 7 0 0 0 0 +8 6 0 0 0 0 +9 45 0 0 0 0 +10 54 0 0 0 0 +11 83 0 0 0 0 +12 110 0 0 0 0 +13 83 0 0 0 0 +14 130 0 0 0 0 +15 178 0 0 0 0 +16 276 0 0 0 0 +17 352 0 0 0 0 +18 519 0 0 0 0 +19 658 0 0 0 0 +20 1073 0 0 0 0 +21 1543 0 0 0 0 +22 2024 0 0 0 0 +23 2631 0 0 0 0 +24 3758 0 0 0 0 +25 5087 0 0 0 0 +26 6543 0 0 0 0 +27 8020 0 0 0 0 +28 10251 0 0 0 0 +29 12241 0 0 0 0 +30 14189 0 0 0 0 +31 16295 0 0 0 0 +32 17995 0 0 0 0 +33 20131 0 0 0 0 +34 22194 0 0 0 0 +35 23848 0 0 0 0 +36 25355 0 0 0 0 +37 27310 0 0 0 0 +38 28274 0 0 0 0 +39 28818 0 0 0 0 +40 28806 0 0 0 0 +41 29651 0 0 0 0 +42 30773 0 0 0 0 +43 31217 0 0 0 0 +44 31743 0 0 0 0 +45 32727 0 0 0 0 +46 34084 0 0 0 0 +47 35307 0 0 0 0 +48 36752 0 0 0 0 +49 36917 0 0 0 0 +50 36561 0 0 0 0 +51 36399 0 0 0 0 +52 35677 0 0 0 0 +53 34354 1 1 29.352448 29.352448 +54 32401 0 0 0 0 +55 30417 0 0 0 0 +56 28588 0 0 0 0 +57 27083 0 0 0 0 +58 24090 0 0 0 0 +59 20995 0 0 0 0 +60 17702 0 0 0 0 +61 14252 0 0 0 0 +62 11595 0 0 0 0 +63 9077 0 0 0 0 
+64 6863 0 0 0 0 +65 5394 0 0 0 0 +66 4304 0 0 0 0 +67 3551 0 0 0 0 +68 2776 0 0 0 0 +69 2321 0 0 0 0 +70 1999 0 0 0 0 +71 1777 0 0 0 0 +72 1444 0 0 0 0 +73 1367 0 0 0 0 +74 1182 0 0 0 0 +75 975 0 0 0 0 +76 807 0 0 0 0 +77 852 0 0 0 0 +78 867 0 0 0 0 +79 925 0 0 0 0 +80 786 0 0 0 0 +81 762 0 0 0 0 +82 578 0 0 0 0 +83 475 0 0 0 0 +84 391 0 0 0 0 +85 219 0 0 0 0 +86 184 0 0 0 0 +87 75 0 0 0 0 +88 36 0 0 0 0 +89 37 0 0 0 0 +90 24 0 0 0 0 +91 40 0 0 0 0 +92 33 0 0 0 0 +93 11 0 0 0 0 +94 24 0 0 0 0 +95 12 0 0 0 0 +96 11 0 0 0 0 +97 11 0 0 0 0 +98 9 0 0 0 0 + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_hs_transposed_summary_alignment_stats.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,167 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool wrapper run picard_wrapper at 07/05/2011 00:15:30</b><br/><b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="tempSamHead.txt">tempSamHead.txt</a></td></tr> +<tr><td><a href="rgPicardHsMetrics.bait">rgPicardHsMetrics.bait</a></td></tr> +<tr><td><a href="CalculateHsMetrics.metrics.txt">CalculateHsMetrics.metrics.txt</a></td></tr> +</table><p/> +<b>Picard on line resources - maxrows=100</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output (transposed to make it easier to see)</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td>## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td># net.sf.picard.analysis.directed.CalculateHsMetrics BAIT_INTERVALS=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/rgPicardHsMetrics.bait 
TARGET_INTERVALS=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/rgPicardHsMetrics.bait INPUT=/tmp/6728811.1.all.q/tmpCsP1vm/database/files/000/dataset_95.dat OUTPUT=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/CalculateHsMetrics.metrics.txt TMP_DIR=/tmp/6728811.1.all.q/tmpCsP1vm/database/tmp VALIDATION_STRINGENCY=LENIENT VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false</td></tr><tr class="d0"><td>## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td># Started on: Sat May 07 00:15:15 EDT 2011</td></tr><tr class="d0"><td>## METRICS CLASS net.sf.picard.analysis.directed.HsMetrics</td></tr><tr class="d0"><td>BAIT_SET</td><td>rgPicardHsMetrics</td></tr> +<tr class="d1"><td>GENOME_SIZE</td><td>2040</td></tr> +<tr class="d0"><td>BAIT_TERRITORY</td><td>14601938</td></tr> +<tr class="d1"><td>TARGET_TERRITORY</td><td>14601938</td></tr> +<tr class="d0"><td>BAIT_DESIGN_EFFICIENCY</td><td>1</td></tr> +<tr class="d1"><td>TOTAL_READS</td><td>18</td></tr> +<tr class="d0"><td>PF_READS</td><td>16</td></tr> +<tr class="d1"><td>PF_UNIQUE_READS</td><td>16</td></tr> +<tr class="d0"><td>PCT_PF_READS</td><td>0.888889</td></tr> +<tr class="d1"><td>PCT_PF_UQ_READS</td><td>0.888889</td></tr> +<tr class="d0"><td>PF_UQ_READS_ALIGNED</td><td>10</td></tr> +<tr class="d1"><td>PCT_PF_UQ_READS_ALIGNED</td><td>0.625</td></tr> +<tr class="d0"><td>PF_UQ_BASES_ALIGNED</td><td>1010</td></tr> +<tr class="d1"><td>ON_BAIT_BASES</td><td>0</td></tr> +<tr class="d0"><td>NEAR_BAIT_BASES</td><td>0</td></tr> +<tr class="d1"><td>OFF_BAIT_BASES</td><td>1010</td></tr> +<tr class="d0"><td>ON_TARGET_BASES</td><td>0</td></tr> +<tr class="d1"><td>PCT_SELECTED_BASES</td><td>0</td></tr> +<tr class="d0"><td>PCT_OFF_BAIT</td><td>1</td></tr> +<tr class="d1"><td>ON_BAIT_VS_SELECTED</td><td>?</td></tr> +<tr class="d0"><td>MEAN_BAIT_COVERAGE</td><td>0</td></tr> +<tr 
class="d1"><td>MEAN_TARGET_COVERAGE</td><td>?</td></tr> +<tr class="d0"><td>PCT_USABLE_BASES_ON_BAIT</td><td>0</td></tr> +<tr class="d1"><td>PCT_USABLE_BASES_ON_TARGET</td><td>0</td></tr> +<tr class="d0"><td>FOLD_ENRICHMENT</td><td>0</td></tr> +<tr class="d1"><td>ZERO_CVG_TARGETS_PCT</td><td>0.841776</td></tr> +<tr class="d0"><td>FOLD_80_BASE_PENALTY</td><td>?</td></tr> +<tr class="d1"><td>PCT_TARGET_BASES_2X</td><td>0</td></tr> +<tr class="d0"><td>PCT_TARGET_BASES_10X</td><td>0</td></tr> +<tr class="d1"><td>PCT_TARGET_BASES_20X</td><td>0</td></tr> +<tr class="d0"><td>PCT_TARGET_BASES_30X</td><td>0</td></tr> +<tr class="d1"><td>HS_LIBRARY_SIZE</td><td></td></tr> +<tr class="d0"><td>HS_PENALTY_10X</td><td>0</td></tr> +<tr class="d1"><td>HS_PENALTY_20X</td><td>0</td></tr> +<tr class="d0"><td>HS_PENALTY_30X +</td><td>0 +</td></tr> +</table> +<b>Picard log</b><hr/> +<pre>## got 10 rows of header +## executing java -Xmx8G -jar /udd/rerla/rgalaxy/tool-data/shared/jars/CalculateHsMetrics.jar VALIDATION_STRINGENCY=LENIENT BAIT_INTERVALS=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/rgPicardHsMetrics.bait TARGET_INTERVALS=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/rgPicardHsMetrics.bait INPUT=/tmp/6728811.1.all.q/tmpCsP1vm/database/files/000/dataset_95.dat OUTPUT=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/CalculateHsMetrics.metrics.txt TMP_DIR=/tmp/6728811.1.all.q/tmpCsP1vm/database/tmp returned status 0 and stderr: +[Sat May 07 00:15:15 EDT 2011] net.sf.picard.analysis.directed.CalculateHsMetrics BAIT_INTERVALS=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/rgPicardHsMetrics.bait TARGET_INTERVALS=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/rgPicardHsMetrics.bait INPUT=/tmp/6728811.1.all.q/tmpCsP1vm/database/files/000/dataset_95.dat OUTPUT=/udd/rerla/rgalaxy/database/job_working_directory/95/dataset_97_files/CalculateHsMetrics.metrics.txt 
TMP_DIR=/tmp/6728811.1.all.q/tmpCsP1vm/database/tmp VALIDATION_STRINGENCY=LENIENT VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8307873-8307942 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8309830-8309966 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8321581-8321736 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8328921-8329047 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8330342-8330469 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8331089-8331268 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8331692-8331978 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8365935-8366090 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8366606-8366726 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8379231-8379407 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8394536-8394660 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8426591-8426689 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8439724-8439837 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8450410-8450571 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8455465-8455675 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8460994-8461085 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8474118-8474378 +WARNING 2011-05-07 00:15:16 
IntervalList Ignoring interval for unknown reference: chr9:8475226-8475324 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8475761-8476349 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8482861-8482979 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8487241-8487268 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8489646-8489840 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8490753-8491059 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8494260-8494405 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8497300-8497434 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8507847-8508429 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8511276-8511546 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8513512-8513524 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8514924-8515035 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8516626-8516644 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8517344-8517353 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8518590-8518779 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8623316-8623458 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8626698-8626844 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8723779-8723843 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8307873-8307942 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring 
interval for unknown reference: chr9:8309830-8309966 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8321581-8321736 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8328921-8329047 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8330342-8330469 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8331089-8331268 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8331692-8331978 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8365935-8366090 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8366606-8366726 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8379231-8379407 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8394536-8394660 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8426591-8426689 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8439724-8439837 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8450410-8450571 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8455465-8455675 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8460994-8461085 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8474118-8474378 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8475226-8475324 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8475761-8476349 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8482861-8482979 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown 
reference: chr9:8487241-8487268 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8489646-8489840 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8490753-8491059 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8494260-8494405 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8497300-8497434 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8507847-8508429 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8511276-8511546 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8514924-8515035 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8518590-8518779 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8623316-8623458 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8626698-8626844 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:8723779-8723843 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16408986-16409647 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16425552-16427522 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16542527-16542763 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16572980-16573083 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16717794-16717995 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16728357-16728483 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:16860643-16860646 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown 
reference: chr9:125814448-125814568 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:125816060-125816263 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:125817221-125817625 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:125823198-125823404 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:125834519-125834807 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2019022-2019247 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2022951-2023081 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2029465-2029900 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2037228-2037484 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2044596-2044723 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2046671-2046845 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2048290-2048464 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2050815-2050986 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2060417-2060471 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2063211-2063342 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2063565-2063623 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2066228-2066329 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2067628-2067776 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2071831-2071995 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown 
reference: chr9:2073346-2073413 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2074085-2074196 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2076828-2077071 +WARNING 2011-05-07 00:15:16 IntervalList Ignoring interval for unknown reference: chr9:2078499-2078613 + +<b>## WARNING - 434559 log lines truncated - CalculateHsMetrics.log contains entire output</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_insertsize_tinysam.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,53 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool CollectInsertSizeMetrics run at 13/05/2011 15:20:46</b><br/><table cellpadding="10"><tr><td> +<a href="InsertSizeHist.pdf"><img src="InsertSizeHist.jpg" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a> +</tr></td></table> +<b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="CollectInsertSizeMetrics.metrics.txt">CollectInsertSizeMetrics.metrics.txt</a></td></tr> +<tr><td><a href="CollectInsertSizeMetrics.log">CollectInsertSizeMetrics.log</a></td></tr> +<tr><td><a href="InsertSizeHist.pdf">InsertSizeHist.pdf</a></td></tr> +<tr><td><a href="InsertSizeHist.jpg">InsertSizeHist.jpg</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output (transposed to make it easier to see)</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td colspan="2">## 
net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># net.sf.picard.analysis.CollectInsertSizeMetrics HISTOGRAM_FILE=InsertSizeHist.pdf TAIL_LIMIT=10000 MINIMUM_PCT=0.01 INPUT=/dev/shm/tmpHpwBHW/database/files/000/dataset_1.dat OUTPUT=/udd/rerla/rgalaxy/database/job_working_directory/2/dataset_2_files/CollectInsertSizeMetrics.metrics.txt VALIDATION_STRINGENCY=LENIENT ASSUME_SORTED=true STOP_AFTER=0 TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false</td></tr><tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># Started on: Fri May 13 15:20:45 EDT 2011</td></tr><tr class="d0"><td colspan="2">## METRICS CLASS net.sf.picard.analysis.InsertSizeMetrics</td></tr><tr class="d1"><td colspan="2">## HISTOGRAM java.lang.Integer</td></tr><tr class="d0"><td colspan="2">MEDIAN_INSERT_SIZE MIN_INSERT_SIZE MAX_INSERT_SIZE MEAN_INSERT_SIZE STANDARD_DEVIATION READ_PAIRS PAIR_ORIENTATION WIDTH_OF_10_PERCENT WIDTH_OF_20_PERCENT WIDTH_OF_30_PERCENT WIDTH_OF_40_PERCENT WIDTH_OF_50_PERCENT WIDTH_OF_60_PERCENT WIDTH_OF_70_PERCENT WIDTH_OF_80_PERCENT WIDTH_OF_90_PERCENT WIDTH_OF_99_PERCENT</td></tr> +<tr class="d1"><td colspan="2">96 96 96 96 ? 1 FR 1 1 1 1 1 1 1 1 1 1</td></tr> +<tr class="d0"><td colspan="2">201 201 201 201 ? 
1 RF 1 1 1 1 1 1 1 1 1 1</td></tr> +<tr class="d1"><td colspan="2">insert_size fr_count rf_count</td></tr> +<tr class="d0"><td colspan="2">96 1 0</td></tr> +<tr class="d1"><td colspan="2">201 0 1</td></tr> +</table> +<b>Picard Tool Run Log</b><hr/> +<pre>INFO:root:## executing java -Xmx2g -jar /udd/rerla/rgalaxy/tool-data/shared/jars/CollectInsertSizeMetrics.jar VALIDATION_STRINGENCY=LENIENT I=/dev/shm/tmpHpwBHW/database/files/000/dataset_1.dat O=/udd/rerla/rgalaxy/database/job_working_directory/2/dataset_2_files/CollectInsertSizeMetrics.metrics.txt HISTOGRAM_FILE=InsertSizeHist.pdf TAIL_LIMIT=10000 MINIMUM_PCT=0.01 returned status 0 and stderr: +[Fri May 13 15:20:45 EDT 2011] net.sf.picard.analysis.CollectInsertSizeMetrics HISTOGRAM_FILE=InsertSizeHist.pdf TAIL_LIMIT=10000 MINIMUM_PCT=0.01 INPUT=/dev/shm/tmpHpwBHW/database/files/000/dataset_1.dat OUTPUT=/udd/rerla/rgalaxy/database/job_working_directory/2/dataset_2_files/CollectInsertSizeMetrics.metrics.txt VALIDATION_STRINGENCY=LENIENT ASSUME_SORTED=true STOP_AFTER=0 TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +WARNING 2011-05-13 15:20:45 SinglePassSamProgram File reports sort order 'queryname', assuming it's coordinate sorted anyway. +INFO 2011-05-13 15:20:46 ProcessExecutor [1] "FR = red" "RF = blue" +INFO 2011-05-13 15:20:46 ProcessExecutor null device +INFO 2011-05-13 15:20:46 ProcessExecutor 1 +[Fri May 13 15:20:46 EDT 2011] net.sf.picard.analysis.CollectInsertSizeMetrics done. 
+Runtime.totalMemory()=9109504 + + +INFO:root:## executing mogrify -format jpg -resize x400 /udd/rerla/rgalaxy/database/job_working_directory/2/dataset_2_files/InsertSizeHist.pdf returned status 0 and nothing on stderr + +INFO:root:## executing mogrify -format jpg -resize x400 /udd/rerla/rgalaxy/database/job_working_directory/2/dataset_2_files/InsertSizeHist.pdf returned status 0 and nothing on stderr + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_markdups_sortedpairsam.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,160 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy picard_wrapper tool output - see http://getgalaxy.org/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +Galaxy tool MarkDuplicates run at 12/05/2011 14:34:29</b><br/><b>The following output files were created (click the filename to view/download a copy):</b><hr/><table> +<tr><td><a href="MarkDuplicates.log">MarkDuplicates.log</a></td></tr> +<tr><td><a href="MarkDuplicates.metrics.txt">MarkDuplicates.metrics.txt</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output (transposed to make it easier to see)</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># net.sf.picard.sam.MarkDuplicates INPUT=/tmp/6729253.1.all.q/tmpM5wI_h/database/files/001/dataset_1097.dat OUTPUT=/tmp/6729253.1.all.q/tmpM5wI_h/database/files/001/dataset_1098.dat METRICS_FILE=/udd/rerla/galaxy-central/database/job_working_directory/1032/dataset_1099_files/MarkDuplicates.metrics.txt REMOVE_DUPLICATES=true 
ASSUME_SORTED=true READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VALIDATION_STRINGENCY=LENIENT MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false</td></tr><tr class="d0"><td colspan="2">## net.sf.picard.metrics.StringHeader</td></tr><tr class="d1"><td colspan="2"># Started on: Thu May 12 14:34:28 EDT 2011</td></tr><tr class="d0"><td colspan="2">## METRICS CLASS net.sf.picard.sam.DuplicationMetrics</td></tr><tr class="d0"><td>LIBRARY</td><td> </td></tr> +<tr class="d1"><td>UNPAIRED_READS_EXAMINED</td><td>1 </td></tr> +<tr class="d0"><td>READ_PAIRS_EXAMINED</td><td>3 </td></tr> +<tr class="d1"><td>UNMAPPED_READS</td><td>1 </td></tr> +<tr class="d0"><td>UNPAIRED_READ_DUPLICATES</td><td>1 </td></tr> +<tr class="d1"><td>READ_PAIR_DUPLICATES</td><td>1 </td></tr> +<tr class="d0"><td>READ_PAIR_OPTICAL_DUPLICATES</td><td>0 </td></tr> +<tr class="d1"><td>PERCENT_DUPLICATION</td><td>0.428571 </td></tr> +<tr class="d0"><td>ESTIMATED_LIBRARY_SIZE +</td><td>3 + </td></tr> +<tr class="d0"><td colspan="2">## HISTOGRAM java.lang.Double</td></tr><tr class="d0"><td colspan="2">BIN VALUE</td></tr> +<tr class="d1"><td colspan="2">1.0 0.948181</td></tr> +<tr class="d0"><td colspan="2">2.0 1.296997</td></tr> +<tr class="d1"><td colspan="2">3.0 1.425319</td></tr> +<tr class="d0"><td colspan="2">4.0 1.472527</td></tr> +<tr class="d1"><td colspan="2">5.0 1.489893</td></tr> +<tr class="d0"><td colspan="2">6.0 1.496282</td></tr> +<tr class="d1"><td colspan="2">7.0 1.498632</td></tr> +<tr class="d0"><td colspan="2">8.0 1.499497</td></tr> +<tr class="d1"><td colspan="2">9.0 1.499815</td></tr> +<tr class="d0"><td colspan="2">10.0 1.499932</td></tr> +<tr class="d1"><td colspan="2">11.0 1.499975</td></tr> +<tr class="d0"><td colspan="2">12.0 1.499991</td></tr> +<tr class="d1"><td 
colspan="2">13.0 1.499997</td></tr> +<tr class="d0"><td colspan="2">14.0 1.499999</td></tr> +<tr class="d1"><td colspan="2">15.0 1.5</td></tr> +<tr class="d0"><td colspan="2">16.0 1.5</td></tr> +<tr class="d1"><td colspan="2">17.0 1.5</td></tr> +<tr class="d0"><td colspan="2">18.0 1.5</td></tr> +<tr class="d1"><td colspan="2">19.0 1.5</td></tr> +<tr class="d0"><td colspan="2">20.0 1.5</td></tr> +<tr class="d1"><td colspan="2">21.0 1.5</td></tr> +<tr class="d0"><td colspan="2">22.0 1.5</td></tr> +<tr class="d1"><td colspan="2">23.0 1.5</td></tr> +<tr class="d0"><td colspan="2">24.0 1.5</td></tr> +<tr class="d1"><td colspan="2">25.0 1.5</td></tr> +<tr class="d0"><td colspan="2">26.0 1.5</td></tr> +<tr class="d1"><td colspan="2">27.0 1.5</td></tr> +<tr class="d0"><td colspan="2">28.0 1.5</td></tr> +<tr class="d1"><td colspan="2">29.0 1.5</td></tr> +<tr class="d0"><td colspan="2">30.0 1.5</td></tr> +<tr class="d1"><td colspan="2">31.0 1.5</td></tr> +<tr class="d0"><td colspan="2">32.0 1.5</td></tr> +<tr class="d1"><td colspan="2">33.0 1.5</td></tr> +<tr class="d0"><td colspan="2">34.0 1.5</td></tr> +<tr class="d1"><td colspan="2">35.0 1.5</td></tr> +<tr class="d0"><td colspan="2">36.0 1.5</td></tr> +<tr class="d1"><td colspan="2">37.0 1.5</td></tr> +<tr class="d0"><td colspan="2">38.0 1.5</td></tr> +<tr class="d1"><td colspan="2">39.0 1.5</td></tr> +<tr class="d0"><td colspan="2">40.0 1.5</td></tr> +<tr class="d1"><td colspan="2">41.0 1.5</td></tr> +<tr class="d0"><td colspan="2">42.0 1.5</td></tr> +<tr class="d1"><td colspan="2">43.0 1.5</td></tr> +<tr class="d0"><td colspan="2">44.0 1.5</td></tr> +<tr class="d1"><td colspan="2">45.0 1.5</td></tr> +<tr class="d0"><td colspan="2">46.0 1.5</td></tr> +<tr class="d1"><td colspan="2">47.0 1.5</td></tr> +<tr class="d0"><td colspan="2">48.0 1.5</td></tr> +<tr class="d1"><td colspan="2">49.0 1.5</td></tr> +<tr class="d0"><td colspan="2">50.0 1.5</td></tr> +<tr class="d1"><td colspan="2">51.0 1.5</td></tr> +<tr class="d0"><td 
colspan="2">52.0 1.5</td></tr> +<tr class="d1"><td colspan="2">53.0 1.5</td></tr> +<tr class="d0"><td colspan="2">54.0 1.5</td></tr> +<tr class="d1"><td colspan="2">55.0 1.5</td></tr> +<tr class="d0"><td colspan="2">56.0 1.5</td></tr> +<tr class="d1"><td colspan="2">57.0 1.5</td></tr> +<tr class="d0"><td colspan="2">58.0 1.5</td></tr> +<tr class="d1"><td colspan="2">59.0 1.5</td></tr> +<tr class="d0"><td colspan="2">60.0 1.5</td></tr> +<tr class="d1"><td colspan="2">61.0 1.5</td></tr> +<tr class="d0"><td colspan="2">62.0 1.5</td></tr> +<tr class="d1"><td colspan="2">63.0 1.5</td></tr> +<tr class="d0"><td colspan="2">64.0 1.5</td></tr> +<tr class="d1"><td colspan="2">65.0 1.5</td></tr> +<tr class="d0"><td colspan="2">66.0 1.5</td></tr> +<tr class="d1"><td colspan="2">67.0 1.5</td></tr> +<tr class="d0"><td colspan="2">68.0 1.5</td></tr> +<tr class="d1"><td colspan="2">69.0 1.5</td></tr> +<tr class="d0"><td colspan="2">70.0 1.5</td></tr> +<tr class="d1"><td colspan="2">71.0 1.5</td></tr> +<tr class="d0"><td colspan="2">72.0 1.5</td></tr> +<tr class="d1"><td colspan="2">73.0 1.5</td></tr> +<tr class="d0"><td colspan="2">74.0 1.5</td></tr> +<tr class="d1"><td colspan="2">75.0 1.5</td></tr> +<tr class="d0"><td colspan="2">76.0 1.5</td></tr> +<tr class="d1"><td colspan="2">77.0 1.5</td></tr> +<tr class="d0"><td colspan="2">78.0 1.5</td></tr> +<tr class="d1"><td colspan="2">79.0 1.5</td></tr> +<tr class="d0"><td colspan="2">80.0 1.5</td></tr> +<tr class="d1"><td colspan="2">81.0 1.5</td></tr> +<tr class="d0"><td colspan="2">82.0 1.5</td></tr> +<tr class="d1"><td colspan="2">83.0 1.5</td></tr> +<tr class="d0"><td colspan="2">84.0 1.5</td></tr> +<tr class="d1"><td colspan="2">85.0 1.5</td></tr> +<tr class="d0"><td colspan="2">86.0 1.5</td></tr> +<tr class="d1"><td colspan="2">87.0 1.5</td></tr> +<tr class="d0"><td colspan="2">88.0 1.5</td></tr> +<tr class="d1"><td colspan="2">89.0 1.5</td></tr> +<tr class="d0"><td colspan="2">90.0 1.5</td></tr> +<tr class="d1"><td 
colspan="2">91.0 1.5</td></tr> +<tr class="d0"><td colspan="2">92.0 1.5</td></tr> +<tr class="d1"><td colspan="2">93.0 1.5</td></tr> +<tr class="d0"><td colspan="2">94.0 1.5</td></tr> +<tr class="d1"><td colspan="2">95.0 1.5</td></tr> +<tr class="d0"><td colspan="2">96.0 1.5</td></tr> +<tr class="d1"><td colspan="2">97.0 1.5</td></tr> +<tr class="d0"><td colspan="2">98.0 1.5</td></tr> +<tr class="d1"><td colspan="2">99.0 1.5</td></tr> +</table> +<b>Picard Tool Run Log</b><hr/> +<pre>Thu, 12 May 2011 14:34:29 INFO + ## executing java -Xmx2g -jar /udd/rerla/galaxy-central/tool-data/shared/jars/MarkDuplicates.jar VALIDATION_STRINGENCY=LENIENT ASSUME_SORTED=true INPUT=/tmp/6729253.1.all.q/tmpM5wI_h/database/files/001/dataset_1097.dat OUTPUT=/tmp/6729253.1.all.q/tmpM5wI_h/database/files/001/dataset_1098.dat METRICS_FILE=/udd/rerla/galaxy-central/database/job_working_directory/1032/dataset_1099_files/MarkDuplicates.metrics.txt REMOVE_DUPLICATES=true READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 returned status 0 and stderr: +[Thu May 12 14:34:28 EDT 2011] net.sf.picard.sam.MarkDuplicates INPUT=/tmp/6729253.1.all.q/tmpM5wI_h/database/files/001/dataset_1097.dat OUTPUT=/tmp/6729253.1.all.q/tmpM5wI_h/database/files/001/dataset_1098.dat METRICS_FILE=/udd/rerla/galaxy-central/database/job_working_directory/1032/dataset_1099_files/MarkDuplicates.metrics.txt REMOVE_DUPLICATES=true ASSUME_SORTED=true READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 VALIDATION_STRINGENCY=LENIENT MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=8000 TMP_DIR=/tmp/rerla VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +INFO 2011-05-12 14:34:28 MarkDuplicates Start of doWork freeMemory: 8644632; totalMemory: 9109504; maxMemory: 1908932608 +INFO 2011-05-12 14:34:28 MarkDuplicates Reading input file and 
constructing read end information. +INFO 2011-05-12 14:34:28 MarkDuplicates Will retain up to 7575129 data points before spilling to disk. +INFO 2011-05-12 14:34:28 MarkDuplicates Read 7 records. 0 pairs never matched. +INFO 2011-05-12 14:34:29 MarkDuplicates After buildSortedReadEndLists freeMemory: 8316168; totalMemory: 130351104; maxMemory: 1908932608 +INFO 2011-05-12 14:34:29 MarkDuplicates Will retain up to 59654144 duplicate indices before spilling to disk. +INFO 2011-05-12 14:34:29 MarkDuplicates Traversing read pair information and detecting duplicates. +INFO 2011-05-12 14:34:29 MarkDuplicates Traversing fragment information and detecting duplicates. +INFO 2011-05-12 14:34:29 MarkDuplicates Sorting list of duplicate records. +INFO 2011-05-12 14:34:29 MarkDuplicates After generateDuplicateIndexes freeMemory: 129615120; totalMemory: 607649792; maxMemory: 1908932608 +INFO 2011-05-12 14:34:29 MarkDuplicates Marking 3 records as duplicates. +INFO 2011-05-12 14:34:29 MarkDuplicates Found 0 optical duplicate clusters. +INFO 2011-05-12 14:34:29 MarkDuplicates Before output close freeMemory: 128997520; totalMemory: 607649792; maxMemory: 1908932608 +INFO 2011-05-12 14:34:29 MarkDuplicates After output close freeMemory: 128997280; totalMemory: 607649792; maxMemory: 1908932608 +[Thu May 12 14:34:29 EDT 2011] net.sf.picard.sam.MarkDuplicates done. +Runtime.totalMemory()=607649792 + + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_markdups_sortedpairsam.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,63 @@ +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> + <meta name="generator" content="Galaxy rgPicardMarkDups.py tool output - see http://g2.trac.bx.psu.edu/" /> + <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> + </head> + <body> + <div class="document"> + <h3><a href="http://rgenetics.org">Rgenetics</a> tool rgPicardMarkDups.py run at 19/11/2010 18:25:23</h3> + <b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b> + <hr /> + <div> + <b>Output files.</b> + <table> + <tr><td><a href="rgPicardMarkDups.txt">rgPicardMarkDups.txt (1.8 KB)</a></td></tr> + </table> + </div> + <hr /> + <div> + <b>Log of activity</b> + <hr/> + ## executing java -Xmx2g -jar /share/shared/relul.galaxy/tool-data/shared/jars/MarkDuplicates.jar I= /share/shared/relul.galaxy/database/files/000/dataset_57.dat O= /share/shared/relul.galaxy/database/files/000/dataset_99.dat M= rgPicardMarkDupsMetrics.txt READ_NAME_REGEX="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 returned status 1 and log (stdout/stderr) records: +<BR /> +[Fri Nov 19 18:25:23 EST 2010] net.sf.picard.sam.MarkDuplicates INPUT=/share/shared/relul.galaxy/database/files/000/dataset_57.dat OUTPUT=/share/shared/relul.galaxy/database/files/000/dataset_99.dat METRICS_FILE=rgPicardMarkDupsMetrics.txt READ_NAME_REGEX=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* OPTICAL_DUPLICATE_PIXEL_DISTANCE=100 REMOVE_DUPLICATES=false ASSUME_SORTED=false MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=50000 TMP_DIR=/tmp/relul 
VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +<BR /> +INFO 2010-11-19 18:25:23 MarkDuplicates Start of doWork freeMemory: 8645600; totalMemory: 9109504; maxMemory: 1908932608 +<BR /> +INFO 2010-11-19 18:25:23 MarkDuplicates Reading input file and constructing read end information. +<BR /> +INFO 2010-11-19 18:25:23 MarkDuplicates Will retain up to 7575129 data points before spilling to disk. +<BR /> +[Fri Nov 19 18:25:23 EST 2010] net.sf.picard.sam.MarkDuplicates done. +<BR /> +Runtime.totalMemory()=130351104 +<BR /> +Exception in thread "main" net.sf.picard.PicardException: /share/shared/relul.galaxy/database/files/000/dataset_57.dat is not coordinate sorted. +<BR /> + at net.sf.picard.sam.MarkDuplicates.buildSortedReadEndLists(MarkDuplicates.java:248) +<BR /> + at net.sf.picard.sam.MarkDuplicates.doWork(MarkDuplicates.java:109) +<BR /> + at net.sf.picard.cmdline.CommandLineProgram.instanceMain(CommandLineProgram.java:165) +<BR /> + at net.sf.picard.sam.MarkDuplicates.main(MarkDuplicates.java:93) +<BR /> + +<BR /> + + </div> + + <div> + <p>Note: The freely available + <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> + generated all outputs reported here. 
These third party tools were orchestrated by the Galaxy + rgPicardMarkDups.py wrapper and this command line from the Galaxy form: + </p> + <hr /> + <div>/share/shared/relul.galaxy/tools/development/rgPicardMarkDups.py -i /share/shared/relul.galaxy/database/files/000/dataset_57.dat -n Dupes Marked --tmp_dir /export/tmp/relul -o /share/shared/relul.galaxy/database/files/000/dataset_99.dat --remdups false --assumesorted true --readregex [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).* --optdupedist 100 -j /share/shared/relul.galaxy/tool-data/shared/jars/MarkDuplicates.jar -d /share/shared/relul.galaxy/database/job_working_directory/81/dataset_98_files -t /share/shared/relul.galaxy/database/files/000/dataset_98.dat</div> + </div> + </body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_output_validate_tiny_sam.html Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,46 @@ +<style type="text/css"> + tr.d0 td {background-color: oldlace; color: black;} + tr.d1 td {background-color: aliceblue; color: black;} + </style><?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<meta name="generator" content="Galaxy rgPicardValidate.py tool output - see http://g2.trac.bx.psu.edu/" /> +<title></title> +<link rel="stylesheet" href="/static/style/base.css" type="text/css" /> +</head> +<body> +<div class="document"> +<b><a href="http://rgenetics.org">Galaxy Rgenetics</a> tool output rgPicardValidate.py run at 19/04/2011 11:19:17</b><br/><b>Running this Galaxy tool produced the following output files (click the filename to view/download a copy).</b><hr/><table> +<tr><td><a href="rgPicardValidate.out">rgPicardValidate.out</a></td></tr> +</table><p/> +<b>Picard on line resources</b><ul> +<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li> +<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/> +<b>Picard output</b><hr/> +<table cellpadding="3" > +<tr class="d0"><td>['WARNING: Record 1, Read name both_reads_align_clip_marked, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d1"><td>['WARNING: Record 2, Read name both_reads_present_only_first_aligns, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d0"><td>['WARNING: Record 3, Read name read_2_too_many_gaps, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d1"><td>['ERROR: Record 4, Read name 
both_reads_align_clip_adapter, The record is out of [queryname] order, prior read name [read_2_too_many_gaps], prior coodinates [1:1]\n']</td></tr> +<tr class="d0"><td>['WARNING: Record 4, Read name both_reads_align_clip_adapter, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d1"><td>['WARNING: Record 5, Read name both_reads_align_clip_adapter, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d0"><td>['WARNING: Record 6, Read name both_reads_align_clip_marked, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d1"><td>['WARNING: Record 7, Read name read_2_too_many_gaps, NM tag (nucleotide differences) is missing\n']</td></tr> +<tr class="d0"><td>['ERROR: Record 8, Read name both_reads_present_only_first_aligns, The record is out of [queryname] order, prior read name [read_2_too_many_gaps], prior coodinates [1:302]\n']</td></tr> +</table> +<b>Picard log</b><hr/> +<pre>## executing samtools sort /udd/rerla/rgalaxy/database/job_working_directory/98/dataset_100_files/tmpELItj4rgSortBamTemp.bam /udd/rerla/rgalaxy/database/job_working_directory/98/dataset_100_files/rgcleansam.sorted returned status 0. 
Nothing appeared on stderr/stdout + +rectory/98/dataset_100_files/rgPicardValidate.out IGNORE=INVALID_TAG_NM MAX_OUTPUT=100 TMP_DIR=/tmp returned status 1 and log (stdout/stderr) records: +[Tue Apr 19 11:19:17 EDT 2011] net.sf.picard.sam.ValidateSamFile INPUT=/udd/rerla/rgalaxy/database/job_working_directory/98/dataset_100_files/rgcleansam.sorted.bam OUTPUT=/udd/rerla/rgalaxy/database/job_working_directory/98/dataset_100_files/rgPicardValidate.out IGNORE=[INVALID_TAG_NM] MAX_OUTPUT=100 REFERENCE_SEQUENCE=/share/shared/data/hg18/hg18.fasta TMP_DIR=/tmp MODE=VERBOSE IGNORE_WARNINGS=false VALIDATE_INDEX=true IS_BISULFITE_SEQUENCED=false MAX_OPEN_TEMP_FILES=8000 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +[Tue Apr 19 11:19:17 EDT 2011] net.sf.picard.sam.ValidateSamFile done. +Runtime.totalMemory()=9109504 + + +</pre><hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> +generated all outputs reported here, using this command line:<br/> +<pre>java -Xmx8g -jar /udd/rerla/rgalaxy/tool-data/shared/jars/ValidateSamFile.jar I=/udd/rerla/rgalaxy/database/job_working_directory/98/dataset_100_files/rgcleansam.sorted.bam R=/share/shared/data/hg18/hg18.fasta O=/udd/rerla/rgalaxy/database/job_working_directory/98/dataset_100_files/rgPicardValidate.out IGNORE=INVALID_TAG_NM MAX_OUTPUT=100 TMP_DIR=/tmp</pre> +</div></body></html> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/picard_summary_alignment_stats.sam Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,28 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:101 +@SQ SN:chr2 LN:101 +@SQ SN:chr3 LN:101 +@SQ SN:chr4 LN:101 +@SQ SN:chr5 LN:101 +@SQ SN:chr6 LN:101 +@SQ SN:chr7 LN:202 +@SQ SN:chr8 LN:202 +@RG ID:0 SM:Hi,Mom! +SL-XAV:1:1:0:764#0/1 89 chr1 1 255 101M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCAN &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0& RG:Z:0 XN:i:1 +SL-XAV:1:1:0:1668#0/2 153 chr2 1 255 101M * 0 0 CATCTCTACANGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATACTTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTNGACACCTTTN (/,'-/'0////(1'&&1&&&&'2''-6/,/3-33653.6:1'.86/-++32.-4864653/5/583/346423203+28888644446688456/4880& RG:Z:0 XN:i:1 +SL-XAV:1:1:0:1914#0/2 153 chr3 1 255 101M * 0 0 CGTATGCGCTNTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAATAAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGNAATGTGCAAN (0,7&&*/*0*,)10/).-*&.&*/6669.&-337599;3,&,6/.,5::999987893+387020775777547999::668997448:::9;999::0& RG:Z:0 +SL-XAV:1:1:0:1639#0/2 153 chr4 1 255 101M * 0 0 CGTGATACCANCTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATATTTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGNTTTGCAGCCN '.&.&&'.0+01'2(1'(''-)','+0041/.+032;:867115/5267-.0/)-5.&-26200224,,0+0/0275/5605688::646875568882*& RG:Z:0 +SL-XAV:1:1:0:68#0/2 137 chr5 1 255 101M * 0 0 NTCTCATTTANAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTTCATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCANGACGTTATCT &1<<999;;;;<<<87579:556972789977444.'.023.&,7621/54.49.)/53055-22--''+(.'-))6-168/(3&&0(<).))*&&&&&'0 RG:Z:0 +SL-XAV:1:1:0:700#0/2 137 chr6 1 255 101M * 0 0 NAATTGTTCTNAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACAATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACNAGTGTCGATC &0::887::::6/646::838388811/679:87640&./2+/-4/28:3,536/4''&&.78/(/554/./02*)*',-(57()&.6(6:(0601'/(,* RG:Z:0 +SL-XAV:1:1:0:1721#0/1 83 chr7 1 255 101M = 102 40 
CAACAGAAGGNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCGAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 +SL-XAV:1:1:0:1721#0/2 163 chr7 102 255 101M = 1 -40 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 +SL-XAV:1:1:0:105#0/2 147 chr8 1 255 101M = 102 79 CACATCGTGANTCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGAGAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCNTAAGATGACN /))3--/&*()&)&&+'++.'-&,(.))'4,)&'&&,')8,&&*'.&*0'225/&)3-8//)*,5-*).7851453583.3568526:863688:::85.& RG:Z:0 +SL-XAV:1:1:0:105#0/1 99 chr8 102 255 101M = 1 -79 NCAGGTTCAANTGTGCAGCCCNTTTTGAGAGATNNNNNNNNTGNNCTGNAANANNGACACAGCTATTCCTAAGATGACAAGATCAGANAANAAGTCAAGCA &06665578::41.*/7577/&/77403-324.&&&&&&&&/.&&..&&.0&&&&',:9:/-/(55002020+3'12+2/&.2-&//&),&*&&&&&&&51 RG:Z:0 +SL-XAV:1:1:0:1300#0/1 77 * 0 0 * * 0 0 NAAACACAAGNNANAGTCTTANCNGCTATTCCNNNNNNNNNCTNNNCTNAGNANNACATACAACAGTATCCACACAAGTGTACTCGTNCANACATGTGAAC &*5535)*-,,&.&.*-1)*,&'&)&1&&.,)&&&&&&&&&)0&&&0'&&&&.&&*2'/4''0/**&)&,'-&*,&,&&&.0.&)&&&**&,.&&&')&&) RG:Z:0 +SL-XAV:1:1:0:1300#0/2 141 * 0 0 * * 0 0 NGATCATGGANGACTCTCCCCATCCCCCGCTCCAGCGCTCAGTTATATGCCTAGCCTCGGACACGTCACCAACATCTCACGCACTCTGCANAGTCTCTCAC &&'+''3*&-/)/1'26/*-2-/542-*&-&/'/*/&-'&)-')&.'-/&&2+122*'&+,(/-&)((,/-,,.'2(2'+)/&/&-66-&&/16&)&*&'3 RG:Z:0 +SL-XAV:1:1:0:1639#0/1 101 * 0 0 * chr1 1 0 NCCCTCTCAGNNTNTCTGCCANANCCTTAAGCNNNNNNNNNTANNNCTNAANCNNAAACTTTTGCCTCAGGCATCCGCAGAATGTTTNTCNGCCTATATCG &1::::::64/&/&0:3.280&/&087881,/&&&&&&&&&..&&&..&,,&-&&,265341-)/5680&-.5552-25/322/42/&)&&).421&-&-/ RG:Z:0 +SL-XAV:1:1:0:1668#0/1 101 * 0 0 * chr2 1 0 NATAGCATACNNTNCATTGGANTNCAGCACAANNNNNNNNNTGNNNCANTNNANNCCTTTGAGATCGGAAGAGCGGTTCAGCAGGAANNCNCAGACCGATC 
&1988998890&0&.8863//&.&.0-2875.&&&&&&&&&.)&&&..&.&&.&&.5782-2+262)&-0-0510*.332-2.-,0*&&*&'.&-2-)0., RG:Z:0 +SL-XAV:1:1:0:1914#0/1 101 * 0 0 * chr3 1 0 NTTTTTCTCCNNCNGTGCCTANTNTAGCCCCTNNNNNNNNNAANNNATNANNANNTTTACTTAAAAAACTGAAACTAGTAATGTGCANNANATCGNAAGAG &0::::<<;90&/&.244760&,&.414798/&&&&&&&&&00&&&0.&/&&-&&.4475687363504.&.557/.*)65.&/*./&&.&.+*)&..).& RG:Z:0 +SL-XAV:1:1:0:68#0/1 581 * 0 0 * chr4 1 0 NAATATTCATNNGNTCAGCCTNTNCATTAATTNNNNNNNNNTTNNNATNATNANNTTTTTTATAACCATTTATAAATGAGAGAGATCNTANCACAATATCA &0<<:::::</&&&.73'290&.&0;:::90&&&&&&&&&&..&&&0)&0-&0&&&.743799995253348597921.,.'050.*&.0&)*)&&&&*). RG:Z:0 +SL-XAV:1:1:0:700#0/1 581 * 0 0 * chr5 1 0 NGAAGCCCATNNTNGTGTTACNCNCCTGGAAGNNNNNNNNNACNNNGANACNTNNAACAATTCAGATCGGAAGAGCGGTTCAGCAGANNTNCCGAGACCGA &.88888:88/&0&,03189.&/&.8/))12/&&&&&&&&&./&&&&.&1.&)&&/35962/6432-3&),0&/2+0,),61&-6,&&&'&/,.0&...)0 RG:Z:0 +SL-XAV:1:1:0:764#0/2 165 * 0 0 * chr6 1 0 NACAGATGCANATATTAACAGGCTTTAAAGGACAGATGGACTGCAATACAATAATAGAGTACGTCAACACTCCACAGATCGCTAGAGCATNACATCGGTGT &/:5358::9999::99998255::7275,,/5567-'+387537857:54-4.51'31059547320;73/720+22.4(6.;((.;(;8()(''&&2&& RG:Z:0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random_phiX_1.fastqsanger Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,80 @@ +@random_phiX_region_0 +TTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_1 +GTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_2 +AATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_3 +GGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_4 +ACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGAC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_5 +AGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_6 +CACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_7 +AACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_8 +CTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_9 +CAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_10 +TACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_11 +TATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_12 +AGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_13 +TTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_14 
+CCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGC ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_15 +AACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_16 +GCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_17 +CATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAG ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_18 +TGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@random_phiX_region_19 +GCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACT ++ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/picard_index.loc.sample Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Picard dict and associated files. You will need +#to create these data files and then create a picard_index.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The picard_index.loc +#file has this format (longer white space is the TAB character): +# +#<unique_build_id> <dbkey> <display_name> <fasta_file_path> +# +#So, for example, if you had hg18 indexed and stored in +#/depot/data2/galaxy/picard/hg18/, +#then the picard_index.loc entry would look like this: +# +#hg18 hg18 hg18 Pretty /depot/data2/galaxy/picard/hg18/hg18.fa +# +#and your /depot/data2/galaxy/picard/hg18/ directory +#would contain the following three files: +#hg18.fa +#hg18.dict +#hg18.fa.fai +# +#The dictionary file for each reference (ex. hg18.dict) must be +#created via Picard (http://picard.sourceforge.net). Note that +#the dict file does not have the .fa extension although the +#path listed in the loc file does include it. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc --> +<tables> + <!-- Picard reference table backed by tool-data/picard_index.loc. The four columns name the fields of each loc entry in order: value (unique build id), dbkey, name (display name) and path (FASTA file path). comment_char is set to '#', the character that starts every explanatory line in the sample loc file. --> + <table name="picard_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/picard_index.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Aug 13 12:09:14 2013 -0400 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<tool_dependency> <!-- Defines the "picard" package, version 1.56.0; this is the same name and version that the tool wrappers in this repository request in their requirement tag. --> + <package name="picard" version="1.56.0"> + <install version="1.0"> + <actions> <!-- Steps in listed order: download the Picard 1.56 zip, move the files from picard-tools-1.56 into $INSTALL_DIR/jars, then point JAVA_JAR_PATH at that directory. --> + <action type="download_by_url">http://downloads.sourceforge.net/project/picard/picard-tools/1.56/picard-tools-1.56.zip</action> + <action type="move_directory_files"> + <source_directory>picard-tools-1.56</source_directory> + <destination_directory>$INSTALL_DIR/jars</destination_directory> + </action> + <action type="set_environment"> <!-- The wrappers build jar paths from this variable, e.g. $JAVA_JAR_PATH/AddOrReplaceReadGroups.jar, so it must match the destination directory above. --> + <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR/jars</environment_variable> + </action> + </actions> + </install> <!-- NOTE(review): the readme below is empty; consider adding installation notes (for example that Java is needed to run the jars). --> + <readme> + </readme> + </package> +</tool_dependency>