# HG changeset patch # User greg # Date 1630094180 0 # Node ID b34843f09f9f4eb02a1526cccc91b94cb18ca9ba # Parent 61239720da389cc074478b123b78c2ce1a4ef85c Uploaded diff -r 61239720da38 -r b34843f09f9f .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Fri Aug 27 19:56:20 2021 +0000 @@ -0,0 +1,13 @@ +name: vsnp_statistics +owner: greg +description: | + Contains a tool that produces an Excel spreadsheet containing statistics for samples and associated metrics files. +homepage_url: https://github.com/USDA-VS/vSNP +long_description: | + Contains a tool that accepts a single fastqsanger sample, a set of paired read samples, or a collection of samples + along with associated SAMtools idxstats and vSNP zero coverage metrics files and extracts information from them + to produce an Excel spreadsheet containing statistics for each sample. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_statistics +type: unrestricted +categories: + - Sequence Analysis diff -r 61239720da38 -r b34843f09f9f test-data/vsnp_statistics1.tabular --- a/test-data/vsnp_statistics1.tabular Tue Aug 24 12:44:52 2021 +0000 +++ b/test-data/vsnp_statistics1.tabular Fri Aug 27 19:56:20 2021 +0000 @@ -1,2 +1,2 @@ -# Reference FASTQ File Size Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count -89 Mcap_Deer_DE_SRR650221_fastq_gz 1.6 MB 121.0 29.7 0.53 4317 17063 223 0.05 8.27% 0.439436 36 +FASTQ File Size Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference +Mcap_Deer_DE_SRR650221_fastq_gz 1.6 MB 121.0 29.7 0.53 4317 17063 223 0.05 8.27% 0.439436 36 89 diff -r 61239720da38 -r b34843f09f9f test-data/vsnp_statistics2.tabular --- 
a/test-data/vsnp_statistics2.tabular Tue Aug 24 12:44:52 2021 +0000 +++ b/test-data/vsnp_statistics2.tabular Fri Aug 27 19:56:20 2021 +0000 @@ -1,2 +1,2 @@ -# Reference Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count -89 13-1941-6_S4_L001_R1_600000_fastq_gz 8.7 KB 25 100.0 65.7 1.00 13-1941-6_S4_L001_R2_600000_fastq_gz 8.5 KB 25 100.0 66.3 1.00 50 45 5 0.10 98.74% 10.338671 611 +Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference +13-1941-6_S4_L001_R1_600000_fastq_gz 8.7 KB 25 100.0 65.7 1.00 13-1941-6_S4_L001_R2_600000_fastq_gz 8.5 KB 25 100.0 66.3 1.00 50 45 5 0.10 98.74% 10.338671 611 89 diff -r 61239720da38 -r b34843f09f9f test-data/vsnp_statistics4.tabular --- a/test-data/vsnp_statistics4.tabular Tue Aug 24 12:44:52 2021 +0000 +++ b/test-data/vsnp_statistics4.tabular Fri Aug 27 19:56:20 2021 +0000 @@ -1,2 +1,2 @@ -# Reference Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count -89 Unnamed Collection_R1 8.7 KB 25 100.0 65.7 1.00 Unnamed Collection_R2 8.5 KB 25 100.0 66.3 1.00 50 46 4 0.08 0.16% 0.002146 0 +Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads 
Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference +Unnamed Collection_R1 8.7 KB 25 100.0 65.7 1.00 Unnamed Collection_R2 8.5 KB 25 100.0 66.3 1.00 50 46 4 0.08 0.16% 0.002146 0 89 diff -r 61239720da38 -r b34843f09f9f vsnp_statistics.py --- a/vsnp_statistics.py Tue Aug 24 12:44:52 2021 +0000 +++ b/vsnp_statistics.py Fri Aug 27 19:56:20 2021 +0000 @@ -100,23 +100,22 @@ def output_statistics(read1_stats, read2_stats, idxstats_file, metrics_file, output_file): paired_reads = read2_stats is not None if paired_reads: - columns = ['Reference', 'Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', + columns = ['Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 'Read2 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total', 'Reference with Coverage', 'Average Depth of Coverage', - 'Good SNP Count'] + 'Good SNP Count', 'Reference'] else: - columns = ['Reference', 'FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', + columns = ['FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total', - 'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count'] + 'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count', 'Reference'] with open(output_file, "w") as outfh: # Make sure the header starts with a # so # MultiQC can properly handle the output. - outfh.write("# %s\n" % "\t".join(columns)) + outfh.write("%s\n" % "\t".join(columns)) line_items = [] # Get the current stats and associated files. # Get and output the statistics. 
- line_items.append(read1_stats.reference) line_items.append(read1_stats.fastq_file) line_items.append(read1_stats.file_size) if paired_reads: @@ -152,6 +151,7 @@ line_items.append(ref_with_coverage) line_items.append(avg_depth_of_coverage) line_items.append(good_snp_count) + line_items.append(read1_stats.reference) outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))