Mercurial > repos > qfabrepo > metadegalaxy_pear_stats
changeset 0:ec62f17fcfe6 draft default tip
"planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
author | qfabrepo |
---|---|
date | Mon, 14 Sep 2020 04:50:28 +0000 |
parents | |
children | |
files | pear_stats.py pear_stats.xml test-data/F3D0.log test-data/F3D1.log test-data/test_output.txt |
diffstat | 5 files changed, 242 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# pear_stats.py
"""Summarise PEAR log files as a tab-separated report.

For every input log the three read-count lines ("Assembled reads",
"Discarded reads" and "Not assembled reads") are copied into one output
row, prefixed with the matching sample name.  The percentage found on each
"Assembled reads" line is averaged over all input files and appended to
the report together with a workflow recommendation.

Example:
    python pear_stats.py -i "file1.log,file2.log" -s "name1,name2" -o out.txt
"""
import argparse
import re
import sys

parser = argparse.ArgumentParser(
    description="Parse multiple Pear statistic log to a tabular format\n"
    + "Example:\n python pear_stats.py -i \"file1.log,file2.log\" -s \"samplename1,samplename2\" -o outputfile")
parser.add_argument("-v", "--version", action="version", version="%(prog)s 1.0")
# All three options are mandatory: each one is used unconditionally below.
parser.add_argument("-i", "--input", dest="inputfilelist", required=True, help="a list of input file")
parser.add_argument("-s", "--samplename", dest="samplename", required=True, help="a list of input filename")
parser.add_argument("-o", "--outfile", dest="outputfile", required=True, help="Pear statistic output")

# Labels of the statistics lines, in the order they are written to a row.
tags = ['Assembled reads', 'Discarded reads', 'Not assembled reads']

# A statistics line is a label followed by dot padding and a colon.  Requiring
# the dots right after the label keeps "Assembled reads file....:" and
# "Discarded reads file....:" from being mistaken for statistics lines.
_STAT_LINE = re.compile(r"(%s)\s*\.+:" % "|".join(re.escape(tag) for tag in tags))

# The percentage is printed in parentheses, e.g. "(91.919%)".
_PERCENT = re.compile(r"\(([^()%]+)%\)")


def processfile(instr):
    """Return the statistics lines of the PEAR log at path *instr*.

    The lines are located by their label rather than by line number, so the
    result does not depend on how many banner or progress lines precede
    them.  Only the first occurrence of each label is used.

    Returns a list of right-stripped lines ordered as in ``tags``.  A label
    that is absent from the log is skipped and reported on stderr.
    """
    found = {}
    with open(instr, 'r') as handle:
        for line in handle:
            match = _STAT_LINE.match(line)
            if match is None or match.group(1) in found:
                continue
            found[match.group(1)] = line.rstrip()
            if len(found) == len(tags):
                break  # everything located; no need to read the rest

    for tag in tags:
        if tag not in found:
            sys.stderr.write("WARNING: '%s' line not found in %s\n" % (tag, instr))
    return [found[tag] for tag in tags if tag in found]


def assembled_percent(stat_lines):
    """Return the percentage on the "Assembled reads" line as a float.

    *stat_lines* is a list as returned by ``processfile``.  Returns ``None``
    when it holds no "Assembled reads" line, and raises ``ValueError`` when
    that line carries no ``(NN.NNN%)`` field.
    """
    for line in stat_lines:
        if line.startswith(tags[0]):
            match = _PERCENT.search(line)
            if match is None:
                raise ValueError("no percentage found in line: %r" % line)
            return float(match.group(1))
    return None


def main(argv=None):
    """Run the command-line tool.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]``.  With no arguments the help text is printed to stderr
    and the process exits with status 0.
    """
    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        parser.print_help(sys.stderr)
        sys.exit()

    args = parser.parse_args(argv)
    inputfiles = args.inputfilelist.split(',')
    samplenames = args.samplename.split(',')
    # Surplus sample names are ignored; too few cannot be paired with a file.
    if len(samplenames) < len(inputfiles):
        parser.error("%d input files given but only %d sample names"
                     % (len(inputfiles), len(samplenames)))

    total_assembled = 0.0
    with open(args.outputfile, 'w') as outputfile:
        for samplename, inputfile in zip(samplenames, inputfiles):
            stats = processfile(inputfile)
            percent = assembled_percent(stats)
            if percent is not None:
                total_assembled += percent
            outputfile.write("\t".join([samplename] + stats))
            outputfile.write("\n")

        # The average is taken over all input files, including any whose
        # "Assembled reads" line could not be found.
        average_assembled = total_assembled / len(inputfiles)
        summary = ["The above assessment has been performed on 1000 randomly selected reads per sample file.\nAverage % of overlapping paired-end reads =", str(average_assembled), "\nIf the average percentage is greater than 50%, you can consider using workflow 16S_biodiversity_for_overlap_PE.\nHowever, if the average percentage is less than 50%, use 16S_biodiversity_nonoverlap_PE."]

        outputfile.write("\n\n\n")
        outputfile.write("\t".join(summary))


if __name__ == "__main__":
    main()
<!-- pear_stats.xml: Galaxy tool wrapper that runs pear_stats.py on a list collection of PEAR logs. -->
<tool id="pearStat" name="PEAR Statistics" version="1.0.0">
    <description>Generate paired-end reads overlap Statistic from PEAR log file</description>
    <version_command>
        python '${__tool_directory__}/pear_stats.py' --version
    </version_command>
    <command detect_errors="aggressive"><![CDATA[
        ## Comma-separated dataset paths and element identifiers, in collection order.
        #set $files = ','.join([str($file) for $file in $input])
        #set $names = ','.join([str($name) for $name in $input.keys()])

        ## Single quotes keep spaces and shell metacharacters in paths or
        ## element identifiers from being interpreted by the shell.
        python '$__tool_directory__/pear_stats.py' -i '$files' -s '$names' -o '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data_collection" format="txt" collection_type="list" label="PEAR LOG FILE"/>
    </inputs>
    <outputs>
        <!--<data format="tabular" name="output" label="${tool.name}.${input.display_name}"/>-->
        <data format="tabular" name="output" label="${tool.name}.log"/>
    </outputs>
    <tests>
        <test>
            <param name="input">
                <collection type="list">
                    <element name="F3D0" value="F3D0.log" />
                    <element name="F3D1" value="F3D1.log" />
                </collection>
            </param>
            <output name="output" file="test_output.txt"/>
        </test>
    </tests>

    <help>
**What it does**

Creates summary log file by extracting "Assembled reads", "Discarded reads" and "Not assembled reads" from PEAR_ log.

.. _PEAR: https://sco.h-its.org/exelixis/web/software/pear/doc.html

-----

=======
Example
=======

PEAR v0.9.6 [January 15, 2015]

Citation - PEAR: a fast and accurate Illumina Paired-End reAd mergeR
Zhang et al (2014) Bioinformatics 30(5): 614-620 | doi:10.1093/bioinformatics/btt593

==================================== =======================================
Field                                Parameter and output
------------------------------------ ---------------------------------------
Forward reads file.................: /mnt/galaxy/files/014/dataset_14938.dat
Reverse reads file.................: /mnt/galaxy/files/014/dataset_14939.dat
PHRED..............................: 33
Using empirical frequencies........: NO
Statistical method.................: OES
Maximum assembly length............: 999999
Minimum assembly length............: 50
p-value............................: 0.010000
Quality score threshold (trimming).: 0
Minimum read size after trimming...: 1
Maximal ratio of uncalled bases....: 1.000000
Minimum overlap....................: 10
Scoring method.....................: Scaled score
Threads............................: 2

Allocating memory..................: 200,000,000 bytes
Assemblying reads: 0%
Assemblying reads: 100%

Assembled reads ...................: 906 / 990 (91.515%)
Discarded reads ...................: 0 / 990 (0.000%)
Not assembled reads ...............: 84 / 990 (8.485%)
Assembled reads file...............: pear.assembled.fastq
Discarded reads file...............: pear.discarded.fastq
Unassembled forward reads file.....: pear.unassembled.forward.fastq
Unassembled reverse reads file.....: pear.unassembled.reverse.fastq
==================================== =======================================


-----

=========
Resources
=========

**Wrapper Authors**

QFAB Bioinformatics (support@qfab.org)

    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btt593</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/F3D0.log Mon Sep 14 04:50:28 2020 +0000 @@ -0,0 +1,36 @@ + ____ _____ _ ____ +| _ \| ____| / \ | _ \ +| |_) | _| / _ \ | |_) | +| __/| |___ / ___ \| _ < +|_| |_____/_/ \_\_| \_\ + +PEAR v0.9.10 [May 30, 2016] + +Citation - PEAR: a fast and accurate Illumina Paired-End reAd mergeR +Zhang et al (2014) Bioinformatics 30(5): 614-620 | doi:10.1093/bioinformatics/btt593 + +Forward reads file.................: /mnt/galaxy/files/011/dataset_11542.dat +Reverse reads file.................: /mnt/galaxy/files/011/dataset_11543.dat +PHRED..............................: 33 +Using empirical frequencies........: NO +Statistical method.................: OES +Maximum assembly length............: 999999 +Minimum assembly length............: 50 +p-value............................: 0.010000 +Quality score threshold (trimming).: 0 +Minimum read size after trimming...: 1 +Maximal ratio of uncalled bases....: 1.000000 +Minimum overlap....................: 10 +Scoring method.....................: Scaled score +Threads............................: 2 + +Allocating memory..................: 200,000,000 bytes +Assemblying reads: 0% Assemblying reads: 100% + +Assembled reads ...................: 91 / 99 (91.919%) +Discarded reads ...................: 0 / 99 (0.000%) +Not assembled reads ...............: 8 / 99 (8.081%) +Assembled reads file...............: pear.assembled.fastq +Discarded reads file...............: pear.discarded.fastq +Unassembled forward reads file.....: pear.unassembled.forward.fastq +Unassembled reverse reads file.....: pear.unassembled.reverse.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/F3D1.log Mon Sep 14 04:50:28 2020 +0000 @@ -0,0 +1,36 @@ + ____ _____ _ ____ +| _ \| ____| / \ | _ \ +| |_) | _| / _ \ | |_) | +| __/| |___ / ___ \| _ < +|_| |_____/_/ \_\_| \_\ + +PEAR v0.9.10 [May 30, 2016] + +Citation - PEAR: a fast and accurate Illumina Paired-End reAd mergeR +Zhang et al (2014) Bioinformatics 30(5): 614-620 | doi:10.1093/bioinformatics/btt593 + +Forward reads file.................: /mnt/galaxy/files/011/dataset_11546.dat +Reverse reads file.................: /mnt/galaxy/files/011/dataset_11547.dat +PHRED..............................: 33 +Using empirical frequencies........: NO +Statistical method.................: OES +Maximum assembly length............: 999999 +Minimum assembly length............: 50 +p-value............................: 0.010000 +Quality score threshold (trimming).: 0 +Minimum read size after trimming...: 1 +Maximal ratio of uncalled bases....: 1.000000 +Minimum overlap....................: 10 +Scoring method.....................: Scaled score +Threads............................: 2 + +Allocating memory..................: 200,000,000 bytes +Assemblying reads: 0% Assemblying reads: 100% + +Assembled reads ...................: 89 / 97 (91.753%) +Discarded reads ...................: 0 / 97 (0.000%) +Not assembled reads ...............: 8 / 97 (8.247%) +Assembled reads file...............: pear.assembled.fastq +Discarded reads file...............: pear.discarded.fastq +Unassembled forward reads file.....: pear.unassembled.forward.fastq +Unassembled reverse reads file.....: pear.unassembled.reverse.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output.txt Mon Sep 14 04:50:28 2020 +0000 @@ -0,0 +1,9 @@ +F3D0 Assembled reads ...................: 91 / 99 (91.919%) Discarded reads ...................: 0 / 99 (0.000%) Not assembled reads ...............: 8 / 99 (8.081%) +F3D1 Assembled reads ...................: 89 / 97 (91.753%) Discarded reads ...................: 0 / 97 (0.000%) Not assembled reads ...............: 8 / 97 (8.247%) + + + +The above assessment has been performed on 1000 randomly selected reads per sample file. +Average % of overlapping paired-end reads = 91.836 +If the average percentage is greater than 50%, you can consider using workflow 16S_biodiversity_for_overlap_PE. +However, if the average percentage is less than 50%, use 16S_biodiversity_nonoverlap_PE. \ No newline at end of file