Mercurial > repos > qfabrepo > metadegalaxy_pear_stats
comparison pear_stats.py @ 0:ec62f17fcfe6 draft default tip
"planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/pear_stats commit 0db3cb4e9a87400bb2f8402ffc23334e24ad4b4e"
| author | qfabrepo |
|---|---|
| date | Mon, 14 Sep 2020 04:50:28 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ec62f17fcfe6 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import sys | |
| 3 import argparse | |
| 4 | |
# Command-line interface.
# The raw formatter keeps the line breaks of the usage example in --help
# output (the default formatter would reflow them into one paragraph).
parser = argparse.ArgumentParser(
    description="Parse multiple Pear statistic log to a tabular format\n" +
    "Example:\n python pear_stats.py -i \"file1.log,file2.log\" -s \"samplename1,samplename2\" -o outputfile",
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-v", "--version", action="version", version="%(prog)s 1.0")
# All three options are mandatory: the rest of the script calls .split(',')
# on the first two and open() on the third, so a missing option must be
# rejected here with a proper usage error instead of failing later.
parser.add_argument("-i", "--input", dest="inputfilelist", required=True,
                    help="comma-separated list of Pear log files")
parser.add_argument("-s", "--samplename", dest="samplename", required=True,
                    help="comma-separated list of sample names, one per input file, in the same order")
parser.add_argument("-o", "--outfile", dest="outputfile", required=True,
                    help="Pear statistic output")

# Invoked without any argument: show the help text on stderr and stop.
if len(sys.argv) == 1:
    parser.print_help(sys.stderr)
    sys.exit()

args = parser.parse_args()
| 19 | |
# Prefixes that the summary lines of a Pear log must start with, in the
# order in which processfile() expects to meet them.
tags = ['Assembled reads', 'Discarded reads', 'Not assembled reads']

# Zero-based positions (as counted by enumerate) of the first and last
# summary line inside a log file: one line per entry of `tags`.
LINESTART = 30
LINEEND = LINESTART + len(tags) - 1

# Both command-line lists are comma-separated.
inputfiles = args.inputfilelist.split(',')
inputfilenames = args.samplename.split(',')

# Report handle; it is written to and closed by the code further down.
outputfile = open(args.outputfile, 'w')

# Running sum of the "Assembled reads" percentages, updated by processfile().
allAssembled = 0
| 30 | |
def processfile(instr):
    """Collect the read-summary lines of one Pear log file.

    Only the lines at zero-based positions LINESTART..LINEEND are examined.
    The line at position LINESTART + i is kept (right-stripped) when it
    starts with tags[i]; otherwise it is reported on stdout and skipped.

    Side effect: the percentage found in parentheses on the first summary
    line (the one matching tags[0]) is added to the module-level
    ``allAssembled`` accumulator.

    Args:
        instr: path of the Pear log file to read.

    Returns:
        list of str: the matching summary lines, in file order.

    Raises:
        ValueError: if the first summary line does not contain a
            percentage of the form "(NN%)" that can be parsed as a float.
    """
    global allAssembled
    result = []
    with open(instr, 'r') as f:
        for linenum, line in enumerate(f):
            if linenum < LINESTART:
                continue
            if linenum > LINEEND:
                # Nothing past the summary window is ever used.
                break
            ix = linenum - LINESTART
            if not line.startswith(tags[ix]):
                print("ARGH!:", line)
                continue
            result.append(line.rstrip())
            if ix == 0:
                # The text between "(" and "%)" is the assembled percentage.
                try:
                    token = line.strip().split('(')[1]
                    percent = float(token.replace("%)", ""))
                except (IndexError, ValueError):
                    raise ValueError(
                        "{0}: cannot read the assembled percentage from line {1}: {2!r}".format(
                            instr, linenum + 1, line.rstrip()))
                allAssembled += percent
    return result
| 47 | |
# Write one tab-separated row per sample (sample name followed by the
# summary lines returned by processfile), then a short note carrying the
# average assembled percentage over all input files.
# try/finally guarantees the report handle is closed even when a log file
# is missing or malformed.
try:
    # Every log file needs a sample name; fail before writing anything
    # rather than part-way through the report.
    if len(inputfilenames) < len(inputfiles):
        sys.exit("pear_stats.py: {0} input file(s) given but only {1} sample name(s)".format(
            len(inputfiles), len(inputfilenames)))

    for logpath, samplename in zip(inputfiles, inputfilenames):
        output = processfile(logpath)
        output.insert(0, samplename)
        outputfile.write("\t".join(output))
        outputfile.write("\n")

    averageAssembled = allAssembled / len(inputfiles)

    averageAssembledOut = [
        "The above assessment has been performed on 1000 randomly selected reads per sample file.\n"
        "Average % of overlapping paired-end reads =",
        str(averageAssembled),
        "\nIf the average percentage is greater than 50%, you can consider using workflow 16S_biodiversity_for_overlap_PE.\n"
        "However, if the average percentage is less than 50%, use 16S_biodiversity_nonoverlap_PE.",
    ]

    outputfile.write("\n\n\n")
    outputfile.write("\t".join(averageAssembledOut))
finally:
    outputfile.close()
