Mercurial > repos > jay > pdaug_fishers_plot
comparison PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py @ 5:7a9f6ac448bc draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
| author | jay |
|---|---|
| date | Tue, 12 Jan 2021 18:35:36 +0000 |
| parents | 3bde5f0d28d9 |
| children | |
comparison
equal
deleted
inserted
replaced
| 4:3bde5f0d28d9 | 5:7a9f6ac448bc |
|---|---|
| 1 import os | 1 |
| 2 import argparse | 2 import pandas as pd |
| 3 | 3 |
| 4 | 4 |
| 5 def TSVtoFASTA(InFile, Method, Positive, Negative, OutFile): | 5 def TSVtoFASTA(infile, method, firstdatafile, seconddatafile, outfile, clmpepid, slcclasslabel, peps): |
| 6 | 6 |
| 7 if Method == 'WithClassLabel': | |
| 8 | 7 |
| 9 f = open(InFile) | 8 fn = [firstdatafile, seconddatafile] |
| 10 lines = f.readlines() | |
| 11 | 9 |
| 12 of1 = open(Positive,'w') | |
| 13 of2 = open(Negative,'w') | |
| 14 | 10 |
| 15 n = 0 | 11 df = pd.read_csv(infile, sep="\t") |
| 16 m = 0 | 12 if clmpepid == None: |
| 13 pass | |
| 14 else: | |
| 15 names = df[clmpepid].tolist() | |
| 16 | |
| 17 peps = df[peps].tolist() | |
| 18 | |
| 19 if method == "withoutlabel": | |
| 20 f = open(outfile,'w') | |
| 21 if clmpepid is not None: | |
| 22 for i,n in enumerate(peps): | |
| 23 f.write(">"+names[i]+'\n') | |
| 24 f.write(n+'\n') | |
| 25 f.close() | |
| 26 else: | |
| 27 for i,n in enumerate(peps): | |
| 28 f.write(">"+str(i)+'\n') | |
| 29 f.write(n+'\n') | |
| 30 f.close() | |
| 31 | |
| 32 elif method == "withlabel": | |
| 33 labels = df[slcclasslabel].tolist() | |
| 34 | |
| 35 label = list(set(labels)) | |
| 17 | 36 |
| 18 l = [] | 37 if clmpepid is None: |
| 19 | 38 for i, l in enumerate(label): |
| 20 for line in lines[1:]: | 39 f = open(fn[i],'w') |
| 21 l.append(line.split('\t')[1].strip('\n').strip('\r')) | 40 print('ok1') |
| 22 l = list(set(l)) | 41 for i, L in enumerate(labels): |
| 23 | 42 if l == L: |
| 24 print(l) | 43 f.write(">"+str(i)+"_"+str(l)+'\n') |
| 25 | 44 f.write(peps[i]+'\n') |
| 26 for line in lines: | 45 f.close() |
| 27 | 46 else: |
| 28 if l[1] in line.split('\t')[1].strip('\n').strip('\r'): | 47 for i, l in enumerate(label): |
| 29 n= n+1 | 48 f = open(fn[i],'w') |
| 30 of1.write('>peptide_'+str(n)+'_'+str(l[1])+'\n') | 49 for i, L in enumerate(labels): |
| 31 of1.write(line.split('\t')[0]+'\n') | 50 if l == L: |
| 32 | 51 f.write(">"+names[i]+"_"+l+'\n') |
| 33 if l[0] in line.split('\t')[1].strip('\n').strip('\r'): | 52 f.write(peps[i]+'\n') |
| 34 m= m+1 | 53 f.close() |
| 35 of2.write('>peptide_'+str(m)+'_'+str(l[0])+'\n') | |
| 36 of2.write(line.split('\t')[0]+'\n') | |
| 37 | |
| 38 elif Method == 'NoClassLabel': | |
| 39 | |
| 40 f = open(InFile) | |
| 41 lines = f.readlines() | |
| 42 of1 = open(OutFile,'w') | |
| 43 | |
| 44 for i, line in enumerate(lines[1:]): | |
| 45 of1.write('>peptide_'+str(i)+'\n') | |
| 46 of1.write(line.split('\t')[0]+'\n') | |
| 47 | |
| 48 else: | |
| 49 pass | |
| 50 | 54 |
| 51 if __name__=="__main__": | 55 if __name__=="__main__": |
| 52 | 56 |
| 53 import argparse | 57 import argparse |
| 54 | |
| 55 parser = argparse.ArgumentParser() | 58 parser = argparse.ArgumentParser() |
| 56 | |
| 57 parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") | 59 parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") |
| 58 parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") | 60 parser.add_argument("-F", "--FirstDataFile", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") |
| 59 parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") | 61 parser.add_argument("-S", "--SecondDataFile", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") |
| 60 parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") | 62 parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") |
| 61 parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") | 63 parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") |
| 64 parser.add_argument("-C", "--ClmPepID", required=False, default=None, help="Peptide Column Name") | |
| 65 parser.add_argument("-L", "--SlcClassLabel", required=False, default="Class_label", help="Class Label Column Name") | |
| 66 parser.add_argument("-P", "--PeptideColumn", required=True, default=None, help="Class Label Column Name") | |
| 62 args = parser.parse_args() | 67 args = parser.parse_args() |
| 63 | 68 |
| 64 TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) | 69 TSVtoFASTA(args.InFile, args.Method, args.FirstDataFile, args.SecondDataFile, args.OutFile, args.ClmPepID, args.SlcClassLabel, args.PeptideColumn) |
| 70 | |
| 71 | |
| 72 | |
