Mercurial > repos > jay > pdaug_peptide_core_descriptors
diff PDAUG_Peptide_Core_Functions/PDAUG_Peptide_Core_Functions.py @ 0:0fc091fb7e8f draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 02:12:57 +0000 |
parents | |
children | 5820d2486b0d |
line wrap: on
line diff
"""Command-line wrapper around modlamp's ``BaseSequence`` peptide operations.

Reads a FASTA file, applies one of four sub-commands (``mutateAA``,
``filterduplicates``, ``keepnaturalaa``, ``filteraa``) to its sequences and
writes the resulting sequences to a FASTA file.
"""
import argparse
import os  # kept from the original module; not used below
import sys  # kept from the original module; not used below

import pandas as pd  # kept from the original module; not used below


def build_parser():
    """Return the argument parser with one sub-command per peptide operation.

    The chosen sub-command is stored in ``args.command``; omitting it is a
    usage error instead of an ``IndexError`` on ``sys.argv[1]``.
    """
    parser = argparse.ArgumentParser(description='Deployment tool')
    subparsers = parser.add_subparsers(dest='command')
    # Set as an attribute rather than a keyword so it also works on
    # Python versions older than 3.7.
    subparsers.required = True

    mutate = subparsers.add_parser('mutateAA')
    mutate.add_argument("-I", "--InFile", required=True, help="Input fasta sequence")
    mutate.add_argument("-N", "--nr", required=True, type=int, help="Number of mutations to perform per sequence")
    mutate.add_argument("-P", "--Prob", required=True, type=float, help="Probability of mutating a sequence")
    mutate.add_argument("-F", "--FastOut", required=False, default='Out.fasta', help="Mutated output fasta")

    duplicates = subparsers.add_parser('filterduplicates')
    duplicates.add_argument("-I", "--InFile", required=True, help="Input file")
    duplicates.add_argument("-F", "--FastOut", required=False, default='Out.fasta', help="Output file")

    natural = subparsers.add_parser('keepnaturalaa')
    natural.add_argument("-I", "--InFile", required=True, help="Inputt file")
    natural.add_argument("-F", "--FastOut", required=False, default='Out.fasta', help="Output file")

    filter_aa = subparsers.add_parser('filteraa')
    filter_aa.add_argument("-I", "--InFile", required=True, help="Input file")
    filter_aa.add_argument("-F", "--FastOut", required=False, default='Out.fasta', help="Output file")
    filter_aa.add_argument("-A", "--FilterAA", required=True, help="Filter amino acide")

    return parser


def read_fasta(path):
    """Parse the FASTA file at *path* into parallel header/sequence lists.

    Blank lines are ignored and sequence lines that follow a header are
    concatenated, so wrapped records stay attached to their own header.
    Headers that have no sequence data are dropped so that both returned
    lists always have the same length.

    :param path: path of the FASTA file to read.
    :return: tuple ``(headers, sequences)`` of equally long lists; headers
        keep their leading ``>``.
    :raises ValueError: if sequence data appears before the first header.
    """
    headers = []
    chunks = []
    with open(path) as handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue
            # Containment test (as in the original script) rather than
            # startswith, so a header preceded by stray characters such as
            # a byte-order mark is still recognised.
            if '>' in text:
                headers.append(text)
                chunks.append([])
            elif not chunks:
                raise ValueError(
                    "Sequence data found before the first FASTA header in %r" % path
                )
            else:
                chunks[-1].append(text)

    records = [(name, ''.join(parts)) for name, parts in zip(headers, chunks)]
    records = [(name, seq) for name, seq in records if seq]
    return [name for name, _ in records], [seq for _, seq in records]


def write_fasta(path, headers, sequences):
    """Write *headers* and *sequences* pairwise to *path* in FASTA format.

    Each record is written as the header line followed by the sequence line.
    The file is closed (and therefore flushed) before returning.
    """
    with open(path, 'w') as handle:
        for name, seq in zip(headers, sequences):
            handle.write(name + '\n')
            handle.write(seq + '\n')


def run(args):
    """Apply the sub-command selected in *args* and write the output FASTA.

    :param args: namespace produced by :func:`build_parser`.
    :raises ValueError: if ``--FilterAA`` contains no amino acid.
    """
    # Imported here so that building the parser and the FASTA helpers do not
    # require modlamp to be importable.
    from modlamp.core import BaseSequence

    headers, sequences = read_fasta(args.InFile)

    peptides = BaseSequence(len(sequences))
    peptides.sequences = sequences
    # Hand the headers to modlamp as sequence names. Per the modlamp
    # documentation the filter methods keep ``names`` in step with
    # ``sequences``; this is verified below before the names are trusted.
    peptides.names = list(headers)

    if args.command == 'mutateAA':
        peptides.mutate_AA(args.nr, args.Prob)
    elif args.command == 'filterduplicates':
        peptides.filter_duplicates()
    elif args.command == 'keepnaturalaa':
        peptides.keep_natural_aa()
    elif args.command == 'filteraa':
        # Drop empty entries (e.g. from a trailing comma); an empty entry
        # would presumably act as a match-everything filter.
        residues = [aa.strip() for aa in args.FilterAA.split(',') if aa.strip()]
        if not residues:
            raise ValueError("--FilterAA must list at least one amino acid")
        peptides.filter_aa(residues)

    out_sequences = peptides.sequences
    out_headers = list(getattr(peptides, 'names', None) or [])
    if len(out_headers) != len(out_sequences):
        # modlamp did not keep the names aligned; fall back to the original
        # positional pairing of input headers with output sequences.
        out_headers = headers

    write_fasta(args.FastOut, out_headers, out_sequences)


def main(argv=None):
    """Parse *argv* (defaults to ``sys.argv[1:]``) and run the sub-command."""
    run(build_parser().parse_args(argv))


if __name__ == "__main__":
    main()