Mercurial > repos > jay > pdaug_ml_models
changeset 1:5448f9425c6a draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 3c91f421d26c8f42cf2671e47db735d2cf69dde8"
author | jay |
---|---|
date | Tue, 29 Dec 2020 04:43:09 +0000 |
parents | 0973f093d98f |
children | 71e48a88d777 |
files | PDAUG_ML_Models/PDAUG_ML_Models.xml PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py PDAUG_Peptide_Data_Access/test-data/Out.tsv PDAUG_Peptide_Data_Access/test-data/out.tsv PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py readme.md readme.txt |
diffstat | 6 files changed, 591 insertions(+), 566 deletions(-) [+] |
line wrap: on
line diff
--- a/PDAUG_ML_Models/PDAUG_ML_Models.xml Wed Oct 28 02:31:40 2020 +0000 +++ b/PDAUG_ML_Models/PDAUG_ML_Models.xml Tue Dec 29 04:43:09 2020 +0000 @@ -16,8 +16,8 @@ #if $SelMLAlgo.MLAlgo == 'SVMC' #if $SelMLAlgo.settings.advanced == "advanced" - --cache_size $SelMLAlgo.settings.cache_size - --C $SelMLAlgo.settings.C + --cache_size '$SelMLAlgo.settings.cache_size' + --C '$SelMLAlgo.settings.C' --kernel '$SelMLAlgo.settings.kernel' --degree '$SelMLAlgo.settings.degree' --gamma '$SelMLAlgo.settings.gamma' @@ -35,172 +35,172 @@ #if $SelMLAlgo.MLAlgo == 'SGDC' #if $SelMLAlgo.settings.advanced == "advanced" - --loss $SelMLAlgo.settings.loss - --penalty $SelMLAlgo.settings.penalty - --alpha $SelMLAlgo.settings.alpha - --l1_ratio $SelMLAlgo.settings.l1_ratio - --fit_intercept $SelMLAlgo.settings.fit_intercept - --max_iter $SelMLAlgo.settings.max_iter - --tol $SelMLAlgo.settings.tol - --shuffle $SelMLAlgo.settings.shuffle - --verbose $SelMLAlgo.settings.verbose - --epsilon $SelMLAlgo.settings.epsilon - --n_jobs $SelMLAlgo.settings.n_jobs - --random_state $SelMLAlgo.settings.random_state - --learning_rate $SelMLAlgo.settings.learning_rate - --eta0 $SelMLAlgo.settings.eta0 - --power_t $SelMLAlgo.settings.power_t - --early_stopping $SelMLAlgo.settings.early_stopping - --validation_fraction $SelMLAlgo.settings.validation_fraction - --n_iter_no_change $SelMLAlgo.settings.n_iter_no_change - --warm_start $SelMLAlgo.settings.warm_start - --average $SelMLAlgo.settings.average + --loss '$SelMLAlgo.settings.loss' + --penalty '$SelMLAlgo.settings.penalty' + --alpha '$SelMLAlgo.settings.alpha' + --l1_ratio '$SelMLAlgo.settings.l1_ratio' + --fit_intercept '$SelMLAlgo.settings.fit_intercept' + --max_iter '$SelMLAlgo.settings.max_iter' + --tol '$SelMLAlgo.settings.tol' + --shuffle '$SelMLAlgo.settings.shuffle' + --verbose '$SelMLAlgo.settings.verbose' + --epsilon '$SelMLAlgo.settings.epsilon' + --n_jobs '$SelMLAlgo.settings.n_jobs' + --random_state '$SelMLAlgo.settings.random_state' + 
--learning_rate '$SelMLAlgo.settings.learning_rate' + --eta0 '$SelMLAlgo.settings.eta0' + --power_t '$SelMLAlgo.settings.power_t' + --early_stopping '$SelMLAlgo.settings.early_stopping' + --validation_fraction '$SelMLAlgo.settings.validation_fraction' + --n_iter_no_change '$SelMLAlgo.settings.n_iter_no_change' + --warm_start '$SelMLAlgo.settings.warm_start' + --average '$SelMLAlgo.settings.average' #end if #end if #if $SelMLAlgo.MLAlgo == 'DTC' #if $SelMLAlgo.settings.advanced == "advanced" - --criterion $SelMLAlgo.settings.criterion - --splitter $SelMLAlgo.settings.splitter - --max_depth $SelMLAlgo.settings.max_depth - --min_samples_split $SelMLAlgo.settings.min_samples_split - --min_samples_leaf $SelMLAlgo.settings.min_samples_leaf - --min_weight_fraction_leaf $SelMLAlgo.settings.min_weight_fraction_leaf - --max_features $SelMLAlgo.settings.max_features - --random_state $SelMLAlgo.settings.random_state - --max_leaf_nodes $SelMLAlgo.settings.max_leaf_nodes - --min_impurity_decrease $SelMLAlgo.settings.min_impurity_decrease - --min_impurity_split $SelMLAlgo.settings.min_impurity_split - --presort $SelMLAlgo.settings.presort - --ccpalpha $SelMLAlgo.settings.ccpalpha + --criterion '$SelMLAlgo.settings.criterion' + --splitter '$SelMLAlgo.settings.splitter' + --max_depth '$SelMLAlgo.settings.max_depth' + --min_samples_split '$SelMLAlgo.settings.min_samples_split' + --min_samples_leaf '$SelMLAlgo.settings.min_samples_leaf' + --min_weight_fraction_leaf '$SelMLAlgo.settings.min_weight_fraction_leaf' + --max_features '$SelMLAlgo.settings.max_features' + --random_state '$SelMLAlgo.settings.random_state' + --max_leaf_nodes '$SelMLAlgo.settings.max_leaf_nodes' + --min_impurity_decrease '$SelMLAlgo.settings.min_impurity_decrease' + --min_impurity_split '$SelMLAlgo.settings.min_impurity_split' + --presort '$SelMLAlgo.settings.presort' + --ccpalpha '$SelMLAlgo.settings.ccpalpha' #end if #end if #if $SelMLAlgo.MLAlgo == 'GBC' #if $SelMLAlgo.settings.advanced == "advanced" - 
--loss $SelMLAlgo.settings.loss - --learning_rate $SelMLAlgo.settings.learning_rate - --n_estimators $SelMLAlgo.settings.n_estimators - --subsample $SelMLAlgo.settings.subsample - --criterion $SelMLAlgo.settings.criterion - --min_samples_split $SelMLAlgo.settings.min_samples_split - --min_samples_leaf $SelMLAlgo.settings.min_samples_leaf - --min_weight_fraction_leaf $SelMLAlgo.settings.min_weight_fraction_leaf - --max_depth $SelMLAlgo.settings.max_depth - --min_impurity_decrease $SelMLAlgo.settings.min_impurity_decrease - --min_impurity_split $SelMLAlgo.settings.min_impurity_split - --init $SelMLAlgo.settings.init - --random_state $SelMLAlgo.settings.random_state - --max_features $SelMLAlgo.settings.max_features - --verbose $SelMLAlgo.settings.verbose - --max_leaf_nodes $SelMLAlgo.settings.max_leaf_nodes - --warm_start $SelMLAlgo.settings.warm_start - --presort $SelMLAlgo.settings.presort - --validation_fraction $SelMLAlgo.settings.validation_fraction - --n_iter_no_change $SelMLAlgo.settings.n_iter_no_change - --tol $SelMLAlgo.settings.tol - --ccpalpha $SelMLAlgo.settings.ccpalpha + --loss '$SelMLAlgo.settings.loss' + --learning_rate '$SelMLAlgo.settings.learning_rate' + --n_estimators '$SelMLAlgo.settings.n_estimators' + --subsample '$SelMLAlgo.settings.subsample' + --criterion '$SelMLAlgo.settings.criterion' + --min_samples_split '$SelMLAlgo.settings.min_samples_split' + --min_samples_leaf '$SelMLAlgo.settings.min_samples_leaf' + --min_weight_fraction_leaf '$SelMLAlgo.settings.min_weight_fraction_leaf' + --max_depth '$SelMLAlgo.settings.max_depth' + --min_impurity_decrease '$SelMLAlgo.settings.min_impurity_decrease' + --min_impurity_split '$SelMLAlgo.settings.min_impurity_split' + --init '$SelMLAlgo.settings.init' + --random_state '$SelMLAlgo.settings.random_state' + --max_features '$SelMLAlgo.settings.max_features' + --verbose '$SelMLAlgo.settings.verbose' + --max_leaf_nodes '$SelMLAlgo.settings.max_leaf_nodes' + --warm_start '$SelMLAlgo.settings.warm_start' + 
--presort '$SelMLAlgo.settings.presort' + --validation_fraction '$SelMLAlgo.settings.validation_fraction' + --n_iter_no_change '$SelMLAlgo.settings.n_iter_no_change' + --tol '$SelMLAlgo.settings.tol' + --ccpalpha '$SelMLAlgo.settings.ccpalpha' #end if #end if #if $SelMLAlgo.MLAlgo == 'RFC' #if $SelMLAlgo.settings.advanced == "advanced" - --n_estimators $SelMLAlgo.settings.n_estimators - --criterion $SelMLAlgo.settings.criterion - --max_depth $SelMLAlgo.settings.max_depth - --min_samples_split $SelMLAlgo.settings.min_samples_split - --min_samples_leaf $SelMLAlgo.settings.min_samples_leaf - --min_weight_fraction_leaf $SelMLAlgo.settings.min_weight_fraction_leaf - --max_features $SelMLAlgo.settings.max_features - --max_leaf_nodes $SelMLAlgo.settings.max_leaf_nodes - --min_impurity_decrease $SelMLAlgo.settings.min_impurity_decrease - --min_impurity_split $SelMLAlgo.settings.min_impurity_split - --bootstrap $SelMLAlgo.settings.bootstrap - --oob_score $SelMLAlgo.settings.oob_score - --n_jobs $SelMLAlgo.settings.n_jobs - --random_state $SelMLAlgo.settings.random_state - --verbose $SelMLAlgo.settings.verbose - --warm_start $SelMLAlgo.settings.warm_start - --ccp_alpha $SelMLAlgo.settings.ccp_alpha - --max_samples $SelMLAlgo.settings.max_samples + --n_estimators '$SelMLAlgo.settings.n_estimators' + --criterion '$SelMLAlgo.settings.criterion' + --max_depth '$SelMLAlgo.settings.max_depth' + --min_samples_split '$SelMLAlgo.settings.min_samples_split' + --min_samples_leaf '$SelMLAlgo.settings.min_samples_leaf' + --min_weight_fraction_leaf '$SelMLAlgo.settings.min_weight_fraction_leaf' + --max_features '$SelMLAlgo.settings.max_features' + --max_leaf_nodes '$SelMLAlgo.settings.max_leaf_nodes' + --min_impurity_decrease '$SelMLAlgo.settings.min_impurity_decrease' + --min_impurity_split '$SelMLAlgo.settings.min_impurity_split' + --bootstrap '$SelMLAlgo.settings.bootstrap' + --oob_score '$SelMLAlgo.settings.oob_score' + --n_jobs '$SelMLAlgo.settings.n_jobs' + --random_state 
'$SelMLAlgo.settings.random_state' + --verbose '$SelMLAlgo.settings.verbose' + --warm_start '$SelMLAlgo.settings.warm_start' + --ccp_alpha '$SelMLAlgo.settings.ccp_alpha' + --max_samples '$SelMLAlgo.settings.max_samples' #end if #end if #if $SelMLAlgo.MLAlgo == 'LRC' #if $SelMLAlgo.settings.advanced == "advanced" - --penalty $SelMLAlgo.settings.penalty - --dual $SelMLAlgo.settings.dual - --tol $SelMLAlgo.settings.tol - --C $SelMLAlgo.settings.C - --fit_intercept $SelMLAlgo.settings.fit_intercept - --intercept_scaling $SelMLAlgo.settings.intercept_scaling - --random_state $SelMLAlgo.settings.random_state - --solver $SelMLAlgo.settings.solver - --max_iter $SelMLAlgo.settings.max_iter - --multi_class $SelMLAlgo.settings.multi_class - --verbose $SelMLAlgo.settings.verbose - --warm_start $SelMLAlgo.settings.warm_start - --n_jobs $SelMLAlgo.settings.n_jobs - --l1_ratio $SelMLAlgo.settings.l1_ratio + --penalty '$SelMLAlgo.settings.penalty' + --dual '$SelMLAlgo.settings.dual' + --tol '$SelMLAlgo.settings.tol' + --C '$SelMLAlgo.settings.C' + --fit_intercept '$SelMLAlgo.settings.fit_intercept' + --intercept_scaling '$SelMLAlgo.settings.intercept_scaling' + --random_state '$SelMLAlgo.settings.random_state' + --solver '$SelMLAlgo.settings.solver' + --max_iter '$SelMLAlgo.settings.max_iter' + --multi_class '$SelMLAlgo.settings.multi_class' + --verbose '$SelMLAlgo.settings.verbose' + --warm_start '$SelMLAlgo.settings.warm_start' + --n_jobs '$SelMLAlgo.settings.n_jobs' + --l1_ratio '$SelMLAlgo.settings.l1_ratio' #end if #end if #if $SelMLAlgo.MLAlgo == 'KNC' #if $SelMLAlgo.settings.advanced == "advanced" - --n_neighbors $SelMLAlgo.settings.n_neighbors - --weights $SelMLAlgo.settings.weights - --algorithm $SelMLAlgo.settings.algorithm - --leaf_size $SelMLAlgo.settings.leaf_size - --p $SelMLAlgo.settings.p - --metric $SelMLAlgo.settings.metric - --n_jobs $SelMLAlgo.settings.n_jobs + --n_neighbors '$SelMLAlgo.settings.n_neighbors' + --weights '$SelMLAlgo.settings.weights' + 
--algorithm '$SelMLAlgo.settings.algorithm' + --leaf_size '$SelMLAlgo.settings.leaf_size' + --p '$SelMLAlgo.settings.p' + --metric '$SelMLAlgo.settings.metric' + --n_jobs '$SelMLAlgo.settings.n_jobs' #end if #end if #if $SelMLAlgo.MLAlgo == 'GNBC' #if $SelMLAlgo.settings.advanced == "advanced" - --var_smoothing $SelMLAlgo.settings.var_smoothing + --var_smoothing '$SelMLAlgo.settings.var_smoothing' #end if #end if #if $SelMLAlgo.MLAlgo == 'MLP' #if $SelMLAlgo.settings.advanced == "advanced" - --hidden_layer_sizes $SelMLAlgo.settings.hidden_layer_sizes - --activation $SelMLAlgo.settings.activation - --solver $SelMLAlgo.settings.solver - --alpha $SelMLAlgo.settings.alpha - --batch_size $SelMLAlgo.settings.batch_size - --learning_rate $SelMLAlgo.settings.learning_rate - --learning_rate_init $SelMLAlgo.settings.learning_rate_init - --power_t $SelMLAlgo.settings.power_t - --max_iter $SelMLAlgo.settings.max_iter - --shuffle $SelMLAlgo.settings.shuffle - --random_state $SelMLAlgo.settings.random_state - --tol $SelMLAlgo.settings.tol - --verbose $SelMLAlgo.settings.verbose - --warm_start $SelMLAlgo.settings.warm_start - --momentum $SelMLAlgo.settings.momentum - --nesterovs_momentum $SelMLAlgo.settings.nesterovs_momentum - --early_stopping $SelMLAlgo.settings.early_stopping - --validation_fraction $SelMLAlgo.settings.validation_fraction - --beta_1 $SelMLAlgo.settings.beta_1 - --beta_2 $SelMLAlgo.settings.beta_2 - --epsilon $SelMLAlgo.settings.epsilon - --n_iter_no_change $SelMLAlgo.settings.n_iter_no_change - --max_fun $SelMLAlgo.settings.max_fun - --TrainFile $SelMLAlgo.settings.TrainFile - --TestMethod $SelMLAlgo.settings.TestMethod - --SelectedSclaer $SelMLAlgo.settings.SelectedSclaer - --NFolds $SelMLAlgo.settings.NFolds - --Testspt $SelMLAlgo.settings.Testspt - --TestFile $SelMLAlgo.settings.TestFile - --OutFile $SelMLAlgo.settings.OutFile - --htmlOutDir $SelMLAlgo.settings.htmlOutDir - --htmlFname $SelMLAlgo.settings.htmlFname - --Workdirpath 
$SelMLAlgo.settings.Workdirpath + --hidden_layer_sizes '$SelMLAlgo.settings.hidden_layer_sizes' + --activation '$SelMLAlgo.settings.activation' + --solver '$SelMLAlgo.settings.solver' + --alpha '$SelMLAlgo.settings.alpha' + --batch_size '$SelMLAlgo.settings.batch_size' + --learning_rate '$SelMLAlgo.settings.learning_rate' + --learning_rate_init '$SelMLAlgo.settings.learning_rate_init' + --power_t '$SelMLAlgo.settings.power_t' + --max_iter '$SelMLAlgo.settings.max_iter' + --shuffle '$SelMLAlgo.settings.shuffle' + --random_state '$SelMLAlgo.settings.random_state' + --tol '$SelMLAlgo.settings.tol' + --verbose '$SelMLAlgo.settings.verbose' + --warm_start '$SelMLAlgo.settings.warm_start' + --momentum '$SelMLAlgo.settings.momentum' + --nesterovs_momentum '$SelMLAlgo.settings.nesterovs_momentum' + --early_stopping '$SelMLAlgo.settings.early_stopping' + --validation_fraction '$SelMLAlgo.settings.validation_fraction' + --beta_1 '$SelMLAlgo.settings.beta_1' + --beta_2 '$SelMLAlgo.settings.beta_2' + --epsilon '$SelMLAlgo.settings.epsilon' + --n_iter_no_change '$SelMLAlgo.settings.n_iter_no_change' + --max_fun '$SelMLAlgo.settings.max_fun' + --TrainFile '$SelMLAlgo.settings.TrainFile' + --TestMethod '$SelMLAlgo.settings.TestMethod' + --SelectedSclaer '$SelMLAlgo.settings.SelectedSclaer' + --NFolds '$SelMLAlgo.settings.NFolds' + --Testspt '$SelMLAlgo.settings.Testspt' + --TestFile '$SelMLAlgo.settings.TestFile' + --OutFile '$SelMLAlgo.settings.OutFile' + --htmlOutDir '$SelMLAlgo.settings.htmlOutDir' + --htmlFname '$SelMLAlgo.settings.htmlFname' + --Workdirpath '$SelMLAlgo.settings.Workdirpath' #end if #end if
--- a/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py Wed Oct 28 02:31:40 2020 +0000 +++ b/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py Tue Dec 29 04:43:09 2020 +0000 @@ -10,13 +10,13 @@ def DataGen(DataBaseType, OutFile, IDs): - if DataBaseType == 'AMPvsTM': + if DataBaseType == 'AMPvsTMP': data = load_AMPvsTM() elif DataBaseType == 'AMPvsUniProt': data = load_AMPvsUniProt() - elif DataBaseType == 'ACPvsTM': + elif DataBaseType == 'ACPvsTMP': data = load_ACPvsTM() elif DataBaseType == 'ACPvsRandom': @@ -39,16 +39,11 @@ print ("Enter Correct Values") exit() - Target = data.target.tolist() - Target_list = set(Target) - df = data.sequences - - - Target = pd.DataFrame(Target, columns=['Target']) - df = pd.DataFrame(df, columns=['Peptide']) - - df = pd.DataFrame(df) - df = pd.concat([df, Target], axis=1) + peptide_data = data.sequences + class_label = int(len(peptide_data)/2)*[data.target_names[0]]+int(len(peptide_data)/2)*[data.target_names[1]] + peptide_data = pd.DataFrame(peptide_data, columns=['name']) + class_label = pd.DataFrame(class_label, columns=['class_label']) + df = pd.concat([peptide_data,class_label], axis=1) df.to_csv(OutFile, index=False, sep='\t') @@ -69,9 +64,9 @@ help="Out put file name for str descriptors") parser.add_argument("-L", "--List", - required=False, - default=None, - help="List of integer as ID") + required=False, + default=None, + help="List of integer as ID") args = parser.parse_args() DataGen(args.DataBaseType, args.OutFile, args.List)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PDAUG_Peptide_Data_Access/test-data/Out.tsv Tue Dec 29 04:43:09 2020 +0000 @@ -0,0 +1,413 @@ +name class_label +AAGAATVLLVIVLLAGSYLAVLA TM +LWIVIACLACVGSAAALTLRA TM +FYRFYMLREGTAVPAVWFSIELIFGLFA TM +GTLELGVDYGRAN TM +KLFWRAVVAEFLATTLFVFISIGSALGFK TM +HGSIGAGVDW TM +ATIYLVCFCFFKQLAMIFMSVLAGNMYE TM +GKLSLAATRSSE TM +TSRLLLAGVALGIICSALMTWAIYF TM +LGFFLVTFGFIWGMMLL TM +YMLFTMIFVISSIIITVVVI TM +ACFSAKVN TM +LIWVAATLAGAIIAVLLVIYA TM +HIPFAFAFAILAYLTLVLFRPVM TM +CGLLVLLTLLLMGAIVTLGVF TM +TGVYILVGVVLWTA TM +LSMFIITPVMVLGTIFIFVMG TM +MTLVALLVIGVSLTGYLGLKA TM +QLYYQVLNFGMIVSSALMIW TM +ALGALCLLLSVGSATACLLLGA TM +LAVIFFFASALLYSQAAT TM +MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT TM +ADILILSLLVIQCALGLLTIPFSA TM +RHQQATFAGFIKGATWVSILSIAVLVFLALAN TM +GTKWWTVGIRPMYKW TM +PAIRVFATYAKWDEK TM +PHVFLLFITFPILFIGWGSQS TM +AGLVLWGAIIFVGWNALLLLFFW TM +TQIVLLGLVTAALWAGLLTLLLLWHW TM +SFIGRVFLFLMIVLPLWCGLHRMHHAMHD TM +MYLGAGIALIPVIMSINYL TM +FQTGFDFSD TM +LISGTLGIICLSLMATLGILL TM +FPFLNNLSFWFTVVGVILVNVSLG TM +LDIYTRLGGMVWR TM +FLIFLLVIMTVITVALLTLLF TM +KDIGILYLFTAGIVGLISVCFTVYMRMELQH TM +PLFYIINILVPCVLISFMINLVFYL TM +HKALKTLGIIMGVFTLCWLPFFLVNIVNVF TM +ANLKKLKTLMSAFLIVLGLLTFGA TM +IWVGIFLLAALLAALFVCLKA TM +KGAAGITGAGFITLAATLS TM +VVLLAIVTLISVVQNGFFAHKV TM +IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS TM +ALTVGTLLFLTGIGAASWAIV TM +ITLIIFGVMAGVIGTILLISYGI TM +WVWISLYYVAFYVVMSGIFAL TM +GSNVALHVN TM +GVLELGVDYGRAN TM +IYLLILIILSMLCLVYASVPL TM +LCSFLFVIVLFVATFYTL TM +PLYFIVNVVIPCLLFSFLTGLVFYL TM +ALLIAGGVGLLALAAALVLNA TM +IWTWLRTTL TM +PAHMIAISFFFTNALALALHGALVLSAA TM +GGSLYIVGIFLPIWTFMIYIF TM +GLTLGTGGR TM +STVTGGYAQS TM +DCFLLLVLLLYAPVGFCLLVL TM +FWRAVVAEFLAMILFIFISIGSALG TM +DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK TM +LTFTEKWN TM +IRDTLMRLVLTVRSN TM +GKLSLAATRSTE TM +GYNKFVVQYATDA TM +QFHTMLMIAASGAVLIALGILCLVIQMYVSIR TM +LILVLFVVLVSSVGVSLTLYA TM +MASLWADYT TM +DAWSGFVRGYGYDNRTN TM +RWLWDFVNA TM +MFVALLGLGLGQVVCSVALFF TM +NMSTYVDYII TM +IWVKMTFIVVLGLCFVFFWSF TM 
+SLLISVVLVAYYLYIPLPDAI TM +RKTTINGLIVAVILGVCFTGLQAYEYSHA TM +IPVQLLWVNLVTDGLPATALG TM +LAVIAGGVGFIMVAVLLVLFA TM +MNKRNIMNLILAGGAGLPITTLALGYGAFF TM +IALIVAGLSALGIATALVLNA TM +NGNMWRILDHGAISL TM +VAIICAAVVAVGLIVGLSVGL TM +FSFLIVAGATTLFCLLHFGVI TM +VGISIATIVAIIAAIYYVPW TM +PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF TM +LILIGAGLGVLALAAGLILTA TM +LGLAAGAIYYYNTSNVFA TM +QLGAGAFGGYQV TM +IAIALLVILVVCSLITMSVIL TM +LAFLIGGIIGGLLLLIGVSCCLW TM +NGHMLRILDHGAISM TM +MALILGIDRFMSECRALTNF TM +AVIAGTTLAITALAVTS TM +LYIVLAILCGVSIAVALALTA TM +AAVTLGVLCLGLLVTVILLILQL TM +LWLVIGVLTAAALAVTLIALA TM +GTLLLLTAIGAASWAIVAVLL TM +LLLVASVIQGLGLLLCFTYICLHFSAL TM +AMIVALIVICITAVVAALV TM +LVFIGTCGAVLAVALGLVLWA TM +TRFGIAAKYQ TM +IPWAVLIVVLITSLIIALIAL TM +LWVVCAVLAGLGLTTALVLYA TM +FAGRVLAGAVMMSGIGIFGLWAGIL TM +LLIVLAGLAVVAVASGLILNA TM +VGSIVGGIYLGFCFNAGAPAVEAFI TM +MLSLGVSYR TM +LTKWFFCCVCTILTMPFF TM +LATIAASAIVLVVAVGLGLMA TM +PIVVTGAVY TM +KPHNLPMVFTGTAILYIGWFGFNAGSA TM +DEFGEQLSKVISLICVAVWL TM +HTLLTGVDF TM +SVELIALLAISCTFFLFMHT TM +LLIALLIYWTLAFITKTIKF TM +FKLVIFVVLGIAIASGLMLYA TM +CTLSISVLLAQTIFLFLIA TM +FVIAGGTLAIPILAFVASFLL TM +LLVITAIVLILSAAVGLVMYA TM +LALATALIGGVAAIASLLLYA TM +NPVIVIINLITLAAALLHTKTWFEL TM +FIVVAGVVILAVTIALLVYFL TM +IGLMCFLSIIITTVCIIMIAT TM +FSVDTQLQS TM +EVYILLNWIGYVNSGFNPLIYCRS TM +LTLAVALIVGVSAIASLLLYA TM +LYLAVVVLIGIGLTTTLVLYA TM +MSTAISVLLAQAVFLLLT TM +ALPGLMNKMEKAGCKRSVV TM +KQFIRYLISSNVGEVVCIFL TM +KYVVSSLVLVYGLIKVLTWIF TM +TGVSPVFAGGVEYA TM +IVIVLGILCFLLLLTVAVLVI TM +KSLGILGILLGVAALCTIVALSVV TM +LCLFVVTPVMVVGTAWIFL TM +LYLAIVVLIGVALTATLMLYA TM +LTILLAIAPVLALAVGLALYG TM +FFVLLLMILILVNLAMTIWIL TM +FIVLIPSVVITVIFLFFWLFM TM +ILVLLILAVITIFALVCVLLV TM +TYFIVLIPSVVITVIFLFFWLFM TM +FLVLFIFLTSFFLNYSHTMVA TM +GDKIGMFFQAMATFFGGFIIGF TM +SNGVIVGTCLAFVAGMIGMAYAA TM +FTFEGAARSDD TM +MNYMVYFNFFACVLVPLLLMLGVYL TM +AGLILLVVTLIGMSVLVRVLI TM +VFLAVYLLGGITFLPLVLFTL TM +LIKISALVFVTVAFFYLG TM +GFFGVATFFFAALGIILIAWSAVL TM +GQWEISVIWGLGVAMAIYLTA TM +LWWIQAMTGFAMFFLGSVHLYIMMT TM 
+IAVVITVVFLTLLSVVILIFF TM +QVVATATFR TM +MLLCFAFLWVLGIAYYMY TM +IFCIIMLFALLGFL TM +PLCICVAFTCLALVLVTSIVL TM +LIIVLAIVVGVGAAVGLALSA TM +ILVPCVLGLLLLPILAMLMALCV TM +LGLLLAALICVGIATTLVLNA TM +GYAAYYLVRKNFALAMPYLVE TM +LPRTLAVLLVGAALAISGAVMQALF TM +ILLFYVIFYGCLAGIFIGTIQ TM +IMSTLLEVGYDNVKSQ TM +LLAVALIIAMSISLAWQAAGW TM +IVGQLLFVALGITFIYYLFTP TM +NFWMFGLFFFFYFFIMGAYFPFFPIWL TM +LVLIVGIVAAVGVAAALVLNA TM +LTLAVALIGGVAAITSLLLYA TM +ASGGIILIIAAILAMIMAN TM +GSAGGAALAVVVLALAFGLSG TM +LFVLLLLAILVVNLALTIWIL TM +SPPLVLAALVACIIVLGFNYWIA TM +LLFLILGIISFITFFLQGFTF TM +LIVKALGILCFLLLITVAVLAV TM +QYIHVAFQGSFACITVGLIVGAL TM +QGIAVFGYSMAVSIGGILASR TM +DHKRLGIMYIIVAIVMLLRGFADAIMMR TM +IFRLHLVLGMTLFLLF TM +LVSAIILTSFMTGLFILSLWK TM +AVVGGVIAAVFITLITVVVLI TM +INLGCDVDFD TM +ALSALCLLLSVGSAAACLLLGA TM +FTVIAGAVIVLLLTLNSNS TM +LLFVSLLFCLIAQTCWLALV TM +MVLVALLVIGVSLAGYLGLKA TM +LVIPHILRLC TM +LIRVLLGFVILFITYILFPSI TM +PLFYIINILAPCVLIALMANLVFYL TM +HKLGLGLEFQA TM +ILFVAVSFIALGCVSAFVLFE TM +AVVSAQIAITASPIS TM +LTIIGGALFVLAVAAGLVLNA TM +GCCGLLALALCSLALSLLA TM +LIVLLAIVTIIAIALVAILP TM +TEISAGWG TM +FWRYFAGNLASGGAAGATSLCFVYPLDFARTRLAA TM +IVAALGIIGLWMFFSSNELSIAT TM +MTVILFVLLGISIASALVLYA TM +SNATIAVACLSFFVCMIGAAYAS TM +LKLTFDSSFS TM +LNVLLSAAINFFLIAFAVYFLV TM +HSAMLWTIGFIVTFSVGGMTGVLLAVPGADFV TM +AVLSAKGQY TM +LWELVIEQFEDLLVRILLLAA TM +FFIVMGLVDAIPMIAVGLGLY TM +LAVIAGGMGFIATAVLLVLFA TM +ACYCRIPACLAGERRYGTCFYMGRVWAFCC AMP +AGRGKQGGKVRAKAKTRSSRAGLQFPVGRVHRLLRKGNY AMP +ALFSILRGLKKLGNMGQAFVNCKIYKKC AMP +ALSILKGLEKLAKMGIALTNCKATKKC AMP +ALWKDILKNVGKAAGKAVLNTVTDMVNQ AMP +ALWKNMLKGIGKLAGQAALGAVKTLVGAES AMP +ALWKTLLKNVGKAAGKAALNAVTDMVNQ AMP +ALWKTMLKKLGTMALHAGKAAFGAAADTISQ AMP +ALWKTMLKKLGTMALHAGKAALGAAADTISQGTQ AMP +APGNKAECEREKGYCGFLKCSFPFVVSGKCSRFFFCCKNIW AMP +ASIIKTTIKVSKAVCKTLTCICTGSCSNCK AMP +ATCDLLSGTGIKHSACAAHCLLRGNRGGYCNGRAICVCRN AMP +ATTGCSCPQCIIFDPICASSYKNGRRGFSSGCHMRCYNRCHGTDYFQISKGSKCI AMP +AYPGNGVHCGKYSCTVDKQTAIGNIGNNAA AMP +CANSCSYGPLTWSCDGNTK AMP +CRQSCSFGPLTFVCDGNTK AMP 
+DDTPSSRCGSGGWGPCLPIVDLLCIVHVTVGCSGGFGCCRIG AMP +DFASCHTNGGICLPNRCPGHMIQIGICFRPRVKCCRSW AMP +DFKDWMKTAGEWLKKKGPGILKAAMAAAT AMP +DGVKLCDVPSGTWSGHCGSSSKCSQQCKDREHFAYGGACHYQFPSVKCFCKRQC AMP +DKLIGSCVWGAVNYTSDCNGECKRRGYKGGHCGSFANVNCWCET AMP +ELCEKASKTWSGNCGNTGHCDNQCKSWEGAAHGACHVRNGKHMCFCYFNC AMP +FCKSLPLPLSVK AMP +FFGSLLSLGSKLLPSVFKLFQRKKE AMP +FFGSVLKLIPKIL AMP +FFGWLIKGAIHAGKAIHGLIHRRRH AMP +FFGWLIRGAIHAGKAIHGLIHRRRH AMP +FFPIVAGVAGQVLKKIYCTISKKC AMP +FGLPMLSILPKALCILLKRKC AMP +FIGLLISAGKAIHDLIRRRH AMP +FIGPIISALASLFG AMP +FKLGSFLKKAWKSKLAKKLRAKGKEMLKDYAKGLLEGGSEEVPGQ AMP +FLGGLMKAFPAIICAVTKKC AMP +FLGGLMKAFPALICAVTKKC AMP +FLNALKNFAKTAGKRLKSLLN AMP +FLPAIAGMAAKFLPKIFCAISKKC AMP +FLPAIAGVAAKFLPKIFCAISKKC AMP +FLPAIVGAAAKFLPKIFCVISKKC AMP +FLPAIVGAAGKFLPKIFCAISKKC AMP +FLPAIVGAAGQFLPKIFCAISKKC AMP +FLPAVLRVAAKIVPTVFCAISKKC AMP +FLPAVLRVAAKVVPTVFCLISKKC AMP +FLPAVLRVAAQVVPTVFCAISKKC AMP +FLPFIAGMAAKFLPKIFCAISKKC AMP +FLPFIAGMAANFLPKIFCAISKKC AMP +FLPFIAGVAAKFLPKIFCAISKKC AMP +FLPFLATLLSKVL AMP +FLPGLLAGLL AMP +FLPIASLLGKYL AMP +FLPIIAGVAAKVFPKIFCAISKKC AMP +FLPIIASVAAKVFPKIFCAISKKC AMP +FLPIIASVAAKVFSKIFCAISKKC AMP +FLPIIASVAANVFSKIFCAISKKC AMP +FLPILASLAAKFGPKLFCLVTKKC AMP +FLPILASLAAKLGPKLFCLVTKKC AMP +FLPILASLAATLGPKLLCLITKKC AMP +FLPLFASLIGKLL AMP +FLPLIGKVLSGIL AMP +FLPLIGRVLSGIL AMP +FLPLLAGLAANFFPKIFCKITRKC AMP +FLPLLAGLAANFLPKIFCKITRKC AMP +FLPLLAGLAANFLPTIICKISYKC AMP +FLPMLAGLAASMVPKFVCLITKKC AMP +FLPVVAGLAAKVLPSIICAVTKKC AMP +FMGGLIKAATKIVPAAYCAITKKC AMP +FSFKRLKGFAKKLWNSKLARKIRTKGLKYVKNFAKDMLSEGEEAPPAAEPPVEAPQ AMP +FVPYNPPRPGQSKPFPSFPGHGPFNPKIQWPYPLPNPGH AMP +GAIKDALKGAAKTVAVELLKKAQCKLEKTC AMP +GFFSLIKGVAKIATKGLAKNLGKMGLDLVGCKISKEC AMP +GFFSTVKNLATNVAGTVIDTLKCKVTGGCRS AMP +GFGALFKFLAKKVAKTVAKQAAKQGAKYVVNKQME AMP +GFISTVKNLATNVAGTVIDTIKCKVTGGC AMP +GFKGAFKNVMFGIAKSAGKSALNALACKIDKSC AMP +GFLDSFKNAMIGVAKSVGKTALSTLACKIDKSC AMP +GFLSILKKVLPKVMAHMK AMP +GFLSTVKNLATNVAGTVIDTLKCKVTGGCRS AMP +GFMKYIGPLIPHAVKAISDLI AMP +GFSSIFRGVAKFASKGLGKDLARLGVNLVACKISKQC AMP 
+GFVDLAKKVVGGIRNALGI AMP +GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLNNIGNTGCNEDEC AMP +GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLSNIGNTGCNEDEC AMP +GIFSKFGGKAIKNLFIKGAKNIGKEVGMDVIRTGIDVAGCKIKGEC AMP +GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC AMP +GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC AMP +GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC AMP +GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC AMP +GILDAIKAIAKAAG AMP +GILDFAKTVVGGIRNALGI AMP +GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC AMP +GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC AMP +GIMDSVKGLAKNLAGKLLDSLKCKITGC AMP +GIMDTIKDTAKTVAVGLLNKLKCKITGC AMP +GINTLKKVIQGLHEVIKLVSNHA AMP +GINTLKKVIQGLHEVIKLVSNHE AMP +GIPCGESCVWIPCISAALGCSCKNKVCYRN AMP +GKLQAFLAKMKEIAAQTL AMP +GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT AMP +GLADFLNKAVGKVVDFVKS AMP +GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC AMP +GLFKVLGSVAKHLLPHVAPIIAEKL AMP +GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP AMP +GLFLDTLKGLAGKLLQGLKCIKAGCKP AMP +GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC AMP +GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC AMP +GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC AMP +GLFSVLGSVAKHLLPHVAPIIAEKL AMP +GLFSVLGSVAKHLLPHVVPVIAEKL AMP +GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC AMP +GLLDFVTGVGKDIFAQLIKQI AMP +GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC AMP +GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC AMP +GLLDTIKGVAKTVAASMLDKLKCKISGC AMP +GLLGGLLGPLLGGGGGGGGGLL AMP +GLLGPLLKIAAKVGSNLL AMP +GLLGSIFGAGKKIACALSGLC AMP +GLLGSLFGAGKKVACALSGLC AMP +GLLKRIKTLL AMP +GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC AMP +GLLSKVLGVGKKVLCGVSGLC AMP +GLLSVLGSVAKHVLPHVVPVIAEHL AMP +GLMSSIGKALGGLIVDVLKPKTPAS AMP +GLNALKKVFQGIHEAIKLINNHVQ AMP +GLNTLKKVFQGLHEAIKLINNHVQ AMP +GLWNKIKEAASKAAGKAALGFVNEMV AMP +GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV AMP +GLWSKIKEAAKTAGLMAMGFVNDMV AMP +GLWSTIKQKGKEAAIAAAKAAGQAALGAL AMP +GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW AMP +GRLQAFLAKMKEIAAQTL AMP +GRPNPVNNKPTPHPRL AMP +GRPNPVNTKPTPYPRL AMP +GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD AMP +GSKKPVPIIYCNRRTGKCQRM AMP +GVLDILKNAAKNILAHAAEQI AMP +GVVDILKGAGKDLLAHLVGKISEKV AMP 
+GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ AMP +GWKDWLKKGKEWLKAKGPGIVKAALQAATQ AMP +GWKDWLNKGKEWLKKKGPGIMKAALKAATQ AMP +HGVSGHGQHGVHG AMP +IFGAILPLALGALKNLIK AMP +IIEKLVNTALGLLSGL AMP +IIGHLIKTALGMLGL AMP +ILGTILGLLKGL AMP +ILGTILGLLKSL AMP +ILPLVGNLLNDLL AMP +ILQKAVLDCLKAAGSSLSKAAITAIYNKIT AMP +INWKKIAEIGKQVLSAL AMP +INWKKIAEVGGKILSSL AMP +INWLKLGKAIIDAL AMP +IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR AMP +IWLTALKFLGKHAAKHLAKQQLSKL AMP +KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG AMP +KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC AMP +KTCENLADTY AMP +KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC AMP +KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC AMP +KWCFRVCYRGICYRKCR AMP +KWCFRVCYRGICYRRCR AMP +KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK AMP +LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC AMP +LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV AMP +LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC AMP +LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC AMP +LLKELWTKIKGAGKAVLGKIKGLL AMP +LLKELWTKMKGAGKAVLGKIKGLL AMP +LLPILGNLLNGLL AMP +LLPNLLKSLL AMP +LMCTHPLDCSN AMP +LNLKGIFKKVASLLT AMP +LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV AMP +MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL AMP +QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY AMP +QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW AMP +QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC AMP +QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS AMP +QRFIHPTYRPPPQPRRPVIMRA AMP +RQRVEELSKFSKKGAAARRRK AMP +RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG AMP +RSVCRQIKICRRRGGCYYKCTNRPY AMP +SAPRGCWTKSYPPKPCK AMP +SCTTCVCTCSCCTT AMP +SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW AMP +SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW AMP +SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY AMP +SKGKKANKDVELARG AMP +SMLSVLKNLGKVGLGFVACKINKQC AMP +TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW AMP +VDKGSYLPRPTPPRPIYNRN AMP +VDKPDYRPRPRPPNM AMP +VDKPDYRPRPWPRNMI AMP +VDKPDYRPRPWPRPN AMP +VDKPDYRPRPWPRPNM AMP +VLPIIGNLLNSLL AMP +VLPLISMALGKLL AMP +VNPIILGVLPKFVCLITKKC AMP 
+VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR AMP +VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW AMP +VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR AMP +VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF AMP +VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ AMP +WLGSALKIGAKLLPSVVGLFKKKKQ AMP +WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG AMP +WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG AMP +YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG AMP +YSKSLPLSVLNP AMP +YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF AMP
--- a/PDAUG_Peptide_Data_Access/test-data/out.tsv Wed Oct 28 02:31:40 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,413 +0,0 @@ -Peptide Target -AAGAATVLLVIVLLAGSYLAVLA 0 -LWIVIACLACVGSAAALTLRA 0 -FYRFYMLREGTAVPAVWFSIELIFGLFA 0 -GTLELGVDYGRAN 0 -KLFWRAVVAEFLATTLFVFISIGSALGFK 0 -HGSIGAGVDW 0 -ATIYLVCFCFFKQLAMIFMSVLAGNMYE 0 -GKLSLAATRSSE 0 -TSRLLLAGVALGIICSALMTWAIYF 0 -LGFFLVTFGFIWGMMLL 0 -YMLFTMIFVISSIIITVVVI 0 -ACFSAKVN 0 -LIWVAATLAGAIIAVLLVIYA 0 -HIPFAFAFAILAYLTLVLFRPVM 0 -CGLLVLLTLLLMGAIVTLGVF 0 -TGVYILVGVVLWTA 0 -LSMFIITPVMVLGTIFIFVMG 0 -MTLVALLVIGVSLTGYLGLKA 0 -QLYYQVLNFGMIVSSALMIW 0 -ALGALCLLLSVGSATACLLLGA 0 -LAVIFFFASALLYSQAAT 0 -MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT 0 -ADILILSLLVIQCALGLLTIPFSA 0 -RHQQATFAGFIKGATWVSILSIAVLVFLALAN 0 -GTKWWTVGIRPMYKW 0 -PAIRVFATYAKWDEK 0 -PHVFLLFITFPILFIGWGSQS 0 -AGLVLWGAIIFVGWNALLLLFFW 0 -TQIVLLGLVTAALWAGLLTLLLLWHW 0 -SFIGRVFLFLMIVLPLWCGLHRMHHAMHD 0 -MYLGAGIALIPVIMSINYL 0 -FQTGFDFSD 0 -LISGTLGIICLSLMATLGILL 0 -FPFLNNLSFWFTVVGVILVNVSLG 0 -LDIYTRLGGMVWR 0 -FLIFLLVIMTVITVALLTLLF 0 -KDIGILYLFTAGIVGLISVCFTVYMRMELQH 0 -PLFYIINILVPCVLISFMINLVFYL 0 -HKALKTLGIIMGVFTLCWLPFFLVNIVNVF 0 -ANLKKLKTLMSAFLIVLGLLTFGA 0 -IWVGIFLLAALLAALFVCLKA 0 -KGAAGITGAGFITLAATLS 0 -VVLLAIVTLISVVQNGFFAHKV 0 -IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS 0 -ALTVGTLLFLTGIGAASWAIV 0 -ITLIIFGVMAGVIGTILLISYGI 0 -WVWISLYYVAFYVVMSGIFAL 0 -GSNVALHVN 0 -GVLELGVDYGRAN 0 -IYLLILIILSMLCLVYASVPL 0 -LCSFLFVIVLFVATFYTL 0 -PLYFIVNVVIPCLLFSFLTGLVFYL 0 -ALLIAGGVGLLALAAALVLNA 0 -IWTWLRTTL 0 -PAHMIAISFFFTNALALALHGALVLSAA 0 -GGSLYIVGIFLPIWTFMIYIF 0 -GLTLGTGGR 0 -STVTGGYAQS 0 -DCFLLLVLLLYAPVGFCLLVL 0 -FWRAVVAEFLAMILFIFISIGSALG 0 -DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK 0 -LTFTEKWN 0 -IRDTLMRLVLTVRSN 0 -GKLSLAATRSTE 0 -GYNKFVVQYATDA 0 -QFHTMLMIAASGAVLIALGILCLVIQMYVSIR 0 -LILVLFVVLVSSVGVSLTLYA 0 -MASLWADYT 0 -DAWSGFVRGYGYDNRTN 0 -RWLWDFVNA 0 -MFVALLGLGLGQVVCSVALFF 0 -NMSTYVDYII 0 -IWVKMTFIVVLGLCFVFFWSF 0 -SLLISVVLVAYYLYIPLPDAI 0 -RKTTINGLIVAVILGVCFTGLQAYEYSHA 0 
-IPVQLLWVNLVTDGLPATALG 0 -LAVIAGGVGFIMVAVLLVLFA 0 -MNKRNIMNLILAGGAGLPITTLALGYGAFF 0 -IALIVAGLSALGIATALVLNA 0 -NGNMWRILDHGAISL 0 -VAIICAAVVAVGLIVGLSVGL 0 -FSFLIVAGATTLFCLLHFGVI 0 -VGISIATIVAIIAAIYYVPW 0 -PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF 0 -LILIGAGLGVLALAAGLILTA 0 -LGLAAGAIYYYNTSNVFA 0 -QLGAGAFGGYQV 0 -IAIALLVILVVCSLITMSVIL 0 -LAFLIGGIIGGLLLLIGVSCCLW 0 -NGHMLRILDHGAISM 0 -MALILGIDRFMSECRALTNF 0 -AVIAGTTLAITALAVTS 0 -LYIVLAILCGVSIAVALALTA 0 -AAVTLGVLCLGLLVTVILLILQL 0 -LWLVIGVLTAAALAVTLIALA 0 -GTLLLLTAIGAASWAIVAVLL 0 -LLLVASVIQGLGLLLCFTYICLHFSAL 0 -AMIVALIVICITAVVAALV 0 -LVFIGTCGAVLAVALGLVLWA 0 -TRFGIAAKYQ 0 -IPWAVLIVVLITSLIIALIAL 0 -LWVVCAVLAGLGLTTALVLYA 0 -FAGRVLAGAVMMSGIGIFGLWAGIL 0 -LLIVLAGLAVVAVASGLILNA 0 -VGSIVGGIYLGFCFNAGAPAVEAFI 0 -MLSLGVSYR 0 -LTKWFFCCVCTILTMPFF 0 -LATIAASAIVLVVAVGLGLMA 0 -PIVVTGAVY 0 -KPHNLPMVFTGTAILYIGWFGFNAGSA 0 -DEFGEQLSKVISLICVAVWL 0 -HTLLTGVDF 0 -SVELIALLAISCTFFLFMHT 0 -LLIALLIYWTLAFITKTIKF 0 -FKLVIFVVLGIAIASGLMLYA 0 -CTLSISVLLAQTIFLFLIA 0 -FVIAGGTLAIPILAFVASFLL 0 -LLVITAIVLILSAAVGLVMYA 0 -LALATALIGGVAAIASLLLYA 0 -NPVIVIINLITLAAALLHTKTWFEL 0 -FIVVAGVVILAVTIALLVYFL 0 -IGLMCFLSIIITTVCIIMIAT 0 -FSVDTQLQS 0 -EVYILLNWIGYVNSGFNPLIYCRS 0 -LTLAVALIVGVSAIASLLLYA 0 -LYLAVVVLIGIGLTTTLVLYA 0 -MSTAISVLLAQAVFLLLT 0 -ALPGLMNKMEKAGCKRSVV 0 -KQFIRYLISSNVGEVVCIFL 0 -KYVVSSLVLVYGLIKVLTWIF 0 -TGVSPVFAGGVEYA 0 -IVIVLGILCFLLLLTVAVLVI 0 -KSLGILGILLGVAALCTIVALSVV 0 -LCLFVVTPVMVVGTAWIFL 0 -LYLAIVVLIGVALTATLMLYA 0 -LTILLAIAPVLALAVGLALYG 0 -FFVLLLMILILVNLAMTIWIL 0 -FIVLIPSVVITVIFLFFWLFM 0 -ILVLLILAVITIFALVCVLLV 0 -TYFIVLIPSVVITVIFLFFWLFM 0 -FLVLFIFLTSFFLNYSHTMVA 0 -GDKIGMFFQAMATFFGGFIIGF 0 -SNGVIVGTCLAFVAGMIGMAYAA 0 -FTFEGAARSDD 0 -MNYMVYFNFFACVLVPLLLMLGVYL 0 -AGLILLVVTLIGMSVLVRVLI 0 -VFLAVYLLGGITFLPLVLFTL 0 -LIKISALVFVTVAFFYLG 0 -GFFGVATFFFAALGIILIAWSAVL 0 -GQWEISVIWGLGVAMAIYLTA 0 -LWWIQAMTGFAMFFLGSVHLYIMMT 0 -IAVVITVVFLTLLSVVILIFF 0 -QVVATATFR 0 -MLLCFAFLWVLGIAYYMY 0 -IFCIIMLFALLGFL 0 -PLCICVAFTCLALVLVTSIVL 0 -LIIVLAIVVGVGAAVGLALSA 0 -ILVPCVLGLLLLPILAMLMALCV 0 
-LGLLLAALICVGIATTLVLNA 0 -GYAAYYLVRKNFALAMPYLVE 0 -LPRTLAVLLVGAALAISGAVMQALF 0 -ILLFYVIFYGCLAGIFIGTIQ 0 -IMSTLLEVGYDNVKSQ 0 -LLAVALIIAMSISLAWQAAGW 0 -IVGQLLFVALGITFIYYLFTP 0 -NFWMFGLFFFFYFFIMGAYFPFFPIWL 0 -LVLIVGIVAAVGVAAALVLNA 0 -LTLAVALIGGVAAITSLLLYA 0 -ASGGIILIIAAILAMIMAN 0 -GSAGGAALAVVVLALAFGLSG 0 -LFVLLLLAILVVNLALTIWIL 0 -SPPLVLAALVACIIVLGFNYWIA 0 -LLFLILGIISFITFFLQGFTF 0 -LIVKALGILCFLLLITVAVLAV 0 -QYIHVAFQGSFACITVGLIVGAL 0 -QGIAVFGYSMAVSIGGILASR 0 -DHKRLGIMYIIVAIVMLLRGFADAIMMR 0 -IFRLHLVLGMTLFLLF 0 -LVSAIILTSFMTGLFILSLWK 0 -AVVGGVIAAVFITLITVVVLI 0 -INLGCDVDFD 0 -ALSALCLLLSVGSAAACLLLGA 0 -FTVIAGAVIVLLLTLNSNS 0 -LLFVSLLFCLIAQTCWLALV 0 -MVLVALLVIGVSLAGYLGLKA 0 -LVIPHILRLC 0 -LIRVLLGFVILFITYILFPSI 0 -PLFYIINILAPCVLIALMANLVFYL 0 -HKLGLGLEFQA 0 -ILFVAVSFIALGCVSAFVLFE 0 -AVVSAQIAITASPIS 0 -LTIIGGALFVLAVAAGLVLNA 0 -GCCGLLALALCSLALSLLA 0 -LIVLLAIVTIIAIALVAILP 0 -TEISAGWG 0 -FWRYFAGNLASGGAAGATSLCFVYPLDFARTRLAA 0 -IVAALGIIGLWMFFSSNELSIAT 0 -MTVILFVLLGISIASALVLYA 0 -SNATIAVACLSFFVCMIGAAYAS 0 -LKLTFDSSFS 0 -LNVLLSAAINFFLIAFAVYFLV 0 -HSAMLWTIGFIVTFSVGGMTGVLLAVPGADFV 0 -AVLSAKGQY 0 -LWELVIEQFEDLLVRILLLAA 0 -FFIVMGLVDAIPMIAVGLGLY 0 -LAVIAGGMGFIATAVLLVLFA 0 -ACYCRIPACLAGERRYGTCFYMGRVWAFCC 1 -AGRGKQGGKVRAKAKTRSSRAGLQFPVGRVHRLLRKGNY 1 -ALFSILRGLKKLGNMGQAFVNCKIYKKC 1 -ALSILKGLEKLAKMGIALTNCKATKKC 1 -ALWKDILKNVGKAAGKAVLNTVTDMVNQ 1 -ALWKNMLKGIGKLAGQAALGAVKTLVGAES 1 -ALWKTLLKNVGKAAGKAALNAVTDMVNQ 1 -ALWKTMLKKLGTMALHAGKAAFGAAADTISQ 1 -ALWKTMLKKLGTMALHAGKAALGAAADTISQGTQ 1 -APGNKAECEREKGYCGFLKCSFPFVVSGKCSRFFFCCKNIW 1 -ASIIKTTIKVSKAVCKTLTCICTGSCSNCK 1 -ATCDLLSGTGIKHSACAAHCLLRGNRGGYCNGRAICVCRN 1 -ATTGCSCPQCIIFDPICASSYKNGRRGFSSGCHMRCYNRCHGTDYFQISKGSKCI 1 -AYPGNGVHCGKYSCTVDKQTAIGNIGNNAA 1 -CANSCSYGPLTWSCDGNTK 1 -CRQSCSFGPLTFVCDGNTK 1 -DDTPSSRCGSGGWGPCLPIVDLLCIVHVTVGCSGGFGCCRIG 1 -DFASCHTNGGICLPNRCPGHMIQIGICFRPRVKCCRSW 1 -DFKDWMKTAGEWLKKKGPGILKAAMAAAT 1 -DGVKLCDVPSGTWSGHCGSSSKCSQQCKDREHFAYGGACHYQFPSVKCFCKRQC 1 -DKLIGSCVWGAVNYTSDCNGECKRRGYKGGHCGSFANVNCWCET 1 
-ELCEKASKTWSGNCGNTGHCDNQCKSWEGAAHGACHVRNGKHMCFCYFNC 1 -FCKSLPLPLSVK 1 -FFGSLLSLGSKLLPSVFKLFQRKKE 1 -FFGSVLKLIPKIL 1 -FFGWLIKGAIHAGKAIHGLIHRRRH 1 -FFGWLIRGAIHAGKAIHGLIHRRRH 1 -FFPIVAGVAGQVLKKIYCTISKKC 1 -FGLPMLSILPKALCILLKRKC 1 -FIGLLISAGKAIHDLIRRRH 1 -FIGPIISALASLFG 1 -FKLGSFLKKAWKSKLAKKLRAKGKEMLKDYAKGLLEGGSEEVPGQ 1 -FLGGLMKAFPAIICAVTKKC 1 -FLGGLMKAFPALICAVTKKC 1 -FLNALKNFAKTAGKRLKSLLN 1 -FLPAIAGMAAKFLPKIFCAISKKC 1 -FLPAIAGVAAKFLPKIFCAISKKC 1 -FLPAIVGAAAKFLPKIFCVISKKC 1 -FLPAIVGAAGKFLPKIFCAISKKC 1 -FLPAIVGAAGQFLPKIFCAISKKC 1 -FLPAVLRVAAKIVPTVFCAISKKC 1 -FLPAVLRVAAKVVPTVFCLISKKC 1 -FLPAVLRVAAQVVPTVFCAISKKC 1 -FLPFIAGMAAKFLPKIFCAISKKC 1 -FLPFIAGMAANFLPKIFCAISKKC 1 -FLPFIAGVAAKFLPKIFCAISKKC 1 -FLPFLATLLSKVL 1 -FLPGLLAGLL 1 -FLPIASLLGKYL 1 -FLPIIAGVAAKVFPKIFCAISKKC 1 -FLPIIASVAAKVFPKIFCAISKKC 1 -FLPIIASVAAKVFSKIFCAISKKC 1 -FLPIIASVAANVFSKIFCAISKKC 1 -FLPILASLAAKFGPKLFCLVTKKC 1 -FLPILASLAAKLGPKLFCLVTKKC 1 -FLPILASLAATLGPKLLCLITKKC 1 -FLPLFASLIGKLL 1 -FLPLIGKVLSGIL 1 -FLPLIGRVLSGIL 1 -FLPLLAGLAANFFPKIFCKITRKC 1 -FLPLLAGLAANFLPKIFCKITRKC 1 -FLPLLAGLAANFLPTIICKISYKC 1 -FLPMLAGLAASMVPKFVCLITKKC 1 -FLPVVAGLAAKVLPSIICAVTKKC 1 -FMGGLIKAATKIVPAAYCAITKKC 1 -FSFKRLKGFAKKLWNSKLARKIRTKGLKYVKNFAKDMLSEGEEAPPAAEPPVEAPQ 1 -FVPYNPPRPGQSKPFPSFPGHGPFNPKIQWPYPLPNPGH 1 -GAIKDALKGAAKTVAVELLKKAQCKLEKTC 1 -GFFSLIKGVAKIATKGLAKNLGKMGLDLVGCKISKEC 1 -GFFSTVKNLATNVAGTVIDTLKCKVTGGCRS 1 -GFGALFKFLAKKVAKTVAKQAAKQGAKYVVNKQME 1 -GFISTVKNLATNVAGTVIDTIKCKVTGGC 1 -GFKGAFKNVMFGIAKSAGKSALNALACKIDKSC 1 -GFLDSFKNAMIGVAKSVGKTALSTLACKIDKSC 1 -GFLSILKKVLPKVMAHMK 1 -GFLSTVKNLATNVAGTVIDTLKCKVTGGCRS 1 -GFMKYIGPLIPHAVKAISDLI 1 -GFSSIFRGVAKFASKGLGKDLARLGVNLVACKISKQC 1 -GFVDLAKKVVGGIRNALGI 1 -GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLNNIGNTGCNEDEC 1 -GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLSNIGNTGCNEDEC 1 -GIFSKFGGKAIKNLFIKGAKNIGKEVGMDVIRTGIDVAGCKIKGEC 1 -GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC 1 -GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC 1 -GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC 1 -GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC 
1 -GILDAIKAIAKAAG 1 -GILDFAKTVVGGIRNALGI 1 -GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC 1 -GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC 1 -GIMDSVKGLAKNLAGKLLDSLKCKITGC 1 -GIMDTIKDTAKTVAVGLLNKLKCKITGC 1 -GINTLKKVIQGLHEVIKLVSNHA 1 -GINTLKKVIQGLHEVIKLVSNHE 1 -GIPCGESCVWIPCISAALGCSCKNKVCYRN 1 -GKLQAFLAKMKEIAAQTL 1 -GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT 1 -GLADFLNKAVGKVVDFVKS 1 -GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC 1 -GLFKVLGSVAKHLLPHVAPIIAEKL 1 -GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP 1 -GLFLDTLKGLAGKLLQGLKCIKAGCKP 1 -GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC 1 -GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC 1 -GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC 1 -GLFSVLGSVAKHLLPHVAPIIAEKL 1 -GLFSVLGSVAKHLLPHVVPVIAEKL 1 -GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC 1 -GLLDFVTGVGKDIFAQLIKQI 1 -GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC 1 -GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC 1 -GLLDTIKGVAKTVAASMLDKLKCKISGC 1 -GLLGGLLGPLLGGGGGGGGGLL 1 -GLLGPLLKIAAKVGSNLL 1 -GLLGSIFGAGKKIACALSGLC 1 -GLLGSLFGAGKKVACALSGLC 1 -GLLKRIKTLL 1 -GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC 1 -GLLSKVLGVGKKVLCGVSGLC 1 -GLLSVLGSVAKHVLPHVVPVIAEHL 1 -GLMSSIGKALGGLIVDVLKPKTPAS 1 -GLNALKKVFQGIHEAIKLINNHVQ 1 -GLNTLKKVFQGLHEAIKLINNHVQ 1 -GLWNKIKEAASKAAGKAALGFVNEMV 1 -GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV 1 -GLWSKIKEAAKTAGLMAMGFVNDMV 1 -GLWSTIKQKGKEAAIAAAKAAGQAALGAL 1 -GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW 1 -GRLQAFLAKMKEIAAQTL 1 -GRPNPVNNKPTPHPRL 1 -GRPNPVNTKPTPYPRL 1 -GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD 1 -GSKKPVPIIYCNRRTGKCQRM 1 -GVLDILKNAAKNILAHAAEQI 1 -GVVDILKGAGKDLLAHLVGKISEKV 1 -GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ 1 -GWKDWLKKGKEWLKAKGPGIVKAALQAATQ 1 -GWKDWLNKGKEWLKKKGPGIMKAALKAATQ 1 -HGVSGHGQHGVHG 1 -IFGAILPLALGALKNLIK 1 -IIEKLVNTALGLLSGL 1 -IIGHLIKTALGMLGL 1 -ILGTILGLLKGL 1 -ILGTILGLLKSL 1 -ILPLVGNLLNDLL 1 -ILQKAVLDCLKAAGSSLSKAAITAIYNKIT 1 -INWKKIAEIGKQVLSAL 1 -INWKKIAEVGGKILSSL 1 -INWLKLGKAIIDAL 1 -IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR 1 -IWLTALKFLGKHAAKHLAKQQLSKL 1 -KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG 1 
-KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC 1 -KTCENLADTY 1 -KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC 1 -KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC 1 -KWCFRVCYRGICYRKCR 1 -KWCFRVCYRGICYRRCR 1 -KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK 1 -LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC 1 -LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV 1 -LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC 1 -LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC 1 -LLKELWTKIKGAGKAVLGKIKGLL 1 -LLKELWTKMKGAGKAVLGKIKGLL 1 -LLPILGNLLNGLL 1 -LLPNLLKSLL 1 -LMCTHPLDCSN 1 -LNLKGIFKKVASLLT 1 -LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV 1 -MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL 1 -QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY 1 -QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW 1 -QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC 1 -QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS 1 -QRFIHPTYRPPPQPRRPVIMRA 1 -RQRVEELSKFSKKGAAARRRK 1 -RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG 1 -RSVCRQIKICRRRGGCYYKCTNRPY 1 -SAPRGCWTKSYPPKPCK 1 -SCTTCVCTCSCCTT 1 -SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW 1 -SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW 1 -SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY 1 -SKGKKANKDVELARG 1 -SMLSVLKNLGKVGLGFVACKINKQC 1 -TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW 1 -VDKGSYLPRPTPPRPIYNRN 1 -VDKPDYRPRPRPPNM 1 -VDKPDYRPRPWPRNMI 1 -VDKPDYRPRPWPRPN 1 -VDKPDYRPRPWPRPNM 1 -VLPIIGNLLNSLL 1 -VLPLISMALGKLL 1 -VNPIILGVLPKFVCLITKKC 1 -VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR 1 -VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW 1 -VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR 1 -VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF 1 -VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ 1 -WLGSALKIGAKLLPSVVGLFKKKKQ 1 -WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG 1 -WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG 1 -YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG 1 -YSKSLPLSVLNP 1 -YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF 1
--- a/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py Wed Oct 28 02:31:40 2020 +0000 +++ b/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py Tue Dec 29 04:43:09 2020 +0000 @@ -14,17 +14,23 @@ n = 0 m = 0 + + l = [] + + for line in lines[1:]: + l.append(line.split('\t')[1].strip('\n').strip('\r')) + l = list(set(l)) for line in lines: - if '1' in line.split('\t')[1].strip('\n'): + if l[0] in line.split('\t')[1].strip('\n').strip('\r'): n= n+1 - of1.write('>peptide_'+str(n)+'\n') + of1.write('>peptide_'+str(n)+'_'+str(l[0])+'\n') of1.write(line.split('\t')[0]+'\n') - if '0' in line.split('\t')[1].strip('\n'): + if l[1] in line.split('\t')[1].strip('\n').strip('\r'): m= m+1 - of2.write('>peptide_'+str(m)+'\n') + of2.write('>peptide_'+str(m)+'_'+str(l[1])+'\n') of2.write(line.split('\t')[0]+'\n') elif Method == 'NoClassLabel': @@ -47,11 +53,10 @@ parser = argparse.ArgumentParser() parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") - parser.add_argument("-P", "--Postvs", required=False, default='Positive.fasta', help="Path to target tsv file") - parser.add_argument("-N", "--Negtvs", required=False, default='Negative.fasta', help="Path to target tsv file") + parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") + parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") args = parser.parse_args() - TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) - + TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.md Tue Dec 29 04:43:09 2020 +0000 @@ -0,0 +1,25 @@ +# PDAUG - a Galaxy-based toolset for peptide library analysis, visualization, and machine learning modeling. + +### Overview + +The Peptide Design and Analysis Under Galaxy (PDAUG) package is a Galaxy-based, Python-powered collection of tools, workflows, and datasets for rapid in-silico peptide library analysis. PDAUG offers tools for peptide library generation, data visualization, in-built and public database based peptide sequence retrieval, peptide feature calculation, and machine learning modeling. The PDAUG tool suite can be downloaded and installed through the Galaxy Tool Shed as a standard Galaxy tool. + + +# Prebuilt Docker Image + +A prebuilt Docker image based on a recent Galaxy release can be obtained from the link below for a quick installation. + + - [Docker Image](https://github.com/jaidevjoshi83/docker_pdaug) + +# Contributors + - Jayadev Joshi + + - Daniel Blankenberg + +# History + + - 0.1.0: First release! + +# Support & Bug Reports + +You can file a [GitHub issue](https://github.com/jaidevjoshi83/docker_pdaug/issues).