# HG changeset patch
# User cpt
# Date 1655469202 0
# Node ID f3fc78cc4c43f4d33401c880a50a1fade369ab9d
# Uploaded
# File in changeset: cpt_disruptin_table/Disruptin_hydrophobicity_helicity_table_package.py
"""
This program is intended to create the output table for the disruptin finder workflow
"""
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
import csv
import argparse
import sys

# Sliding-window size for the Kyte-Doolittle hydrophobicity scale.
_HYDRO_WINDOW = 9

# One-letter residue -> symbol shown in the "Charge" row:
# "+" positive, "-" negative, "N" nonpolar, "P" polar.
_CHARGE_SYMBOLS = {
    aa: symbol
    for residues, symbol in (
        ("RK", "+"),
        ("DE", "-"),
        ("AVMILPWFG", "N"),
        ("HSYTCQN", "P"),
    )
    for aa in residues
}

# Symbol emitted for any residue that is not in _CHARGE_SYMBOLS (e.g. X, B,
# U, "*").  Previously such a residue either raised UnboundLocalError (when
# it was the first residue) or silently inherited the preceding residue's
# symbol.
_UNKNOWN_SYMBOL = "?"


def _read_secondary_structure(garnier_file):
    """Return the secondary-structure strings found in *garnier_file*.

    Every row whose first character is one of ``H``, ``E``, ``T`` or ``C`` is
    kept, with its tab characters removed.  All other rows are ignored.
    Rows are expected to be ``str`` (e.g. the lines of an open text file).
    """
    structures = []
    for row in garnier_file:
        # Guard against empty strings so row[0] cannot raise IndexError.
        if row and row[0] in "HETC":
            structures.append(row.replace("\t", ""))
    return structures


def disruptin_table(garnier_file, fasta_file):
    """Build the per-residue table columns for every record in *fasta_file*.

    Parameters
    ----------
    garnier_file:
        Iterable of text rows (typically an open file).  The n-th row that
        starts with H/E/T/C is used as the secondary-structure string of the
        n-th FASTA record.
        NOTE(review): the exact layout of this file is produced by an
        upstream tool not visible here -- confirm it is one row per record.
    fasta_file:
        Handle or path accepted by ``SeqIO.parse(..., "fasta")``.

    Returns
    -------
    tuple
        ``(record, p, r, c, h, s)`` -- six parallel lists with one entry per
        FASTA record: the record id, then lists of position strings
        (1-based), residue letters, charge/polarity symbols, hydrophobicity
        values and secondary-structure characters.

    Raises
    ------
    IndexError
        If the garnier data has fewer structure rows than there are FASTA
        records, or a structure row is shorter than its sequence.
    """
    sec_struct = _read_secondary_structure(garnier_file)

    record = []
    p = []
    r = []
    c = []
    h = []
    s = []

    # Parse the .fasta file and build the information vectors per sequence.
    for record_number, rec in enumerate(SeqIO.parse(fasta_file, "fasta")):
        sequence = str(rec.seq)
        length = len(sequence)

        position_vec = [str(pos) for pos in range(1, length + 1)]
        residue_vec = list(sequence)
        charge_sym_vec = [
            _CHARGE_SYMBOLS.get(aa, _UNKNOWN_SYMBOL) for aa in sequence
        ]
        # Indexed per residue (not sliced) so that a structure row that is
        # too short still raises IndexError instead of truncating silently.
        sec_struct_vec = [
            str(sec_struct[record_number][idx]) for idx in range(length)
        ]

        # Calculating hydrophobicity based on the Kyte and Doolittle scale
        # with a window of 9.  The first 4 and last 4 residues have no
        # centred window, so they are filled with 0 to keep the values
        # aligned with their position on the sequence.
        scale = ProteinAnalysis(sequence).protein_scale(
            ProtParamData.kd, _HYDRO_WINDOW
        )
        pad = [0] * (_HYDRO_WINDOW // 2)
        # Sequences shorter than the window yield no scale values; trim the
        # padding so the row never has more columns than the sequence.
        hydro = (pad + scale + pad)[:length]

        record.append(rec.id)
        p.append(position_vec)
        r.append(residue_vec)
        c.append(charge_sym_vec)
        h.append(hydro)
        s.append(sec_struct_vec)

    return record, p, r, c, h, s


def main():
    """Parse the command line and print the tab-separated table to stdout."""
    parser = argparse.ArgumentParser(description="Disruptin Table Output")
    parser.add_argument(
        "garnier_file", type=argparse.FileType("r"), help="csv file from garnier reader"
    )
    parser.add_argument(
        "fasta_file",
        type=argparse.FileType("r"),
        help="fasta file of disruptin candidates",
    )
    args = parser.parse_args()

    columns = disruptin_table(args.garnier_file, args.fasta_file)

    for iden, position, residue, charge, hydro, struct in zip(*columns):
        print(str(iden))
        print("Position \t " + "\t".join(position))
        print("Residue \t" + "\t".join(residue))
        print("Charge \t" + "\t".join(charge))
        print("Hydrophobicity \t" + "\t".join(format(x, ".3f") for x in hydro))
        print("Secondary Structure \t" + "\t".join(struct))


if __name__ == "__main__":
    main()

# Start of the next file's diff header in the original patch (continues on
# the following line):
# diff -r 000000000000 -r f3fc78cc4c43
cpt_disruptin_table/Disruptin_hydrophobicity_helicity_table_package.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_table/Disruptin_hydrophobicity_helicity_table_package.xml Fri Jun 17 12:33:22 2022 +0000 @@ -0,0 +1,33 @@ + + + makes table of disruptin candidates + + macros.xml + cpt-macros.xml + + + $output]]> + + + + + + + + + + diff -r 000000000000 -r f3fc78cc4c43 cpt_disruptin_table/cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_table/cpt-macros.xml Fri Jun 17 12:33:22 2022 +0000 @@ -0,0 +1,115 @@ + + + + + python + biopython + requests + + + + + + + + 10.1371/journal.pcbi.1008214 + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. 
Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + diff -r 000000000000 -r f3fc78cc4c43 cpt_disruptin_table/macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_disruptin_table/macros.xml Fri Jun 17 12:33:22 2022 +0000 @@ -0,0 +1,23 @@ + + + + + python + biopython + cpt_gffparser + + + + + + + + + + + ln -s $genome_fasta genomeref.fa; + + + genomeref.fa + +