vapper: Tryp_G.py comparison

comparison Tryp_G.py @ 3:4432e4183ebd draft

planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy

author	johnheap
date	Wed, 11 Jul 2018 08:58:14 -0400
parents	36cb22bd911d
children	e91e41380946

comparison

equal deleted inserted replaced

-:82770f07a036
+:4432e4183ebd
 def contigTranslation(name):
 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString
 print(argString)
 returncode = subprocess.call(argString, shell=True)
-#subprocess.call('ls -l *.fa', shell = True)
-#sys.exit(1)
-#if returncode != 0:
-#    return "Error in Transeq"
-#return 'ok'
 def HMMerMotifSearch(name):
 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
 countList.append(totalCount)
 #print(countList)
 #print("--------")
 return countList
-"""
-def HMMerMotifSearch(name):
-motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b',
-'9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c']
-lineCounts = []
-compoundList = []
-dir_path = os.path.dirname(os.path.realpath(__file__))
-phylopath = dir_path+"/data/Motifs/Phylotype"
-for m in motifs:
-argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out"  #+quietString
-#argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out"
-print(argString)
-subprocess.call(argString, shell=True)
-hmmResult = open("Phy" + m + ".out", 'r')
-tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w')
-regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}"
-n = 0
-outList = []
-for line in hmmResult:
-m = re.search(regex, line)
-if m:
-tempout.write(m.group() + "\n")
-outList.append(""+m.group()+"\n")
-n += 1
-if re.search(r"inclusion", line):
-print("inclusion threshold reached")
-break
-compoundList.append(outList)
-lineCounts.append(n)
-hmmResult.close()
-#tempout.close()
-print(lineCounts)
-motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b',
-'9c'],
-['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']]
-concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3]
-countList = []
-countIndex = 0
-totalCount = 0
-for c in concatGroups:
-a = []
-for n in range(0, c):
-a = a + compoundList.pop(0)
-t = set(a)
-countList.append(len(t))
-totalCount += len(t)
-countList.append(totalCount)
-print(countList)
-print("--------")
-return countList
-"""
 def relativeFrequencyTable(countList, name, htmlresource):
 relFreqList = []
 c = float(countList[15])
 j_fname = dir_path+"/data/congodata.csv"
 #print(dir_path)
 congo_df = pd.read_csv(j_fname)
 congo_df.drop('Colour', axis=1, inplace=True)
 congo_df.loc[congo_df.index.max() + 1] = localFreqList
+ysize = len(congo_df) * 20 / 97.0  # scale the figure height with the number of rows: a height of 20 is OK for 97 rows.
 congo_df.set_index('Strain', inplace=True)
-cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values)
+cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8)  # get y labels printed horizontally
 ax=cg.ax_heatmap
 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. "
 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. "
 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)."
 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv"
 #j_fname = r"data/congodata_deviationfromthemean.csv"
 congo_df = pd.read_csv(j_fname)
 congo_df.drop('Colour', axis=1, inplace=True)
 congo_df.loc[congo_df.index.max() + 1] = localDevList
+ysize = len(congo_df) * 20 / 97.0  # scale the figure height with the number of rows: a height of 20 is OK for 97 rows.
 congo_df.set_index('Strain', inplace=True)
-cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values)
+cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize))
 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8)  # get y labels printed horizontally
 ax = cg.ax_heatmap
 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes "
 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of "
 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)."
 for item in pcaResult.Y:
 col = myCountries.index(myColours[i])
 compoundList[col].append(-item[0])
 compoundList[col].append(item[1])
 i = i + 1
-cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k']
+colormap = plt.cm.tab20  # nipy_spectral, Set1,Paired
+cols = [colormap(i) for i in np.linspace(0, 1, 20)]
 fig, ax = plt.subplots(figsize=(9, 6))
 #plt.figure(num=1,figsize=(12, 6))
 i = 0
 for d in myCountries:
 a = compoundList[i]

Mercurial > repos > johnheap > vapper

comparison Tryp_G.py @ 3:4432e4183ebd draft