Mercurial > repos > johnheap > vapper
comparison Tryp_G.py @ 3:4432e4183ebd draft
planemo upload for repository https://github.com/johnheap/VAPPER-Galaxy
| author | johnheap |
|---|---|
| date | Wed, 11 Jul 2018 08:58:14 -0400 |
| parents | 36cb22bd911d |
| children | e91e41380946 |
comparison
equal
deleted
inserted
replaced
| 2:82770f07a036 | 3:4432e4183ebd |
|---|---|
| 54 | 54 |
| 55 def contigTranslation(name): | 55 def contigTranslation(name): |
| 56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString | 56 argString = "transeq " + name + ".fa " + name + "_6frame.fas -frame=6 " #+quietString |
| 57 print(argString) | 57 print(argString) |
| 58 returncode = subprocess.call(argString, shell=True) | 58 returncode = subprocess.call(argString, shell=True) |
| 59 #subprocess.call('ls -l *.fa', shell = True) | |
| 60 #sys.exit(1) | |
| 61 #if returncode != 0: | |
| 62 # return "Error in Transeq" | |
| 63 #return 'ok' | |
| 64 | 59 |
| 65 | 60 |
| 66 def HMMerMotifSearch(name): | 61 def HMMerMotifSearch(name): |
| 67 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', | 62 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', |
| 68 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] | 63 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] |
| 121 countList.append(totalCount) | 116 countList.append(totalCount) |
| 122 #print(countList) | 117 #print(countList) |
| 123 #print("--------") | 118 #print("--------") |
| 124 return countList | 119 return countList |
| 125 | 120 |
| 126 """ | |
| 127 def HMMerMotifSearch(name): | |
| 128 motifs = ['1', '2a', '2b', '3', '4a', '4b', '4c', '5', '6', '7', '8a', '8b', '9a', '9b', | |
| 129 '9c', '10a', '10b', '11a', '11b', '12', '13a', '13b', '13c', '13d', '14', '15a', '15b', '15c'] | |
| 130 lineCounts = [] | |
| 131 compoundList = [] | |
| 132 dir_path = os.path.dirname(os.path.realpath(__file__)) | |
| 133 phylopath = dir_path+"/data/Motifs/Phylotype" | |
| 134 for m in motifs: | |
| 135 argString = "hmmsearch "+phylopath + m + ".hmm " + name + "_6frame.fas > Phy" + m + ".out" #+quietString | |
| 136 #argString = "hmmsearch "+phylopath + m + ".hmm " + dir_path+"/data/Test_6frame.fas > Phy" + m + ".out" | |
| 137 print(argString) | |
| 138 subprocess.call(argString, shell=True) | |
| 139 | |
| 140 hmmResult = open("Phy" + m + ".out", 'r') | |
| 141 tempout = open(dir_path+"/data/"+"Phy" + m + ".txt", 'w') | |
| 142 regex = r"NODE_[0-9]{1,7}_length_[0-9]{1,7}_cov_[0-9]{1,10}.[0-9]{1,7}_[0-9]{1,2}" | |
| 143 n = 0 | |
| 144 outList = [] | |
| 145 for line in hmmResult: | |
| 146 m = re.search(regex, line) | |
| 147 if m: | |
| 148 tempout.write(m.group() + "\n") | |
| 149 outList.append(""+m.group()+"\n") | |
| 150 n += 1 | |
| 151 if re.search(r"inclusion", line): | |
| 152 print("inclusion threshold reached") | |
| 153 break | |
| 154 compoundList.append(outList) | |
| 155 lineCounts.append(n) | |
| 156 hmmResult.close() | |
| 157 #tempout.close() | |
| 158 print(lineCounts) | |
| 159 motifGroups = [['1'], ['2a', '2b'], ['3'], ['4a', '4b', '4c'], ['5'], ['6'], ['7'], ['8a', '8b'], ['9a', '9b', | |
| 160 '9c'], | |
| 161 ['10a', '10b'], ['11a', '11b'], ['12'], ['13a', '13b', '13c', '13d'], ['14'], ['15a', '15b', '15c']] | |
| 162 concatGroups = [1, 2, 1, 3, 1, 1, 1, 2, 3, 2, 2, 1, 4, 1, 3] | |
| 163 countList = [] | |
| 164 countIndex = 0 | |
| 165 totalCount = 0 | |
| 166 | |
| 167 for c in concatGroups: | |
| 168 a = [] | |
| 169 for n in range(0, c): | |
| 170 a = a + compoundList.pop(0) | |
| 171 t = set(a) | |
| 172 countList.append(len(t)) | |
| 173 totalCount += len(t) | |
| 174 countList.append(totalCount) | |
| 175 print(countList) | |
| 176 print("--------") | |
| 177 return countList | |
| 178 """ | |
| 179 | |
| 180 | 121 |
| 181 | 122 |
| 182 def relativeFrequencyTable(countList, name, htmlresource): | 123 def relativeFrequencyTable(countList, name, htmlresource): |
| 183 relFreqList = [] | 124 relFreqList = [] |
| 184 c = float(countList[15]) | 125 c = float(countList[15]) |
| 221 j_fname = dir_path+"/data/congodata.csv" | 162 j_fname = dir_path+"/data/congodata.csv" |
| 222 #print(dir_path) | 163 #print(dir_path) |
| 223 congo_df = pd.read_csv(j_fname) | 164 congo_df = pd.read_csv(j_fname) |
| 224 congo_df.drop('Colour', axis=1, inplace=True) | 165 congo_df.drop('Colour', axis=1, inplace=True) |
| 225 congo_df.loc[congo_df.index.max() + 1] = localFreqList | 166 congo_df.loc[congo_df.index.max() + 1] = localFreqList |
| 167 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97. | |
| 168 | |
| 226 congo_df.set_index('Strain', inplace=True) | 169 congo_df.set_index('Strain', inplace=True) |
| 227 | 170 |
| 228 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) | 171 cg = sns.clustermap(congo_df, method='ward', cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize)) |
| 229 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally | 172 plt.setp(cg.ax_heatmap.yaxis.get_ticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally |
| 230 ax=cg.ax_heatmap | 173 ax=cg.ax_heatmap |
| 231 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. " | 174 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ estimated as the phylotype proportion across the\nsample cohort. " |
| 232 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. " | 175 title += "Dendrogram reflects the relationships amongst the VSG repertoires of each strain. " |
| 233 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)." | 176 title += "Strains\nwere isolated from multiple African countries as described in Silva Pereira et al. (2018)." |
| 257 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv" | 200 j_fname = dir_path+ "/data/congodata_deviationfromthemean.csv" |
| 258 #j_fname = r"data/congodata_deviationfromthemean.csv" | 201 #j_fname = r"data/congodata_deviationfromthemean.csv" |
| 259 congo_df = pd.read_csv(j_fname) | 202 congo_df = pd.read_csv(j_fname) |
| 260 congo_df.drop('Colour', axis=1, inplace=True) | 203 congo_df.drop('Colour', axis=1, inplace=True) |
| 261 congo_df.loc[congo_df.index.max() + 1] = localDevList | 204 congo_df.loc[congo_df.index.max() + 1] = localDevList |
| 205 ysize = len(congo_df) * 20 / 97.0 # make vertical size equivlanet 20' is ok for 97. | |
| 262 congo_df.set_index('Strain', inplace=True) | 206 congo_df.set_index('Strain', inplace=True) |
| 263 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values) | 207 cg = sns.clustermap(congo_df, method='ward',cmap = "RdBu_r", col_cluster=False, yticklabels = congo_df.index.values,figsize = (10,ysize)) |
| 264 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally | 208 plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=8) # get y labels printed horizontally |
| 265 ax = cg.ax_heatmap | 209 ax = cg.ax_heatmap |
| 266 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes " | 210 title = "Variant Antigen Profiles of $\itTrypanosoma$ $\itcongolense$ expressed as the deviation from the mean phylotypes " |
| 267 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of " | 211 title +="\nproportions of the sample cohort. Dendrogram reflects the relationships amongst the VSG repertoires of " |
| 268 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)." | 212 title +="each \nstrain. Strains were isolated from multiple African countries as described in Silva Pereira et al. (2018)." |
| 304 for item in pcaResult.Y: | 248 for item in pcaResult.Y: |
| 305 col = myCountries.index(myColours[i]) | 249 col = myCountries.index(myColours[i]) |
| 306 compoundList[col].append(-item[0]) | 250 compoundList[col].append(-item[0]) |
| 307 compoundList[col].append(item[1]) | 251 compoundList[col].append(item[1]) |
| 308 i = i + 1 | 252 i = i + 1 |
| 309 cols = ['r', 'g', 'b', 'c', 'm', 'y', 'grey', 'k'] | 253 colormap = plt.cm.tab20 # nipy_spectral, Set1,Paired |
| 310 | 254 cols = [colormap(i) for i in np.linspace(0, 1, 20)] |
| 311 fig, ax = plt.subplots(figsize=(9, 6)) | 255 fig, ax = plt.subplots(figsize=(9, 6)) |
| 312 #plt.figure(num=1,figsize=(12, 6)) | 256 #plt.figure(num=1,figsize=(12, 6)) |
| 313 i = 0 | 257 i = 0 |
| 314 for d in myCountries: | 258 for d in myCountries: |
| 315 a = compoundList[i] | 259 a = compoundList[i] |
