Mercurial > repos > cpt > cpt_helical_wheel

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+		<![CDATA[
+			cd '$__tool_directory__' && git rev-parse HEAD
+		]]>
+		</version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+        <yield/>
+    </xml>
+</macros>
--- a/cpt_helical_wheel/cpt-macros.xml	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-	<xml name="gff_requirements">
-		<requirements>
-			<requirement type="package" version="2.7">python</requirement>
-			<requirement type="package" version="1.65">biopython</requirement>
-			<requirement type="package" version="2.12.1">requests</requirement>
-			<yield/>
-		</requirements>
-		<version_command>
-		<![CDATA[
-			cd $__tool_directory__ && git rev-parse HEAD
-		]]>
-		</version_command>
-	</xml>
-	<xml name="citation/mijalisrasche">
-		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-		<citation type="bibtex">@unpublished{galaxyTools,
-		author = {E. Mijalis, H. Rasche},
-		title = {CPT Galaxy Tools},
-		year = {2013-2017},
-		note = {https://github.com/tamu-cpt/galaxy-tools/}
-		}
-		</citation>
-	</xml>
-	<xml name="citations">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-		<yield/>
-		</citations>
-	</xml>
-    	<xml name="citations-crr">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Ross},
-				title = {CPT Galaxy Tools},
-				year = {2020-},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-		<yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020-AJC-solo">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-clm">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="sl-citations-clm">
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-	</xml>
-</macros>
--- a/cpt_helical_wheel/generateHelicalWheel.py	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-##
-
-import argparse
-from plotWheels.helical_wheel import helical_wheel
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Generate Helical Wheel")
-    parser.add_argument("--sequence",dest="sequence",type=str)
-    parser.add_argument("--seqRange",dest="seqRange",type=int,default=1)
-    parser.add_argument("--t_size",dest="t_size",type=int,default=32)
-    parser.add_argument("--rotation",dest="rotation",type=int,default=90)
-    parser.add_argument("--numbering",action="store_true",help="numbering for helical wheel")
-    parser.add_argument("--output",dest="output",type=argparse.FileType("wb"), default="_helicalwheel.png")#dest="output",default="_helicalwheel.png")
-    #### circle colors
-    parser.add_argument("--f_A",dest="f_A", default="#ffcc33")
-    parser.add_argument("--f_C",dest="f_C",default="#b5b5b5")
-    parser.add_argument("--f_D",dest="f_D",default="#db270f")
-    parser.add_argument("--f_E",dest="f_E",default="#db270f")
-    parser.add_argument("--f_F",dest="f_F",default="#ffcc33")
-    parser.add_argument("--f_G",dest="f_G",default="#b5b5b5")
-    parser.add_argument("--f_H",dest="f_H",default="#12d5fc")
-    parser.add_argument("--f_I",dest="f_I",default="#ffcc33")
-    parser.add_argument("--f_K",dest="f_K",default="#12d5fc")
-    parser.add_argument("--f_L",dest="f_L",default="#ffcc33")
-    parser.add_argument("--f_M",dest="f_M",default="#ffcc33")
-    parser.add_argument("--f_N",dest="f_N",default="#b5b5b5")
-    parser.add_argument("--f_P",dest="f_P",default="#ffcc33")
-    parser.add_argument("--f_Q",dest="f_Q",default="#b5b5b5")
-    parser.add_argument("--f_R",dest="f_R",default="#12d5fc")
-    parser.add_argument("--f_S",dest="f_S",default="#b5b5b5")
-    parser.add_argument("--f_T",dest="f_T",default="#b5b5b5")
-    parser.add_argument("--f_V",dest="f_V",default="#ffcc33")
-    parser.add_argument("--f_W",dest="f_W",default="#ffcc33")
-    parser.add_argument("--f_Y",dest="f_Y",default="#b5b5b5")
-    ### text colors
-    parser.add_argument("--t_A",dest="t_A",default="k")
-    parser.add_argument("--t_C",dest="t_C",default="k")
-    parser.add_argument("--t_D",dest="t_D",default="w")
-    parser.add_argument("--t_E",dest="t_E",default="w")
-    parser.add_argument("--t_F",dest="t_F",default="k")
-    parser.add_argument("--t_G",dest="t_G",default="k")
-    parser.add_argument("--t_H",dest="t_H",default="k")
-    parser.add_argument("--t_I",dest="t_I",default="k")
-    parser.add_argument("--t_K",dest="t_K",default="k")
-    parser.add_argument("--t_L",dest="t_L",default="k")
-    parser.add_argument("--t_M",dest="t_M",default="k")
-    parser.add_argument("--t_N",dest="t_N",default="k")
-    parser.add_argument("--t_P",dest="t_P",default="k")
-    parser.add_argument("--t_Q",dest="t_Q",default="k")
-    parser.add_argument("--t_R",dest="t_R",default="k")
-    parser.add_argument("--t_S",dest="t_S",default="k")
-    parser.add_argument("--t_T",dest="t_T",default="k")
-    parser.add_argument("--t_V",dest="t_V",default="k")
-    parser.add_argument("--t_W",dest="t_W",default="k")
-    parser.add_argument("--t_Y",dest="t_Y",default="k")
-
-    args = parser.parse_args()
-
-
-    #print(type(args.output))
-
-    f_colors = [args.f_A,args.f_C,args.f_D,args.f_E,args.f_F,args.f_G,args.f_H,args.f_I,args.f_K,
-                args.f_L,args.f_M,args.f_N,args.f_P,args.f_Q,args.f_R,args.f_S,args.f_T,args.f_V,
-                args.f_W,args.f_Y]
-
-    t_colors = [args.t_A,args.t_C,args.t_D,args.t_E,args.t_F,args.t_G,args.t_H,args.t_I,args.t_K,
-                args.t_L,args.t_M,args.t_N,args.t_P,args.t_Q,args.t_R,args.t_S,args.t_T,args.t_V,
-                args.t_W,args.t_Y]
-
-    colors = [f_colors, t_colors]
-
-    tmp_file = "./tmp.png"
-
-    helical_wheel(sequence=args.sequence,
-                  colorcoding=colors[0],
-                  text_color=colors[1],
-                  seqRange=args.seqRange,
-                  t_size=args.t_size,
-                  rot=args.rotation,
-                  numbering=args.numbering,
-                  filename=tmp_file
-                 )
-
-    with open("tmp.png", "rb") as f:
-        for line in f:
-            args.output.write(line)
--- a/cpt_helical_wheel/generateHelicalWheel.xml	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,388 +0,0 @@
-<?xml version="1.1"?>
-<tool id="edu.tamu.cpt2.helicalWheel.generateHelicalWheel" name="Helical Wheel" version="1.0">
-    <description>Generate and Plot a Protein Helical Wheel</description>
-    <macros>
-		<import>cpt-macros.xml</import>
-                <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements">
-        <requirement type="package">numpy</requirement>
-        <requirement type="package">pandas</requirement>
-        <requirement type="package" version="0.18.1">scikit-learn</requirement>
-        <requirement type="package">scipy</requirement>
-        <requirement type="package">matplotlib</requirement>
-    </expand>
-    <command detect_errors="aggressive"><![CDATA[
-python $__tool_directory__/generateHelicalWheel.py
---sequence $sequence
---seqRange $seqRange
---t_size $t_size
---rotation $rotation
-$numbering
---f_A "$sec_B.f_A"
---f_C "$sec_C.f_C"
---f_D "$sec_D.f_D"
---f_E "$sec_D.f_E"
---f_F "$sec_B.f_F"
---f_G "$sec_C.f_G"
---f_H "$sec_E.f_H"
---f_I "$sec_B.f_I"
---f_K "$sec_E.f_K"
---f_L "$sec_B.f_L"
---f_M "$sec_B.f_M"
---f_N "$sec_C.f_N"
---f_P "$sec_B.f_P"
---f_Q "$sec_C.f_Q"
---f_R "$sec_E.f_R"
---f_S "$sec_C.f_S"
---f_T "$sec_C.f_T"
---f_V "$sec_B.f_V"
---f_W "$sec_B.f_W"
---f_Y "$sec_C.f_Y"
---t_A "$sec_B.t_A"
---t_C "$sec_C.t_C"
---t_D "$sec_D.t_D"
---t_E "$sec_D.t_E"
---t_F "$sec_B.t_F"
---t_G "$sec_C.t_G"
---t_H "$sec_E.t_H"
---t_I "$sec_B.t_I"
---t_K "$sec_E.t_K"
---t_L "$sec_B.t_L"
---t_M "$sec_B.t_M"
---t_N "$sec_C.t_N"
---t_P "$sec_B.t_P"
---t_Q "$sec_C.t_Q"
---t_R "$sec_E.t_R"
---t_S "$sec_C.t_S"
---t_T "$sec_C.t_T"
---t_V "$sec_B.t_V"
---t_W "$sec_B.t_W"
---t_Y "$sec_C.t_Y"
---output $output
-]]></command>
-    <inputs>
-        <param label="Paste in exact sequence to be plotted" name="sequence" type="text" />
-        <param label="Label Start Number" name="seqRange" type="integer" value="1" help="starting residue number to use for labels" />
-        <param label="Amino Acid Text Size" name="t_size" type="integer" value="32" help="Alters the Text Size. Default is 32" />
-        <param label="Rotation" name="rotation" type="integer" value="90" help="Rotates the helical wheel. Default is 90" />
-        <param label="Label Numbering Text" name="numbering" type="boolean" help="number schema subscripts" truevalue="--numbering" falsevalue=""/>
-            <section name="sec_B" title="nonpolar ; hydrophobic">
-                <param name="f_A" type="color" label="Color for A" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_A" type="color" label="Text color for A" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_F" type="color" label="Color for F" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_F" type="color" label="Text color for F" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_I" type="color" label="Color for I" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_I" type="color" label="Text color for I" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_L" type="color" label="Color for L" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_L" type="color" label="Text color for L" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_M" type="color" label="Color for M" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_M" type="color" label="Text color for M" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_P" type="color" label="Color for P" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_P" type="color" label="Text color for P" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_V" type="color" label="Color for V" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_V" type="color" label="Text color for V" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_W" type="color" label="Color for W" value="#ffcc33">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_W" type="color" label="Text color for W" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-            </section>
-            <section name="sec_C" title="polar ; uncharged">
-                <param name="f_C" type="color" label="Color for C" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_C" type="color" label="Text color for C" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_G" type="color" label="Color for G" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_G" type="color" label="Text color for G" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_N" type="color" label="Color for N" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_N" type="color" label="Text color for N" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_Q" type="color" label="Color for Q" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_Q" type="color" label="Text color for Q" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_S" type="color" label="Color for S" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_S" type="color" label="Text color for S" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_T" type="color" label="Color for T" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_T" type="color" label="Text color for T" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_Y" type="color" label="Color for Y" value="#b5b5b5">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_Y" type="color" label="Text color for Y" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-            </section>
-            <section name="sec_D" title="polar ; acidic (negatively charged)">
-                <param name="f_D" type="color" label="Color for D" value="#db270f">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_D" type="color" label="Text color for D" value="#FFFFFF">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_E" type="color" label="Color for E" value="#db270f">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_E" type="color" label="Text color for E" value="#FFFFFF">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-            </section>
-            <section name="sec_E" title="polar ; basic (positive charge)">
-                <param name="f_H" type="color" label="Color for H" value="#12d5fc">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_H" type="color" label="Text color for H" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_K" type="color" label="Color for K" value="#12d5fc">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_K" type="color" label="Text color for K" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="f_R" type="color" label="Color for R" value="#12d5fc">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-                <param name="t_R" type="color" label="Text color for R" value="#000000">
-                    <sanitizer>
-                        <valid initial="string.ascii_letters,string.digits">
-                            <add value="#" />
-                        </valid>
-                    </sanitizer>
-                </param>
-            </section>
-    </inputs>
-    <outputs>
-        <data format="png" name="output" label="_helicalWheel.png" />
-    </outputs>
-    <help><![CDATA[
-**What it does**
-INPUT : Peptide Sequence
-PARAMETERS :
-primary parameters :
-> Paste in exact sequence to be plotted - Input Sequence of desired helical wheel plot
-> Label Start Number - Numerical value that represents the beginning of the sequence (default 1)
-> Amino Acid Text Size - Size of text for helical wheel (default 32)
-> Rotation - Degrees to rotate helical wheel (defaul 90)
-color parameters :
-> Background Color and Text Color Selections
-METHOD : Using the core features from the modlAMP python module, a helical wheel projection is constructed.
-OUTPUT : _helicalWheel.png
-NOTES : Peptide lengths longer than 36 residues will not properly graph.
-]]></help>
-    <citations>
-        <citation type="doi">10.1093/bioinformatics/btx285</citation>
-        <citation type="bibtex">
-        @unpublished{galaxyTools,
-            author = {C. Ross},
-            title = {CPT Galaxy Tools},
-            year = {2020-},
-            note = {https://github.com/tamu-cpt/galaxy-tools/}
-        }
-        </citation>
-    </citations>
-</tool>
--- a/cpt_helical_wheel/macros.xml	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-	<xml name="requirements">
-		<requirements>
-      <requirement type="package" version="3.6">python</requirement>
-      <requirement type="package" version="1.77">biopython</requirement>
-      <requirement type="package" version="1.1.3">cpt_gffparser</requirement>
-      <yield/>
-		</requirements>
-	</xml>
-	<xml name="genome_selector">
-		<conditional name="reference_genome">
-			<param name="reference_genome_source" type="select" label="Reference Genome">
-				<option value="history" selected="True">From History</option>
-				<option value="cached">Locally Cached</option>
-			</param>
-			<when value="cached">
-				<param name="fasta_indexes" type="select" label="Source FASTA Sequence">
-					<options from_data_table="all_fasta"/>
-				</param>
-			</when>
-			<when value="history">
-				<param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
-			</when>
-		</conditional>
-	</xml>
-	<xml name="gff3_input">
-		<param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
-	</xml>
-	<xml name="input/gff3+fasta">
-		<expand macro="gff3_input" />
-		<expand macro="genome_selector" />
-	</xml>
-	<token name="@INPUT_GFF@">
-	"$gff3_data"
-	</token>
-	<token name="@INPUT_FASTA@">
-#if str($reference_genome.reference_genome_source) == 'cached':
-		"${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
-		genomeref.fa
-#end if
-	</token>
-	<token name="@GENOME_SELECTOR_PRE@">
-#if $reference_genome.reference_genome_source == 'history':
-		ln -s $reference_genome.genome_fasta genomeref.fa;
-#end if
-	</token>
-	<token name="@GENOME_SELECTOR@">
-#if str($reference_genome.reference_genome_source) == 'cached':
-		"${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
-		genomeref.fa
-#end if
-	</token>
-</macros>
--- a/cpt_helical_wheel/plotWheels/core.py	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1223 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-.. currentmodule:: modlamp.core
-
-.. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch>
-
-Core helper functions and classes for other modules. The two main classes are:
-
-=============================    =======================================================================================
-Class                            Characteristics
-=============================    =======================================================================================
-:py:class:`BaseSequence`         Base class inheriting to all sequence classes in the module :py:mod:`modlamp.sequences`
-:py:class:`BaseDescriptor`       Base class inheriting to the two descriptor classes in :py:mod:`modlamp.descriptors`
-=============================    =======================================================================================
-"""
-
-import os
-import random
-import re
-
-import numpy as np
-import pandas as pd
-import collections
-import operator
-from scipy.spatial import distance
-from sklearn.preprocessing import MinMaxScaler, StandardScaler
-from sklearn.utils import shuffle
-
-__author__ = "Alex Müller, Gisela Gabernet"
-__docformat__ = "restructuredtext en"
-
-
-class BaseSequence(object):
-    """Base class for sequence classes in the module :mod:`modlamp.sequences`.
-    It contains amino acid probabilities for different sequence generation classes.
-
-    The following amino acid probabilities are used: (extracted from the
-    `APD3 <http://aps.unmc.edu/AP/statistic/statistic.php>`_, March 17, 2016)
-
-    ===  ====    ======   =========    ==========
-    AA   rand    AMP      AMPnoCM      randnoCM
-    ===  ====    ======   =========    ==========
-    A    0.05    0.0766   0.0812275    0.05555555
-    C    0.05    0.071    0.0          0.0
-    D    0.05    0.026    0.0306275    0.05555555
-    E    0.05    0.0264   0.0310275    0.05555555
-    F    0.05    0.0405   0.0451275    0.05555555
-    G    0.05    0.1172   0.1218275    0.05555555
-    H    0.05    0.021    0.0256275    0.05555555
-    I    0.05    0.061    0.0656275    0.05555555
-    K    0.05    0.0958   0.1004275    0.05555555
-    L    0.05    0.0838   0.0884275    0.05555555
-    M    0.05    0.0123   0.0          0.0
-    N    0.05    0.0386   0.0432275    0.05555555
-    P    0.05    0.0463   0.0509275    0.05555555
-    Q    0.05    0.0251   0.0297275    0.05555555
-    R    0.05    0.0545   0.0591275    0.05555555
-    S    0.05    0.0613   0.0659275    0.05555555
-    T    0.05    0.0455   0.0501275    0.05555555
-    V    0.05    0.0572   0.0618275    0.05555555
-    W    0.05    0.0155   0.0201275    0.05555555
-    Y    0.05    0.0244   0.0290275    0.05555555
-    ===  ====    ======   =========    ==========
-
-    """
-
-    def __init__(self, seqnum, lenmin=7, lenmax=28):
-        """
-        :param seqnum: number of sequences to generate
-        :param lenmin: minimal length of the generated sequences
-        :param lenmax: maximal length of the generated sequences
-        :return: attributes :py:attr:`seqnum`, :py:attr:`lenmin` and :py:attr:`lenmax`.
-        :Example:
-
-        >>> b = BaseSequence(10, 7, 28)
-        >>> b.seqnum
-        10
-        >>> b.lenmin
-        7
-        >>> b.lenmax
-        28
-        """
-        self.sequences = list()
-        self.names = list()
-        self.lenmin = int(lenmin)
-        self.lenmax = int(lenmax)
-        self.seqnum = int(seqnum)
-
-        # AA classes:
-        self.AA_hyd = ['G', 'A', 'L', 'I', 'V']
-        self.AA_basic = ['K', 'R']
-        self.AA_acidic = ['D', 'E']
-        self.AA_aroma = ['W', 'Y', 'F']
-        self.AA_polar = ['S', 'T', 'Q', 'N']
-        # AA labels:
-        self.AAs = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
-        # AA probability from the APD3 database:
-        self.prob_AMP = [0.0766, 0.071, 0.026, 0.0264, 0.0405, 0.1172, 0.021, 0.061, 0.0958, 0.0838, 0.0123, 0.0386,
-                         0.0463, 0.0251, 0.0545, 0.0613, 0.0455, 0.0572, 0.0155, 0.0244]
-        # AA probability from the APD2 database without Cys and Met (synthesis reasons)
-        self.prob_AMPnoCM = [0.081228, 0., 0.030627, 0.031027, 0.045128, 0.121828, 0.025627, 0.065628, 0.100428,
-                             0.088428, 0., 0.043228, 0.050928, 0.029728, 0.059128, 0.065927, 0.050128, 0.061828,
-                             0.020128, 0.029028]
-        # equal AA probabilities:
-        self.prob = [0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05,
-                     0.05, 0.05, 0.05, 0.05]
-        # equal AA probabilities but 0 for Cys and Met:
-        self.prob_randnoCM = [0.05555555555, 0.0, 0.05555555555, 0.05555555555, 0.05555555555, 0.05555555555,
-                              0.05555555555, 0.05555555555, 0.05555555555, 0.05555555555, 0.0, 0.05555555555,
-                              0.05555555555, 0.05555555555, 0.05555555555, 0.05555555555, 0.05555555555, 0.05555555555,
-                              0.05555555555, 0.05555555555]
-
-        # AA probability from the linear CancerPPD peptides:
-        self.prob_ACP = [0.14526966, 0., 0.00690031, 0.00780824, 0.06991102, 0.04957327, 0.01725077, 0.05647358,
-                         0.27637552, 0.17759216, 0.00998729, 0.00798983, 0.01307427, 0.00381333, 0.02941711,
-                         0.02651171, 0.0154349, 0.04013074, 0.0406755, 0.00581079]
-
-        # AA probabilities for perfect amphipathic helix of different arc sizes
-        self.prob_amphihel = [[0.04545455, 0., 0.04545454, 0.04545455, 0., 0.04545455, 0.04545455, 0., 0.25, 0., 0.,
-                               0.04545454, 0.04545455, 0.04545454, 0.25, 0.04545454, 0.04545454, 0., 0., 0.04545454],
-                              [0., 0., 0., 0., 0.16666667, 0., 0., 0.16666667, 0., 0.16666667, 0., 0., 0., 0., 0., 0.,
-                               0., 0.16666667, 0.16666667, (1. - 0.16666667 * 5)]]
-
-        # helical ACP AA probabilities, depending on the position of the AA in the helix.
-        self.prob_ACPhel = np.array([[0.0483871, 0., 0., 0.0483871, 0.01612903, 0.12903226, 0.03225807, 0.09677419,
-                                      0.19354839, 0.5, 0.0483871, 0.11290323, 0.1, 0.18518519, 0.07843137, 0.12,
-                                      0.17073172, 0.16666667],
-                                     [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.01612903, 0., 0., 0., 0., 0.,
-                                      0.02439024,
-                                      0.19444444],
-                                     [0., 0.01612903, 0., 0.27419355, 0.01612903, 0., 0., 0.01612903, 0., 0., 0., 0.,
-                                      0.,
-                                      0., 0., 0., 0., 0.],
-                                     [0., 0., 0., 0., 0., 0., 0., 0.06451613, 0., 0.01612903, 0.0483871, 0.01612903, 0.,
-                                      0.01851852, 0., 0., 0., 0.],
-                                     [0.16129032, 0.0483871, 0.30645161, 0., 0.0483871, 0., 0., 0.01612903, 0.,
-                                      0.01612903,
-                                      0., 0.09677419, 0.06666667, 0.01851852, 0., 0.02, 0.14634146, 0.],
-                                     [0.64516129, 0., 0.17741936, 0.14516129, 0., 0.01612903, 0.25806452, 0.11290323,
-                                      0.06451613, 0.08064516, 0.22580645, 0.03225807, 0.06666667, 0.2037037, 0.1372549,
-                                      0.1, 0., 0.05555556],
-                                     [0., 0., 0., 0.01612903, 0., 0., 0.01612903, 0., 0.03225807, 0., 0., 0.20967742,
-                                      0.,
-                                      0., 0., 0.16, 0., 0.],
-                                     [0.0483871, 0.11290323, 0.01612903, 0.08064516, 0.33870968, 0.27419355, 0.,
-                                      0.0483871, 0.14516129, 0.06451613, 0.03225807, 0.06451613, 0.18333333, 0., 0.,
-                                      0.1, 0.26829268, 0.],
-                                     [0., 0.03225807, 0.01612903, 0.12903226, 0.12903226, 0., 0.38709677, 0.33870968,
-                                      0.0483871, 0.03225807, 0.41935484, 0.08064516, 0., 0.03703704, 0.29411765,
-                                      0.04, 0.02439024, 0.02777778],
-                                     [0.0483871, 0.70967742, 0.12903226, 0.0483871, 0.09677419, 0.32258064, 0.20967742,
-                                      0.06451613, 0.11290323, 0.06451613, 0.03225807, 0.03225807, 0.28333333,
-                                      0.24074074,
-                                      0.03921569, 0.28, 0.07317073, 0.22222222],
-                                     [0., 0.01612903, 0.01612903, 0.0483871, 0.01612903, 0.03225807, 0., 0., 0., 0.,
-                                      0., 0., 0.03333333, 0., 0.01960784, 0.02, 0., 0.],
-                                     [0., 0.01612903, 0., 0., 0., 0., 0., 0., 0.01612903, 0., 0.03225807, 0., 0., 0.,
-                                      0.01960784, 0.02, 0., 0.],
-                                     [0., 0., 0.14516129, 0.01612903, 0.03225807, 0.01612903, 0., 0., 0., 0.,
-                                      0.01612903, 0., 0., 0.12962963, 0.17647059, 0., 0., 0.],
-                                     [0., 0., 0.01612903, 0.01612903, 0., 0., 0.01612903, 0., 0.01612903, 0., 0.,
-                                      0.01612903, 0., 0.01851852, 0., 0., 0., 0.],
-                                     [0., 0.01612903, 0.01612903, 0., 0.01612903, 0., 0.01612903, 0., 0.01612903,
-                                      0.01612903, 0.01612903, 0.01612903, 0., 0.01851852, 0.01960784, 0., 0.04878049,
-                                      0.],
-                                     [0.01612903, 0., 0.01612903, 0.12903226, 0.03225807, 0.03225807, 0.0483871,
-                                      0.17741936, 0., 0.03225807, 0.09677419, 0.0483871, 0.01666667, 0., 0.15686274,
-                                      0.1, 0., 0.05555556],
-                                     [0.01612903, 0.01612903, 0., 0.01612903, 0.0483871, 0.01612903, 0., 0.01612903, 0.,
-                                      0.01612903, 0.01612903, 0.11290323, 0., 0.01851852, 0.03921569, 0.02, 0.,
-                                      0.05555556],
-                                     [0.01612903, 0.01612903, 0.01612903, 0.01612903, 0.20967742, 0.16129032,
-                                      0.01612903,
-                                      0.0483871, 0.33870968, 0.16129032, 0., 0.14516129, 0.25, 0.11111111, 0.01960784,
-                                      0.02, 0.21951219, 0.22222222],
-                                     [0., 0., 0.12903226, 0.01612903, 0., 0., 0., 0., 0.01612903, 0., 0., 0., 0., 0.,
-                                      0.,
-                                      0., 0.02439024, 0.],
-                                     [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.01612903, 0., 0., 0., 0., 0., 0.]])
-
-    def save_fasta(self, filename, names=False):
-        """Method to save generated sequences in a ``.FASTA`` formatted file.
-
-        :param filename: output filename in which the sequences from :py:attr:`sequences` are safed in fasta format.
-        :param names: {bool} whether sequence names from :py:attr:`names` should be saved as sequence identifiers
-        :return: a FASTA formatted file containing the generated sequences
-        :Example:
-
-        >>> b = BaseSequence(2)
-        >>> b.sequences = ['KLLSLSLALDLLS', 'KLPERTVVNSSDF']
-        >>> b.names = ['Sequence1', 'Sequence2']
-        >>> b.save_fasta('/location/of/fasta/file.fasta', names=True)
-        """
-        if names:
-            save_fasta(filename, self.sequences, self.names)
-        else:
-            save_fasta(filename, self.sequences)
-
-    def mutate_AA(self, nr, prob):
-        """Method to mutate with **prob** probability a **nr** of positions per sequence randomly.
-
-        :param nr: number of mutations to perform per sequence
-        :param prob: probability of mutating a sequence
-        :return: mutated sequences in the attribute :py:attr:`sequences`.
-        :Example:
-
-        >>> b = BaseSequence(1)
-        >>> b.sequences = ['IAKAGRAIIK']
-        >>> b.mutate_AA(3, 1.)
-        >>> b.sequences
-        ['NAKAGRAWIK']
-        """
-        for s in range(len(self.sequences)):
-            # mutate: yes or no? prob = mutation probability
-            mutate = np.random.choice([1, 0], 1, p=[prob, 1 - float(prob)])
-            if mutate == 1:
-                seq = list(self.sequences[s])
-                cnt = 0
-                while cnt < nr:  # mutate "nr" AA
-                    seq[random.choice(range(len(seq)))] = random.choice(self.AAs)
-                    cnt += 1
-                self.sequences[s] = ''.join(seq)
-
-    def filter_duplicates(self):
-        """Method to filter duplicates in the sequences from the class attribute :py:attr:`sequences`
-
-        :return: filtered sequences list in the attribute :py:attr:`sequences` and corresponding names.
-        :Example:
-
-        >>> b = BaseSequence(4)
-        >>> b.sequences = ['KLLKLLKKLLKLLK', 'KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK', 'KLAKLAKKLAKLAK']
-        >>> b.filter_duplicates()
-        >>> b.sequences
-        ['KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK']
-
-        .. versionadded:: v2.2.5
-        """
-        if not self.names:
-            self.names = ['Seq_' + str(i) for i in range(len(self.sequences))]
-        df = pd.DataFrame(list(zip(self.sequences, self.names)), columns=['Sequences', 'Names'])
-        df = df.drop_duplicates('Sequences', 'first')  # keep first occurrence of duplicate
-        self.sequences = df['Sequences'].get_values().tolist()
-        self.names = df['Names'].get_values().tolist()
-
-    def keep_natural_aa(self):
-        """Method to filter out sequences that do not contain natural amino acids. If the sequence contains a character
-        that is not in ``['A','C','D,'E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y']``.
-
-        :return: filtered sequence list in the attribute :py:attr:`sequences`. The other attributes are also filtered
-            accordingly (if present).
-        :Example:
-
-        >>> b = BaseSequence(2)
-        >>> b.sequences = ['BBBsdflUasUJfBJ', 'GLFDIVKKVVGALGSL']
-        >>> b.keep_natural_aa()
-        >>> b.sequences
-        ['GLFDIVKKVVGALGSL']
-        """
-        natural_aa = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W',
-                      'Y']
-
-        seqs = []
-        names = []
-
-        for i, s in enumerate(self.sequences):
-            seq = list(s.upper())
-            if all(c in natural_aa for c in seq):
-                seqs.append(s.upper())
-                if hasattr(self, 'names') and self.names:
-                    names.append(self.names[i])
-
-        self.sequences = seqs
-        self.names = names
-
-    def filter_aa(self, amino_acids):
-        """Method to filter out corresponding names and descriptor values of sequences with given amino acids in the
-        argument list *aminoacids*.
-
-        :param amino_acids: {list} amino acids to be filtered
-        :return: filtered list of sequences names in the corresponding attributes.
-        :Example:
-
-        >>> b = BaseSequence(3)
-        >>> b.sequences = ['AAALLLIIIKKK', 'CCEERRT', 'LLVVIIFFFQQ']
-        >>> b.filter_aa(['C'])
-        >>> b.sequences
-        ['AAALLLIIIKKK', 'LLVVIIFFFQQ']
-        """
-
-        pattern = re.compile('|'.join(amino_acids))
-        seqs = []
-        names = []
-
-        for i, s in enumerate(self.sequences):
-            if not pattern.search(s):
-                seqs.append(s)
-                if hasattr(self, 'names') and self.names:
-                    names.append(self.names[i])
-
-        self.sequences = seqs
-        self.names = names
-
-    def clean(self):
-        """Method to clean / clear / empty the attributes :py:attr:`sequences` and :py:attr:`names`.
-
-        :return: freshly initialized, empty class attributes.
-        """
-        self.__init__(self.seqnum, self.lenmin, self.lenmax)
-
-
-class BaseDescriptor(object):
-    """
-    Base class inheriting to both peptide descriptor classes :py:class:`modlamp.descriptors.GlobalDescriptor` and
-    :py:class:`modlamp.descriptors.PeptideDescriptor`.
-    """
-
-    def __init__(self, seqs):
-        """
-        :param seqs: a ``.FASTA`` file with sequences, a list / array of sequences or a single sequence as string to
-            calculate the descriptor values for.
-        :return: initialized attributes :py:attr:`sequences` and :py:attr:`names`.
-        :Example:
-
-        >>> AMP = BaseDescriptor('KLLKLLKKLLKLLK','pepCATS')
-        >>> AMP.sequences
-        ['KLLKLLKKLLKLLK']
-        >>> seqs = BaseDescriptor('/Path/to/file.fasta', 'eisenberg')  # load sequences from .fasta file
-        >>> seqs.sequences
-        ['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'...]
-        """
-        if type(seqs) == list and seqs[0].isupper():
-            self.sequences = [s.strip() for s in seqs]
-            self.names = []
-        elif type(seqs) == np.ndarray and seqs[0].isupper():
-            self.sequences = [s.strip() for s in seqs.tolist()]
-            self.names = []
-        elif type(seqs) == str and seqs.isupper():
-            self.sequences = [seqs.strip()]
-            self.names = []
-        elif os.path.isfile(seqs):
-            if seqs.endswith('.fasta'):  # read .fasta file
-                self.sequences, self.names = read_fasta(seqs)
-            elif seqs.endswith('.csv'):  # read .csv file with sequences every line
-                with open(seqs) as f:
-                    self.sequences = list()
-                    cntr = 0
-                    self.names = []
-                    for line in f:
-                        if line.isupper():
-                            self.sequences.append(line.strip())
-                            self.names.append('seq_' + str(cntr))
-                            cntr += 1
-            else:
-                print("Sorry, currently only .fasta or .csv files can be read!")
-        else:
-            print("%s does not exist, is not a valid list of AA sequences or is not a valid sequence string" % seqs)
-
-        self.descriptor = np.array([[]])
-        self.target = np.array([], dtype='int')
-        self.scaler = None
-        self.featurenames = []
-
-    def read_fasta(self, filename):
-        """Method for loading sequences from a ``.FASTA`` formatted file into the attributes :py:attr:`sequences` and
-        :py:attr:`names`.
-
-        :param filename: {str} ``.FASTA`` file with sequences and headers to read
-        :return: {list} sequences in the attribute :py:attr:`sequences` with corresponding sequence names in
-            :py:attr:`names`.
-        """
-        self.sequences, self.names = read_fasta(filename)
-
-    def save_fasta(self, filename, names=False):
-        """Method for saving sequences from :py:attr:`sequences` to a ``.FASTA`` formatted file.
-
-        :param filename: {str} filename of the output ``.FASTA`` file
-        :param names: {bool} whether sequence names from self.names should be saved as sequence identifiers
-        :return: a FASTA formatted file containing the generated sequences
-        """
-        if names:
-            save_fasta(filename, self.sequences, self.names)
-        else:
-            save_fasta(filename, self.sequences)
-
-    def count_aa(self, scale='relative', average=False, append=False):
-        """Method for producing the amino acid distribution for the given sequences as a descriptor
-
-        :param scale: {'absolute' or 'relative'} defines whether counts or frequencies are given for each AA
-        :param average: {boolean} whether the averaged amino acid counts for all sequences should be returned
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: the amino acid distributions for every sequence individually in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> AMP = PeptideDescriptor('ACDEFGHIKLMNPQRSTVWY') # aa_count() does not depend on the descriptor scale
-        >>> AMP.count_aa()
-        >>> AMP.descriptor
-        array([[ 0.05,  0.05,  0.05,  0.05,  0.05,  0.05,  0.05,  0.05,  0.05, ... ]])
-        >>> AMP.descriptor.shape
-        (1, 20)
-
-        .. seealso:: :py:func:`modlamp.core.count_aa()`
-        """
-        desc = list()
-        for seq in self.sequences:
-            od = count_aas(seq, scale)
-            desc.append(list(od.values()))
-
-        desc = np.array(desc)
-        self.featurenames = list(od.keys())
-
-        if append:
-            self.descriptor = np.hstack((self.descriptor, desc))
-        elif average:
-            self.descriptor = np.mean(desc, axis=0)
-        else:
-            self.descriptor = desc
-
-    def count_ngrams(self, n):
-        """Method for producing n-grams of all sequences in self.sequences
-
-        :param n: {int or list of ints} defines whether counts or frequencies are given for each AA
-        :return: {dict} dictionary with n-grams as keys and their counts in the sequence as values in :py:attr:`descriptor`
-        :Example:
-
-        >>> D = PeptideDescriptor('GLLDFLSLAALSLDKLVKKGALS')
-        >>> D.count_ngrams([2, 3])
-        >>> D.descriptor
-        {'LS': 3, 'LD': 2, 'LSL': 2, 'AL': 2, ..., 'LVK': 1}
-
-        .. seealso:: :py:func:`modlamp.core.count_ngrams()`
-        """
-        ngrams = dict()
-        for seq in self.sequences:
-            d = count_ngrams(seq, n)
-            for k, v in d.items():
-                if k in ngrams.keys():
-                    ngrams[k] += v
-                else:
-                    ngrams[k] = v
-        self.descriptor = ngrams
-
-    def feature_scaling(self, stype='standard', fit=True):
-        """Method for feature scaling of the calculated descriptor matrix.
-
-        :param stype: {'standard' or 'minmax'} type of scaling to be used
-        :param fit: {boolean} defines whether the used scaler is first fitting on the data (True) or
-            whether the already fitted scaler in :py:attr:`scaler` should be used to transform (False).
-        :return: scaled descriptor values in :py:attr:`descriptor`
-        :Example:
-
-        >>> D.descriptor
-        array([[0.155],[0.34],[0.16235294],[-0.08842105],[0.116]])
-        >>> D.feature_scaling(type='minmax',fit=True)
-        array([[0.56818182],[1.],[0.5853447],[0.],[0.47714988]])
-        """
-        if stype in ['standard', 'minmax']:
-            if stype == 'standard':
-                self.scaler = StandardScaler()
-            elif stype == 'minmax':
-                self.scaler = MinMaxScaler()
-
-            if fit:
-                self.descriptor = self.scaler.fit_transform(self.descriptor)
-            else:
-                self.descriptor = self.scaler.transform(self.descriptor)
-        else:
-            print("Unknown scaler type!\nAvailable: 'standard', 'minmax'")
-
-    def feature_shuffle(self):
-        """Method for shuffling feature columns randomly.
-
-        :return: descriptor matrix with shuffled feature columns in :py:attr:`descriptor`
-        :Example:
-
-        >>> D.descriptor
-        array([[0.80685625,167.05234375,39.56818125,-0.26338667,155.16888667,33.48778]])
-        >>> D.feature_shuffle()
-        array([[155.16888667,-0.26338667,167.05234375,0.80685625,39.56818125,33.48778]])
-        """
-        self.descriptor = shuffle(self.descriptor.transpose()).transpose()
-
-    def sequence_order_shuffle(self):
-        """Method for shuffling sequence order in the attribute :py:attr:`sequences`.
-
-        :return: sequences in :py:attr:`sequences` with shuffled order in the list.
-        :Example:
-
-        >>> D.sequences
-        ['LILRALKGAARALKVA','VKIAKIALKIIKGLG','VGVRLIKGIGRVARGAI','LRGLRGVIRGGKAIVRVGK','GGKLVRLIARIGKGV']
-        >>> D.sequence_order_shuffle()
-        >>> D.sequences
-        ['VGVRLIKGIGRVARGAI','LILRALKGAARALKVA','LRGLRGVIRGGKAIVRVGK','GGKLVRLIARIGKGV','VKIAKIALKIIKGLG']
-        """
-        self.sequences = shuffle(self.sequences)
-
-    def random_selection(self, num):
-        """Method to randomly select a specified number of sequences (with names and descriptors if present) out of a given
-        descriptor instance.
-
-        :param num: {int} number of entries to be randomly selected
-        :return: updated instance
-        :Example:
-
-        >>> h = Helices(7, 28, 100)
-        >>> h.generate_helices()
-        >>> desc = PeptideDescriptor(h.sequences, 'eisenberg')
-        >>> desc.calculate_moment()
-        >>> len(desc.sequences)
-        100
-        >>> len(desc.descriptor)
-        100
-        >>> desc.random_selection(10)
-        >>> len(desc.descriptor)
-        10
-        >>> len(desc.descriptor)
-        10
-
-        .. versionadded:: v2.2.3
-        """
-
-        sel = np.random.choice(len(self.sequences), size=num, replace=False)
-        self.sequences = np.array(self.sequences)[sel].tolist()
-        if hasattr(self, 'descriptor') and self.descriptor.size:
-            self.descriptor = self.descriptor[sel]
-        if hasattr(self, 'names') and self.names:
-            self.names = np.array(self.names)[sel].tolist()
-        if hasattr(self, 'target') and self.target.size:
-            self.target = self.target[sel]
-
-    def minmax_selection(self, iterations, distmetric='euclidean', seed=0):
-        """Method to select a specified number of sequences according to the minmax algorithm.
-
-        :param iterations: {int} Number of sequences to retrieve.
-        :param distmetric: Distance metric to calculate the distances between the sequences in descriptor space.
-            Choose from 'euclidean' or 'minkowsky'.
-        :param seed: {int} Set a random seed for numpy to pick the first sequence.
-        :return: updated instance
-
-        .. seealso:: **SciPy** http://docs.scipy.org/doc/scipy/reference/spatial.distance.html
-        """
-
-        # Storing M into pool, where selections get deleted
-        pool = self.descriptor  # Store pool where selections get deleted
-        minmaxidx = list()  # Store original indices of selections to return
-
-        # Randomly selecting first peptide into the sele
-        np.random.seed(seed)
-        idx = int(np.random.random_integers(0, len(pool), 1))
-        sele = pool[idx:idx + 1, :]
-        minmaxidx.append(int(*np.where(np.all(self.descriptor == pool[idx:idx + 1, :], axis=1))))
-
-        # Deleting peptide in selection from pool
-        pool = np.delete(pool, idx, axis=0)
-
-        for i in range(iterations - 1):
-            # Calculating distance from sele to the rest of the peptides
-            dist = distance.cdist(pool, sele, distmetric)
-
-            # Choosing maximal distances for every sele instance
-            maxidx = np.argmax(dist, axis=0)
-            maxcols = np.max(dist, axis=0)
-
-            # Choosing minimal distance among the maximal distances
-            minmax = np.argmin(maxcols)
-            maxidx = int(maxidx[minmax])
-
-            # Adding it to selection and removing from pool
-            sele = np.append(sele, pool[maxidx:maxidx + 1, :], axis=0)
-            pool = np.delete(pool, maxidx, axis=0)
-            minmaxidx.append(int(*np.where(np.all(self.descriptor == pool[maxidx:maxidx + 1, :], axis=1))))
-
-        self.sequences = np.array(self.sequences)[minmaxidx].tolist()
-        if hasattr(self, 'descriptor') and self.descriptor.size:
-            self.descriptor = self.descriptor[minmaxidx]
-        if hasattr(self, 'names') and self.names:
-            self.names = np.array(self.names)[minmaxidx].tolist()
-        if hasattr(self, 'target') and self.target.size:
-            self.target = self.descriptor[minmaxidx]
-
-    def filter_sequences(self, sequences):
-        """Method to filter out entries for given sequences in *sequences* out of a descriptor instance. All
-        corresponding attribute values of these sequences (e.g. in :py:attr:`descriptor`, :py:attr:`name`) are deleted
-        as well. The method returns an updated descriptor instance.
-
-        :param sequences: {list} sequences to be filtered out of the whole instance, including corresponding data
-        :return: updated instance without filtered sequences
-        :Example:
-
-        >>> sequences = ['KLLKLLKKLLKLLK', 'ACDEFGHIK', 'GLFDIVKKVV', 'GLFDIVKKVVGALG', 'GLFDIVKKVVGALGSL']
-        >>> desc = PeptideDescriptor(sequences, 'pepcats')
-        >>> desc.calculate_crosscorr(7)
-        >>> len(desc.descriptor)
-        5
-        >>> desc.filter_sequences('KLLKLLKKLLKLLK')
-        >>> len(desc.descriptor)
-        4
-        >>> desc.sequences
-        ['ACDEFGHIK', 'GLFDIVKKVV', 'GLFDIVKKVVGALG', 'GLFDIVKKVVGALGSL']
-        """
-        indices = list()
-        if isinstance(sequences, str):  # check if sequences is only one sequence string and convert it to a list
-            sequences = [sequences]
-        for s in sequences:  # get indices of queried sequences
-            indices.append(self.sequences.index(s))
-
-        self.sequences = np.delete(np.array(self.sequences), indices, 0).tolist()
-        if hasattr(self, 'descriptor') and self.descriptor.size:
-            self.descriptor = np.delete(self.descriptor, indices, 0)
-        if hasattr(self, 'names') and self.names:
-            self.names = np.delete(np.array(self.names), indices, 0).tolist()
-        if hasattr(self, 'target') and self.target.size:
-            self.target = np.delete(self.target, indices, 0)
-
-    def filter_values(self, values, operator='=='):
-        """Method to filter the descriptor matrix in the attribute :py:attr:`descriptor` for a given list of values (same
-        size as the number of features in the descriptor matrix!) The operator option tells the method whether to
-        filter for values equal, lower, higher ect. to the given values in the *values* array.
-
-        :param values: {list} values to filter the attribute :py:attr:`descriptor` for
-        :param operator: {str} filter criterion, available the operators ``==``, ``<``, ``>``, ``<=``and ``>=``.
-        :return: descriptor matrix and updated sequences containing only entries with descriptor values given in
-            *values* in the corresponding attributes.
-        :Example:
-
-        >>> desc.descriptor  # desc = BaseDescriptor instance
-        array([[ 0.7666517 ],
-               [ 0.38373498]])
-        >>> desc.filter_values([0.5], '<')
-        >>> desc.descriptor
-        array([[ 0.38373498]])
-        """
-        dim = self.descriptor.shape[1]
-        for d in range(dim):  # for all the features in self.descriptor
-            if operator == '==':
-                indices = np.where(self.descriptor[:, d] == values[d])[0]
-            elif operator == '<':
-                indices = np.where(self.descriptor[:, d] < values[d])[0]
-            elif operator == '>':
-                indices = np.where(self.descriptor[:, d] > values[d])[0]
-            elif operator == '<=':
-                indices = np.where(self.descriptor[:, d] <= values[d])[0]
-            elif operator == '>=':
-                indices = np.where(self.descriptor[:, d] >= values[d])[0]
-            else:
-                raise KeyError('available operators: ``==``, ``<``, ``>``, ``<=``and ``>=``')
-
-            # filter descriptor matrix, sequence list and names list according to obtained indices
-            self.sequences = np.array(self.sequences)[indices].tolist()
-            if hasattr(self, 'descriptor') and self.descriptor.size:
-                self.descriptor = self.descriptor[indices]
-            if hasattr(self, 'names') and self.names:
-                self.names = np.array(self.names)[indices].tolist()
-            if hasattr(self, 'target') and self.target.size:
-                self.target = self.target[indices]
-
-    def filter_aa(self, amino_acids):
-        """Method to filter out corresponding names and descriptor values of sequences with given amino acids in the
-        argument list *aminoacids*.
-
-        :param amino_acids: list of amino acids to be filtered
-        :return: filtered list of sequences, descriptor values, target values and names in the corresponding attributes.
-        :Example:
-
-        >>> b = BaseSequence(3)
-        >>> b.sequences = ['AAALLLIIIKKK', 'CCEERRT', 'LLVVIIFFFQQ']
-        >>> b.filter_aa(['C'])
-        >>> b.sequences
-        ['AAALLLIIIKKK', 'LLVVIIFFFQQ']
-        """
-
-        pattern = re.compile('|'.join(amino_acids))
-        seqs = []
-        desc = []
-        names = []
-        target = []
-
-        for i, s in enumerate(self.sequences):
-            if not pattern.search(s):
-                seqs.append(s)
-                if hasattr(self, 'descriptor') and self.descriptor.size:
-                    desc.append(self.descriptor[i])
-                if hasattr(self, 'names') and self.names:
-                    names.append(self.names[i])
-                if hasattr(self, 'target') and self.target.size:
-                    target.append(self.target[i])
-
-        self.sequences = seqs
-        self.names = names
-        self.descriptor = np.array(desc)
-        self.target = np.array(target, dtype='int')
-
-    def filter_duplicates(self):
-        """Method to filter duplicates in the sequences from the class attribute :py:attr:`sequences`
-
-        :return: filtered sequences list in the attribute :py:attr:`sequences` and corresponding names.
-        :Example:
-
-        >>> b = BaseDescriptor(['KLLKLLKKLLKLLK', 'KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK', 'KLAKLAKKLAKLAK'])
-        >>> b.filter_duplicates()
-        >>> b.sequences
-        ['KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK']
-
-        .. versionadded:: v2.2.5
-        """
-        if not self.names:
-            self.names = ['Seq_' + str(i) for i in range(len(self.sequences))]
-        if not self.target:
-            self.target = [0] * len(self.sequences)
-        if not self.descriptor:
-            self.descriptor = np.zeros(len(self.sequences))
-        df = pd.DataFrame(np.array([self.sequences, self.names, self.descriptor, self.target]).T,
-                          columns=['Sequences', 'Names', 'Descriptor', 'Target'])
-        df = df.drop_duplicates('Sequences', 'first')  # keep first occurrence of duplicate
-        self.sequences = df['Sequences'].get_values().tolist()
-        self.names = df['Names'].get_values().tolist()
-        self.descriptor = df['Descriptor'].get_values()
-        self.target = df['Target'].get_values()
-
-    def keep_natural_aa(self):
-        """Method to filter out sequences that do not contain natural amino acids. If the sequence contains a character
-        that is not in ['A','C','D,'E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'].
-
-        :return: filtered sequence list in the attribute :py:attr:`sequences`. The other attributes are also filtered
-            accordingly (if present).
-        :Example:
-
-        >>> b = BaseSequence(2)
-        >>> b.sequences = ['BBBsdflUasUJfBJ', 'GLFDIVKKVVGALGSL']
-        >>> b.keep_natural_aa()
-        >>> b.sequences
-        ['GLFDIVKKVVGALGSL']
-        """
-
-        natural_aa = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W',
-                      'Y']
-
-        seqs = []
-        desc = []
-        names = []
-        target = []
-
-        for i, s in enumerate(self.sequences):
-            seq = list(s.upper())
-            if all(c in natural_aa for c in seq):
-                seqs.append(s.upper())
-                if hasattr(self, 'descriptor') and self.descriptor.size:
-                    desc.append(self.descriptor[i])
-                if hasattr(self, 'names') and self.names:
-                    names.append(self.names[i])
-                if hasattr(self, 'target') and self.target.size:
-                    target.append(self.target[i])
-
-        self.sequences = seqs
-        self.names = names
-        self.descriptor = np.array(desc)
-        self.target = np.array(target, dtype='int')
-
-    def load_descriptordata(self, filename, delimiter=",", targets=False, skip_header=0):
-        """Method to load any data file with sequences and descriptor values and save it to a new insatnce of the
-        class :class:`modlamp.descriptors.PeptideDescriptor`.
-
-        .. note:: Headers are not considered. To skip initial lines in the file, use the *skip_header* option.
-
-        :param filename: {str} filename of the data file to be loaded
-        :param delimiter: {str} column delimiter
-        :param targets: {boolean} whether last column in the file contains a target class vector
-        :param skip_header: {int} number of initial lines to skip in the file
-        :return: loaded sequences, descriptor values and targets in the corresponding attributes.
-        """
-        data = np.genfromtxt(filename, delimiter=delimiter, skip_header=skip_header)
-        data = data[:, 1:]  # skip sequences as they are "nan" when read as float
-        seqs = np.genfromtxt(filename, delimiter=delimiter, dtype="str")
-        seqs = seqs[:, 0]
-        if targets:
-            self.target = np.array(data[:, -1], dtype='int')
-        self.sequences = seqs
-        self.descriptor = data
-
-    def save_descriptor(self, filename, delimiter=',', targets=None, header=None):
-        """Method to save the descriptor values to a .csv/.txt file
-
-        :param filename: filename of the output file
-        :param delimiter: column delimiter
-        :param targets: target class vector to be added to descriptor (same length as :py:attr:`sequences`)
-        :param header: {str} header to be written at the beginning of the file (if ``None``: feature names are taken)
-        :return: output file with peptide names and descriptor values
-        """
-        seqs = np.array(self.sequences, dtype='|S80')[:, np.newaxis]
-        ids = np.array(self.names, dtype='|S80')[:, np.newaxis]
-        if ids.shape == seqs.shape:
-            names = np.hstack((ids, seqs))
-        else:
-            names = seqs
-        if targets and len(targets) == len(self.sequences):
-            target = np.array(targets)[:, np.newaxis]
-            data = np.hstack((names, self.descriptor, target))
-        else:
-            data = np.hstack((names, self.descriptor))
-        if not header:
-            featurenames = [['Sequence']] + self.featurenames
-            header = ', '.join([f[0] for f in featurenames])
-        np.savetxt(filename, data, delimiter=delimiter, fmt='%s', header=header)
-
-
-def load_scale(scalename):
-    """Method to load scale values for a given amino acid scale
-
-    :param scalename: amino acid scale name, for available scales see the
-        :class:`modlamp.descriptors.PeptideDescriptor()` documentation.
-    :return: amino acid scale values in dictionary format.
-    """
-    # predefined amino acid scales dictionary
-    scales = {
-        'aasi': {'A': [1.89], 'C': [1.73], 'D': [3.13], 'E': [3.14], 'F': [1.53], 'G': [2.67], 'H': [3], 'I': [1.97],
-                 'K': [2.28], 'L': [1.74], 'M': [2.5], 'N': [2.33], 'P': [0.22], 'Q': [3.05], 'R': [1.91], 'S': [2.14],
-                 'T': [2.18], 'V': [2.37], 'W': [2], 'Y': [2.01]},
-        'abhprk': {'A': [0, 0, 0, 0, 0, 0], 'C': [0, 0, 0, 0, 0, 0], 'D': [1, 0, 0, 1, 0, 0], 'E': [1, 0, 0, 1, 0, 0],
-                   'F': [0, 0, 1, 0, 1, 0], 'G': [0, 0, 0, 0, 0, 0], 'H': [0, 0, 0, 1, 1, 0], 'I': [0, 0, 1, 0, 0, 0],
-                   'K': [0, 1, 0, 1, 0, 0], 'L': [0, 0, 1, 0, 0, 0], 'M': [0, 0, 1, 0, 0, 0], 'N': [0, 0, 0, 1, 0, 0],
-                   'P': [0, 0, 0, 0, 0, 1], 'Q': [0, 0, 0, 1, 0, 0], 'R': [0, 1, 0, 1, 0, 0], 'S': [0, 0, 0, 1, 0, 0],
-                   'T': [0, 0, 0, 1, 0, 0], 'V': [0, 0, 1, 0, 0, 0], 'W': [0, 0, 1, 0, 1, 0], 'Y': [0, 0, 0, 1, 1, 0]},
-        'argos': {'I': [0.77], 'F': [1.2], 'V': [0.14], 'L': [2.3], 'W': [0.07], 'M': [2.3], 'A': [0.64], 'G': [-0.48],
-                  'C': [0.25], 'Y': [-0.41], 'P': [-0.31], 'T': [-0.13], 'S': [-0.25], 'H': [-0.87], 'E': [-0.94],
-                  'N': [-0.89], 'Q': [-0.61], 'D': [-1], 'K': [-1], 'R': [-0.68]},
-        'bulkiness': {'A': [0.443], 'C': [0.551], 'D': [0.453], 'E': [0.557], 'F': [0.898], 'G': [0], 'H': [0.563],
-                      'I': [0.985], 'K': [0.674], 'L': [0.985], 'M': [0.703], 'N': [0.516], 'P': [0.768], 'Q': [0.605],
-                      'R': [0.596], 'S': [0.332], 'T': [0.677], 'V': [0.995], 'W': [1], 'Y': [0.801]},
-        'charge_phys': {'A': [0.], 'C': [-.1], 'D': [-1.], 'E': [-1.], 'F': [0.], 'G': [0.], 'H': [0.1],
-                        'I': [0.], 'K': [1.], 'L': [0.], 'M': [0.], 'N': [0.], 'P': [0.], 'Q': [0.],
-                        'R': [1.], 'S': [0.], 'T': [0.], 'V': [0.], 'W': [0.], 'Y': [0.]},
-        'charge_acid': {'A': [0.], 'C': [-.1], 'D': [-1.], 'E': [-1.], 'F': [0.], 'G': [0.], 'H': [1.],
-                        'I': [0.], 'K': [1.], 'L': [0.], 'M': [0.], 'N': [0.], 'P': [0.], 'Q': [0.],
-                        'R': [1.], 'S': [0.], 'T': [0.], 'V': [0.], 'W': [0.], 'Y': [0.]},
-        'cougar': {'A': [0.25, 0.62, 1.89], 'C': [0.208, 0.29, 1.73], 'D': [0.875, -0.9, 3.13],
-                   'E': [0.833, -0.74, 3.14], 'F': [0.042, 1.2, 1.53], 'G': [1, 0.48, 2.67], 'H': [0.083, -0.4, 3],
-                   'I': [0.667, 1.4, 1.97], 'K': [0.708, -1.5, 2.28], 'L': [0.292, 1.1, 1.74], 'M': [0, 0.64, 2.5],
-                   'N': [0.667, -0.78, 2.33], 'P': [0.875, 0.12, 0.22], 'Q': [0.792, -0.85, 3.05],
-                   'R': [0.958, -2.5, 1.91], 'S': [0.875, -0.18, 2.14], 'T': [0.583, -0.05, 2.18],
-                   'V': [0.375, 1.1, 2.37], 'W': [0.042, 0.81, 2], 'Y': [0.5, 0.26, 2.01]},
-        'eisenberg': {'I': [1.4], 'F': [1.2], 'V': [1.1], 'L': [1.1], 'W': [0.81], 'M': [0.64], 'A': [0.62],
-                      'G': [0.48], 'C': [0.29], 'Y': [0.26], 'P': [0.12], 'T': [-0.05], 'S': [-0.18], 'H': [-0.4],
-                      'E': [-0.74], 'N': [-0.78], 'Q': [-0.85], 'D': [-0.9], 'K': [-1.5], 'R': [-2.5]},
-        'ez': {'A': [-0.29, 10.22, 4.67], 'C': [0.95, 13.69, 5.77], 'D': [1.19, 14.25, 8.98], 'E': [1.3, 14.66, 4.16],
-               'F': [-0.8, 19.67, 7.12], 'G': [-0.01, 13.86, 6], 'H': [0.75, 12.26, 2.77], 'I': [-0.56, 14.34, 10.69],
-               'K': [1.66, 11.11, 2.09], 'L': [-0.64, 17.34, 8.61], 'M': [-0.28, 18.04, 7.13], 'N': [0.89, 12.78, 6.28],
-               'P': [0.83, 18.09, 3.53], 'Q': [1.21, 10.46, 2.59], 'R': [1.55, 9.34, 4.68], 'S': [0.1, 13.86, 6],
-               'T': [0.01, 13.86, 6], 'V': [-0.47, 11.35, 4.97], 'W': [-0.85, 11.65, 7.2], 'Y': [-0.42, 13.04, 6.2]},
-        'flexibility': {'A': [0.25], 'C': [0.208], 'D': [0.875], 'E': [0.833], 'F': [0.042], 'G': [1], 'H': [0.083],
-                        'I': [0.667], 'K': [0.708], 'L': [0.292], 'M': [0.], 'N': [0.667], 'P': [0.875], 'Q': [0.792],
-                        'R': [0.958], 'S': [0.875], 'T': [0.583], 'V': [0.375], 'W': [0.042], 'Y': [0.5]},
-        'grantham': {'A': [0, 8.1, 31], 'C': [2.75, 5.5, 55], 'D': [1.38, 13.0, 54], 'E': [0.92, 12.3, 83],
-                     'F': [0, 5.2, 132], 'G': [0.74, 9.0, 3], 'H': [0.58, 10.4, 96], 'I': [0, 5.2, 111],
-                     'K': [0.33, 11.3, 119], 'L': [0, 4.9, 111], 'M': [0, 5.7, 105], 'N': [1.33, 11.6, 56],
-                     'P': [0.39, 8.0, 32.5], 'Q': [0.89, 10.5, 85], 'R': [0.65, 10.5, 124], 'S': [1.42, 9.2, 32],
-                     'T': [0.71, 8.6, 61], 'V': [0, 5.9, 84], 'W': [0.13, 5.4, 170], 'Y': [0.20, 6.2, 136]},
-        'gravy': {'I': [4.5], 'V': [4.2], 'L': [3.8], 'F': [2.8], 'C': [2.5], 'M': [1.9], 'A': [1.8], 'G': [-0.4],
-                  'T': [-0.7], 'W': [-0.9], 'S': [-0.8], 'Y': [-1.3], 'P': [-1.6], 'H': [-3.2], 'E': [-3.5],
-                  'Q': [-3.5], 'D': [-3.5], 'N': [-3.5], 'K': [-3.9], 'R': [-4.5]},
-        'hopp-woods': {'A': [-0.5], 'C': [-1], 'D': [3], 'E': [3], 'F': [-2.5], 'G': [0], 'H': [-0.5], 'I': [-1.8],
-                       'K': [3], 'L': [-1.8], 'M': [-1.3], 'N': [0.2], 'P': [0], 'Q': [0.2], 'R': [3], 'S': [0.3],
-                       'T': [-0.4], 'V': [-1.5], 'W': [-3.4], 'Y': [-2.3]},
-        'isaeci': {'A': [62.9, 0.05], 'C': [78.51, 0.15], 'D': [18.46, 1.25], 'E': [30.19, 1.31], 'F': [189.42, 0.14],
-                   'G': [19.93, 0.02], 'H': [87.38, 0.56], 'I': [149.77, 0.09], 'K': [102.78, 0.53], 'L': [154.35, 0.1],
-                   'M': [132.22, 0.34], 'N': [19.53, 1.36], 'P': [122.35, 0.16], 'Q': [17.87, 1.31], 'R': [52.98, 1.69],
-                   'S': [19.75, 0.56], 'T': [59.44, 0.65], 'V': [120.91, 0.07], 'W': [179.16, 1.08],
-                   'Y': [132.16, 0.72]},
-        'janin': {'I': [1.2], 'F': [0.87], 'V': [1], 'L': [0.87], 'W': [0.59], 'M': [0.73], 'A': [0.59], 'G': [0.59],
-                  'C': [1.4], 'Y': [-0.4], 'P': [-0.26], 'T': [-0.12], 'S': [0.02], 'H': [0.02], 'E': [-0.83],
-                  'N': [-0.55], 'Q': [-0.83], 'D': [-0.69], 'K': [-2.4], 'R': [-1.8]},
-        'kytedoolittle': {'I': [1.7], 'F': [1.1], 'V': [1.6], 'L': [1.4], 'W': [-0.14], 'M': [0.8], 'A': [0.77],
-                          'G': [0.03], 'C': [1], 'Y': [-0.27], 'P': [-0.37], 'T': [-0.07], 'S': [-0.1], 'H': [-0.91],
-                          'E': [-1], 'N': [-1], 'Q': [-1], 'D': [-1], 'K': [-1.1], 'R': [-1.3]},
-        'levitt_alpha': {'A': [1.29], 'C': [1.11], 'D': [1.04], 'E': [1.44], 'F': [1.07], 'G': [0.56], 'H': [1.22],
-                         'I': [0.97], 'K': [1.23], 'L': [1.3], 'M': [1.47], 'N': [0.9], 'P': [0.52], 'Q': [1.27],
-                         'R': [0.96], 'S': [0.82], 'T': [0.82], 'V': [0.91], 'W': [0.99], 'Y': [0.72]},
-        'mss': {'A': [13.02], 'C': [23.7067], 'D': [22.02], 'E': [20.0233], 'F': [23.5288], 'G': [1.01], 'H': [23.5283],
-                'I': [22.3611], 'K': [18.9756], 'L': [19.6944], 'M': [21.92], 'N': [21.8567], 'P': [19.0242],
-                'Q': [19.9689], 'R': [19.0434], 'S': [18.3533], 'T': [22.3567], 'V': [21.0267], 'W': [26.1975],
-                'Y': [24.1954]},
-        'msw': {'A': [-0.73, 0.2, -0.62], 'C': [-0.66, 0.26, -0.27], 'D': [0.11, -1, -0.96], 'E': [0.24, -0.39, -0.04],
-                'F': [0.76, 0.85, -0.34], 'G': [-0.31, -0.28, -0.75], 'H': [0.84, 0.67, -0.78],
-                'I': [-0.91, 0.83, -0.25], 'K': [-0.51, 0.08, 0.6], 'L': [-0.74, 0.72, -0.16], 'M': [-0.7, 1, -0.32],
-                'N': [0.14, 0.2, -0.66], 'P': [-0.43, 0.73, -0.6], 'Q': [0.3, 1, -0.3], 'R': [-0.22, 0.27, 1],
-                'S': [-0.8, 0.61, -1], 'T': [-0.58, 0.85, -0.89], 'V': [-1, 0.79, -0.58], 'W': [1, 0.98, -0.47],
-                'Y': [0.97, 0.66, -0.16]},
-        'pepcats': {'A': [1, 0, 0, 0, 0, 0], 'C': [1, 0, 1, 1, 0, 0], 'D': [0, 0, 1, 0, 0, 1], 'E': [0, 0, 1, 0, 0, 1],
-                    'F': [1, 1, 0, 0, 0, 0], 'G': [0, 0, 0, 0, 0, 0], 'H': [1, 1, 0, 1, 1, 0], 'I': [1, 0, 0, 0, 0, 0],
-                    'K': [1, 0, 0, 1, 1, 0], 'L': [1, 0, 0, 0, 0, 0], 'M': [1, 0, 1, 0, 0, 0], 'N': [0, 0, 1, 1, 0, 0],
-                    'P': [1, 0, 0, 0, 0, 0], 'Q': [0, 0, 1, 1, 0, 0], 'R': [1, 0, 0, 1, 1, 0], 'S': [0, 0, 1, 1, 0, 0],
-                    'T': [0, 0, 1, 1, 0, 0], 'V': [1, 0, 0, 0, 0, 0], 'W': [1, 1, 0, 1, 0, 0], 'Y': [1, 1, 1, 1, 0, 0]},
-        'peparc': {'A': [1, 0, 0, 0, 0], 'C': [0, 1, 0, 0, 0], 'D': [0, 1, 0, 1, 0], 'E': [0, 1, 0, 1, 0],
-                   'F': [1, 0, 0, 0, 0], 'G': [0, 0, 0, 0, 0], 'H': [0, 1, 1, 0, 0], 'I': [1, 0, 0, 0, 0],
-                   'K': [0, 1, 1, 0, 0], 'L': [1, 0, 0, 0, 0], 'M': [1, 0, 0, 0, 0], 'N': [0, 1, 0, 0, 0],
-                   'P': [0, 0, 0, 0, 1], 'Q': [0, 1, 0, 0, 0], 'R': [0, 1, 1, 0, 0], 'S': [0, 1, 0, 0, 0],
-                   'T': [0, 1, 0, 0, 0], 'V': [1, 0, 0, 0, 0], 'W': [1, 0, 0, 0, 0], 'Y': [1, 0, 0, 0, 0]},
-        'polarity': {'A': [0.395], 'C': [0.074], 'D': [1.], 'E': [0.914], 'F': [0.037], 'G': [0.506], 'H': [0.679],
-                     'I': [0.037], 'K': [0.79], 'L': [0.], 'M': [0.099], 'N': [0.827], 'P': [0.383], 'Q': [0.691],
-                     'R': [0.691], 'S': [0.531], 'T': [0.457], 'V': [0.123], 'W': [0.062], 'Y': [0.16]},
-        'ppcali': {
-            'A': [0.070781, 0.036271, 2.042, 0.083272, 0.69089, 0.15948, -0.80893, 0.24698, 0.86525, 0.68563, -0.24665,
-                  0.61314, -0.53343, -0.50878, -1.3646, 2.2679, -1.5644, -0.75043, -0.65875],
-            'C': [0.61013, -0.93043, -0.85983, -2.2704, 1.5877, -2.0066, -0.30314, 1.2544, -0.2832, -1.2844, -0.73449,
-                  -0.11235, -0.41152, -0.0050164, 0.28307, 0.20522, -0.021084, -0.15627, -0.32689],
-            'D': [-1.3215, 0.24063, -0.032754, -0.37863, 1.2051, 1.0001, 2.1827, 0.19212, -0.60529, 0.37639, -0.46451,
-                  -0.46788, 1.4077, -2.1661, 0.72604, -0.12332, -0.8243, -0.082989, 0.053476],
-            'E': [-0.87713, 1.4905, 1.0755, 0.35944, 1.567, 0.41365, 1.0944, 0.72634, -0.74957, 0.038939, 0.075057,
-                  0.78637, -1.4543, 1.6667, -0.097439, -0.24293, 1.7687, 0.36174, -0.11585],
-            'F': [1.3557, -0.10336, -0.4309, 0.41269, -0.083356, 0.83783, 0.095381, -0.65222, -0.3119, 0.43293, -1.0011,
-                  -0.66855, -0.10242, 1.2066, 2.6234, 1.9981, -0.25016, 0.71979, 0.21569],
-            'G': [-1.0818, -2.1561, 0.77082, -0.92747, -1.0748, 1.7997, -1.3708, 1.279, -1.2098, 0.46065, 0.43076,
-                  0.20037, -0.2302, 0.2646, 0.57149, -0.68432, 0.19341, -0.061606, -0.08071],
-            'H': [-0.050161, 0.69246, -0.88397, -0.64601, 0.24622, 0.10487, -1.1317, -2.3661, -0.89918, 0.46391,
-                  -0.62359, 2.5478, -0.34737, -0.52062, 0.17522, -0.88648, -0.4755, 0.023187, -0.28261],
-            'I': [1.4829, -0.46435, 0.50189, 0.55724, -0.51535, -0.29914, 0.97236, -0.15793, -0.98246, -0.54347,
-                  0.97806, 0.37577, 1.618, 0.62323, -0.59359, -0.35483, -0.085017, 0.55825, -2.7542],
-            'K': [-0.85344, 1.529, 0.27747, 0.32993, -1.1786, -0.16633, -1.0459, 0.44621, 0.41027, -2.5318, 0.91329,
-                  0.53385, 0.61417, -1.111, 1.1323, 0.95105, 0.76769, -0.016115, 0.054995],
-            'L': [1.2857, 0.039488, 1.5378, 0.87969, -0.21419, 0.40389, -0.20426, -0.14351, 0.61024, -1.1927, -2.2149,
-                  -0.84248, -0.5061, -0.48548, 0.10791, -2.1503, -0.12006, -0.60222, 0.26546],
-            'M': [1.137, 0.64388, 0.13724, -0.2988, 1.2288, 0.24981, -1.6427, -0.75868, -0.54902, 1.0571, 1.272,
-                  -1.9104, 0.70919, -0.93575, -0.6314, -0.079654, 1.634, -0.0021923, 0.49825],
-            'N': [-1.084, -0.176, -0.47062, -0.92245, -0.32953, 0.74278, 0.34551, -1.4605, 0.25219, -1.2107, -0.59978,
-                  -0.79183, 1.3268, 1.9839, -1.6137, 0.5333, 0.033889, -1.0331, 0.83019],
-            'P': [-1.1823, -1.6911, -1.1331, 3.073, 1.1942, -0.93426, -0.72985, -0.042441, -0.19264, -0.21603, -0.1239,
-                  0.054016, 0.15241, -0.019691, -0.20543, 0.10206, 0.07671, -0.081968, 0.20348],
-            'Q': [-0.57747, 0.97452, -0.077547, -0.0033488, 0.17184, -0.52537, -0.27362, -0.1366, 0.2057, -0.013066,
-                  1.8834, -1.2736, -0.84991, 1.0445, 0.69027, -1.2866, -2.6776, 0.1683, 0.086105],
-            'R': [-0.62245, 1.545, -0.61966, 0.19057, -1.7485, -1.3909, -0.47526, 1.3938, -0.84556, 1.7344, -1.6516,
-                  -0.52678, 0.6791, 0.24374, -0.62551, -0.0028271, -0.053884, 0.14926, -0.17232],
-            'S': [-0.86409, -0.77147, 0.38542, -0.59389, -0.53313, -0.47585, 0.31966, -0.89716, 1.8029, 0.26431,
-                  -0.23173, -0.37626, -0.47349, -0.42878, -0.47297, -0.079826, 0.57043, 3.2057, -0.18413],
-            'T': [-0.33027, -0.57447, 0.18653, -0.28941, -0.62681, -1.0737, 0.80363, -0.59525, 1.8786, 1.3971, 0.63929,
-                  0.21281, -0.067048, 0.096271, 1.323, -0.36173, 1.2261, -2.2771, -0.65412],
-            'V': [1.1675, -0.61554, 0.95405, 0.11662, -0.74473, -1.1482, 1.1309, 0.12079, -0.77171, 0.18597, 0.93442,
-                  1.201, 0.3826, -0.091573, -0.31269, 0.074367, -0.22946, 0.24322, 2.9836],
-            'W': [1.1881, 0.43789, -1.7915, 0.138, 0.43088, 1.6467, -0.11987, 1.7369, 2.0818, 0.33122, 0.31829, 1.1586,
-                  0.67649, 0.30819, -0.55772, -0.54491, -0.17969, 0.24477, 0.38674],
-            'Y': [0.54671, -0.1468, -1.5688, 0.19001, -1.2736, 0.66162, 1.1614, -0.18614, -0.70654, -0.43634, 0.44775,
-                  -0.71366, -2.5907, -1.1649, -1.1576, 0.66572, 0.21019, -0.61016, -0.34844]},
-        'refractivity': {'A': [0.102045615], 'C': [0.841053374], 'D': [0.282153774], 'E': [0.405831178],
-                         'F': [0.691276746], 'G': [0], 'H': [0.512814484], 'I': [0.448154244], 'K': [0.50058782],
-                         'L': [0.441570656], 'M': [0.508817305], 'N': [0.282153774], 'P': [0.256995062],
-                         'Q': [0.405831178], 'R': [0.626851634], 'S': [0.149306372], 'T': [0.258876087],
-                         'V': [0.327298378], 'W': [1], 'Y': [0.741359041]},
-        't_scale': {'A': [-8.4, -8.01, -3.73, -3.65, -6.12, -1.59, 1.56],
-                    'C': [-2.44, -1.96, 0.93, -2.35, 1.31, 2.29, -1.52],
-                    'D': [-6.84, -0.94, 17.68, -0.03, 3.44, 9.07, 4.32],
-                    'E': [-6.5, 16.2, 17.28, 3.11, -4.75, -2.54, 4.72],
-                    'F': [21.59, -5.73, 1.03, -3.3, 2.64, -5.02, 1.7],
-                    'G': [-8.48, -10.37, -5.14, -6.51, -11.84, -3.6, 2.01],
-                    'H': [15.28, -3.67, 6.72, -6.38, 4.12, -1.55, -2.85],
-                    'I': [-2.97, 4.64, -0.77, 11, 3.26, -4.36, -7.88],
-                    'K': [2.7, 13.46, -14.03, -2.55, 2.77, 0.15, 3.19],
-                    'L': [2.61, 5.96, 1.97, 2.59, -4.77, -4.84, -5.44],
-                    'M': [3.38, 12.43, -4.77, 0.45, -1.55, -0.6, 3.26],
-                    'N': [-3.11, -1.22, 6.26, -9.38, 9.94, 7.66, -4.81],
-                    'P': [-5.35, -9.07, -1.52, -8.79, -8.73, 4.29, -9.91],
-                    'Q': [-5.31, 15.64, 8.44, 1.03, -4.32, -4.4, -0.52],
-                    'R': [-2.27, 18.9, -18.24, -3.47, 3.03, 6.64, 0.45],
-                    'S': [-15.88, -11.21, -2.44, -3.61, 3.46, -0.37, 8.98],
-                    'T': [-17.81, -13.64, -5.19, 10.57, 6.91, -4.43, 3.49],
-                    'V': [-5.8, -6.15, -2.26, 9.87, 5.28, -1.49, -7.54],
-                    'W': [21.68, -8.78, -2.53, 15.53, -8.15, 11.98, 3.23],
-                    'Y': [23.9, -6.47, 0.31, -4.14, 4.08, -7.28, 3.59]},
-        'tm_tend': {'A': [0.38], 'C': [-0.3], 'D': [-3.27], 'E': [-2.9], 'F': [1.98], 'G': [-0.19], 'H': [-1.44],
-                    'I': [1.97], 'K': [-3.46], 'L': [1.82], 'M': [1.4], 'N': [-1.62], 'P': [-1.44], 'Q': [-1.84],
-                    'R': [-2.57], 'S': [-0.53], 'T': [-0.32], 'V': [1.46], 'W': [1.53], 'Y': [0.49]},
-        'z3': {'A': [0.07, -1.73, 0.09], 'C': [0.71, -0.97, 4.13], 'D': [3.64, 1.13, 2.36], 'E': [3.08, 0.39, -0.07],
-               'F': [-4.92, 1.3, 0.45], 'G': [2.23, -5.36, 0.3], 'H': [2.41, 1.74, 1.11], 'I': [-4.44, -1.68, -1.03],
-               'K': [2.84, 1.41, -3.14], 'L': [-4.19, -1.03, -0.98], 'M': [-2.49, -0.27, -0.41],
-               'N': [3.22, 1.45, 0.84], 'P': [-1.22, 0.88, 2.23], 'Q': [2.18, 0.53, -1.14], 'R': [2.88, 2.52, -3.44],
-               'S': [1.96, -1.63, 0.57], 'T': [0.92, -2.09, -1.4], 'V': [-2.69, -2.53, -1.29], 'W': [-4.75, 3.65, 0.85],
-               'Y': [-1.39, 2.32, 0.01]},
-        'z5': {'A': [0.24, -2.32, 0.6, -0.14, 1.3], 'C': [0.84, -1.67, 3.71, 0.18, -2.65],
-               'D': [3.98, 0.93, 1.93, -2.46, 0.75], 'E': [3.11, 0.26, -0.11, -3.04, -0.25],
-               'F': [-4.22, 1.94, 1.06, 0.54, -0.62], 'G': [2.05, -4.06, 0.36, -0.82, -0.38],
-               'H': [2.47, 1.95, 0.26, 3.9, 0.09], 'I': [-3.89, -1.73, -1.71, -0.84, 0.26],
-               'K': [2.29, 0.89, -2.49, 1.49, 0.31], 'L': [-4.28, -1.3, -1.49, -0.72, 0.84],
-               'M': [-2.85, -0.22, 0.47, 1.94, -0.98], 'N': [3.05, 1.62, 1.04, -1.15, 1.61],
-               'P': [-1.66, 0.27, 1.84, 0.7, 2], 'Q': [1.75, 0.5, -1.44, -1.34, 0.66],
-               'R': [3.52, 2.5, -3.5, 1.99, -0.17], 'S': [2.39, -1.07, 1.15, -1.39, 0.67],
-               'T': [0.75, -2.18, -1.12, -1.46, -0.4], 'V': [-2.59, -2.64, -1.54, -0.85, -0.02],
-               'W': [-4.36, 3.94, 0.59, 3.44, -1.59], 'Y': [-2.54, 2.44, 0.43, 0.04, -1.47]}
-    }
-    if scalename == 'all':
-        d = {'I': [], 'F': [], 'V': [], 'L': [], 'W': [], 'M': [], 'A': [], 'G': [], 'C': [], 'Y': [], 'P': [],
-             'T': [], 'S': [], 'H': [], 'E': [], 'N': [], 'Q': [], 'D': [], 'K': [], 'R': []}
-        for scale in scales.keys():
-            for k, v in scales[scale].items():
-                d[k].extend(v)
-        return 'all', d
-
-    elif scalename == 'instability':
-        d = {
-            "A": {"A": 1.0, "C": 44.94, "E": 1.0, "D": -7.49, "G": 1.0, "F": 1.0, "I": 1.0, "H": -7.49, "K": 1.0,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0, "T": 1.0, "W": 1.0, "V": 1.0,
-                  "Y": 1.0},
-            "C": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 20.26, "G": 1.0, "F": 1.0, "I": 1.0, "H": 33.6, "K": 1.0,
-                  "M": 33.6, "L": 20.26, "N": 1.0, "Q": -6.54, "P": 20.26, "S": 1.0, "R": 1.0, "T": 33.6, "W": 24.68,
-                  "V": -6.54, "Y": 1.0},
-            "E": {"A": 1.0, "C": 44.94, "E": 33.6, "D": 20.26, "G": 1.0, "F": 1.0, "I": 20.26, "H": -6.54, "K": 1.0,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": 20.26, "P": 20.26, "S": 20.26, "R": 1.0, "T": 1.0, "W": -14.03,
-                  "V": 1.0, "Y": 1.0},
-            "D": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0, "G": 1.0, "F": -6.54, "I": 1.0, "H": 1.0, "K": -7.49,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": 1.0, "P": 1.0, "S": 20.26, "R": -6.54, "T": -14.03, "W": 1.0,
-                  "V": 1.0, "Y": 1.0},
-            "G": {"A": -7.49, "C": 1.0, "E": -6.54, "D": 1.0, "G": 13.34, "F": 1.0, "I": -7.49, "H": 1.0, "K": -7.49,
-                  "M": 1.0, "L": 1.0, "N": -7.49, "Q": 1.0, "P": 1.0, "S": 1.0, "R": 1.0, "T": -7.49, "W": 13.34,
-                  "V": 1.0, "Y": -7.49},
-            "F": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 13.34, "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0, "K": -14.03,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0, "T": 1.0, "W": 1.0, "V": 1.0,
-                  "Y": 33.601},
-            "I": {"A": 1.0, "C": 1.0, "E": 44.94, "D": 1.0, "G": 1.0, "F": 1.0, "I": 1.0, "H": 13.34, "K": -7.49,
-                  "M": 1.0, "L": 20.26, "N": 1.0, "Q": 1.0, "P": -1.88, "S": 1.0, "R": 1.0, "T": 1.0, "W": 1.0,
-                  "V": -7.49, "Y": 1.0},
-            "H": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0, "G": -9.37, "F": -9.37, "I": 44.94, "H": 1.0, "K": 24.68,
-                  "M": 1.0, "L": 1.0, "N": 24.68, "Q": 1.0, "P": -1.88, "S": 1.0, "R": 1.0, "T": -6.54, "W": -1.88,
-                  "V": 1.0, "Y": 44.94},
-            "K": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0, "G": -7.49, "F": 1.0, "I": -7.49, "H": 1.0, "K": 1.0,
-                  "M": 33.6, "L": -7.49, "N": 1.0, "Q": 24.64, "P": -6.54, "S": 1.0, "R": 33.6, "T": 1.0, "W": 1.0,
-                  "V": -7.49, "Y": 1.0},
-            "M": {"A": 13.34, "C": 1.0, "E": 1.0, "D": 1.0, "G": 1.0, "F": 1.0, "I": 1.0, "H": 58.28, "K": 1.0,
-                  "M": -1.88, "L": 1.0, "N": 1.0, "Q": -6.54, "P": 44.94, "S": 44.94, "R": -6.54, "T": -1.88, "W": 1.0,
-                  "V": 1.0, "Y": 24.68},
-            "L": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0, "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0, "K": -7.49, "M": 1.0,
-                  "L": 1.0, "N": 1.0, "Q": 33.6, "P": 20.26, "S": 1.0, "R": 20.26, "T": 1.0, "W": 24.68, "V": 1.0,
-                  "Y": 1.0},
-            "N": {"A": 1.0, "C": -1.88, "E": 1.0, "D": 1.0, "G": -14.03, "F": -14.03, "I": 44.94, "H": 1.0, "K": 24.68,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": -6.54, "P": -1.88, "S": 1.0, "R": 1.0, "T": -7.49, "W": -9.37,
-                  "V": 1.0, "Y": 1.0},
-            "Q": {"A": 1.0, "C": -6.54, "E": 20.26, "D": 20.26, "G": 1.0, "F": -6.54, "I": 1.0, "H": 1.0, "K": 1.0,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": 20.26, "P": 20.26, "S": 44.94, "R": 1.0, "T": 1.0, "W": 1.0,
-                  "V": -6.54, "Y": -6.54},
-            "P": {"A": 20.26, "C": -6.54, "E": 18.38, "D": -6.54, "G": 1.0, "F": 20.26, "I": 1.0, "H": 1.0, "K": 1.0,
-                  "M": -6.54, "L": 1.0, "N": 1.0, "Q": 20.26, "P": 20.26, "S": 20.26, "R": -6.54, "T": 1.0, "W": -1.88,
-                  "V": 20.26, "Y": 1.0},
-            "S": {"A": 1.0, "C": 33.6, "E": 20.26, "D": 1.0, "G": 1.0, "F": 1.0, "I": 1.0, "H": 1.0, "K": 1.0, "M": 1.0,
-                  "L": 1.0, "N": 1.0, "Q": 20.26, "P": 44.94, "S": 20.26, "R": 20.26, "T": 1.0, "W": 1.0, "V": 1.0,
-                  "Y": 1.0},
-            "R": {"A": 1.0, "C": 1.0, "E": 1.0, "D": 1.0, "G": -7.49, "F": 1.0, "I": 1.0, "H": 20.26, "K": 1.0,
-                  "M": 1.0, "L": 1.0, "N": 13.34, "Q": 20.26, "P": 20.26, "S": 44.94, "R": 58.28, "T": 1.0, "W": 58.28,
-                  "V": 1.0, "Y": -6.54},
-            "T": {"A": 1.0, "C": 1.0, "E": 20.26, "D": 1.0, "G": -7.49, "F": 13.34, "I": 1.0, "H": 1.0, "K": 1.0,
-                  "M": 1.0, "L": 1.0, "N": -14.03, "Q": -6.54, "P": 1.0, "S": 1.0, "R": 1.0, "T": 1.0, "W": -14.03,
-                  "V": 1.0, "Y": 1.0},
-            "W": {"A": -14.03, "C": 1.0, "E": 1.0, "D": 1.0, "G": -9.37, "F": 1.0, "I": 1.0, "H": 24.68, "K": 1.0,
-                  "M": 24.68, "L": 13.34, "N": 13.34, "Q": 1.0, "P": 1.0, "S": 1.0, "R": 1.0, "T": -14.03, "W": 1.0,
-                  "V": -7.49, "Y": 1.0},
-            "V": {"A": 1.0, "C": 1.0, "E": 1.0, "D": -14.03, "G": -7.49, "F": 1.0, "I": 1.0, "H": 1.0, "K": -1.88,
-                  "M": 1.0, "L": 1.0, "N": 1.0, "Q": 1.0, "P": 20.26, "S": 1.0, "R": 1.0, "T": -7.49, "W": 1.0,
-                  "V": 1.0, "Y": -6.54},
-            "Y": {"A": 24.68, "C": 1.0, "E": -6.54, "D": 24.68, "G": -7.49, "F": 1.0, "I": 1.0, "H": 13.34, "K": 1.0,
-                  "M": 44.94, "L": 1.0, "N": 1.0, "Q": 1.0, "P": 13.34, "S": 1.0, "R": -15.91, "T": -7.49, "W": -9.37,
-                  "V": 1.0, "Y": 13.34}}
-        return 'instability', d
-
-    else:
-        return scalename, scales[scalename]
-
-
-def read_fasta(inputfile):
-    """Method for loading sequences from a FASTA formatted file into :py:attr:`sequences` & :py:attr:`names`.
-    This method is used by the base class :class:`modlamp.descriptors.PeptideDescriptor` if the input is a FASTA file.
-
-    :param inputfile: .fasta file with sequences and headers to read
-    :return: list of sequences in the attribute :py:attr:`sequences` with corresponding sequence names in
-        :py:attr:`names`.
-    """
-    names = list()  # list for storing names
-    sequences = list()  # list for storing sequences
-    seq = str()
-    with open(inputfile) as f:
-        all = f.readlines()
-        last = all[-1]
-        for line in all:
-            if line.startswith('>'):
-                names.append(line.split(' ')[0][1:].strip())  # add FASTA name without description as molecule name
-                sequences.append(seq.strip())
-                seq = str()
-            elif line == last:
-                seq += line.strip()  # remove potential white space
-                sequences.append(seq.strip())
-            else:
-                seq += line.strip()  # remove potential white space
-    return sequences[1:], names
-
-
-def save_fasta(filename, sequences, names=None):
-    """Method for saving sequences in the instance :py:attr:`sequences` to a file in FASTA format.
-
-    :param filename: {str} output filename (ending .fasta)
-    :param sequences: {list} sequences to be saved to file
-    :param names: {list} whether sequence names from self.names should be saved as sequence identifiers
-    :return: a FASTA formatted file containing the generated sequences
-    """
-    if os.path.exists(filename):
-        os.remove(filename)  # remove outputfile, it it exists
-
-    with open(filename, 'w') as o:
-        for n, seq in enumerate(sequences):
-            if names:
-                o.write('>' + str(names[n]) + '\n')
-            else:
-                o.write('>Seq_' + str(n) + '\n')
-            o.write(seq + '\n')
-
-
-def aa_weights():
-    """Function holding molecular weight data on all natural amino acids.
-
-    :return: dictionary with amino acid letters and corresponding weights
-
-    .. versionadded:: v2.4.1
-    """
-    weights = {'A': 89.093, 'C': 121.158, 'D': 133.103, 'E': 147.129, 'F': 165.189, 'G': 75.067,
-               'H': 155.155, 'I': 131.173, 'K': 146.188, 'L': 131.173, 'M': 149.211, 'N': 132.118,
-               'P': 115.131, 'Q': 146.145, 'R': 174.20, 'S': 105.093, 'T': 119.119, 'V': 117.146,
-               'W': 204.225, 'Y': 181.189}
-    return weights
-
-
-def count_aas(seq, scale='relative'):
-    """Function to count the amino acids occuring in a given sequence.
-
-    :param seq: {str} amino acid sequence
-    :param scale: {'absolute' or 'relative'} defines whether counts or frequencies are given for each AA
-    :return: {dict} dictionary with amino acids as keys and their counts in the sequence as values.
-    """
-    if seq == '':  # error if len(seq) == 0
-        seq = ' '
-    aas = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
-    scl = 1.
-    if scale == 'relative':
-        scl = len(seq)
-    aa = {a: (float(seq.count(a)) / scl) for a in aas}
-    aa = collections.OrderedDict(sorted(list(aa.items())))
-    return aa
-
-
-def count_ngrams(seq, n):
-    """Function to count the n-grams of an amino acid sequence. N can be one integer or a list of integers
-
-    :param seq: {str} amino acid sequence
-    :param n: {int or list of ints} defines whether counts or frequencies are given for each AA
-    :return: {dict} dictionary with n-grams as keys and their counts in the sequence as values.
-    """
-    if seq == '':
-        seq = ' '
-    if isinstance(n, int):
-        n = [n]
-    ngrams = list()
-    for i in n:
-        ngrams.extend([seq[j:j+i] for j in range(len(seq) - (i-1))])
-    counts = {g: (seq.count(g)) for g in set(ngrams)}
-    counts = collections.OrderedDict(sorted(counts.items(), key=operator.itemgetter(1), reverse=True))
-    return counts
-
-
-def aa_energies():
-    """Function holding free energies of transfer between cyclohexane and water for all natural amino acids.
-    H. G. Boman, D. Wade, I. a Boman, B. Wåhlin, R. B. Merrifield, *FEBS Lett*. **1989**, *259*, 103–106.
-
-    :return: dictionary with amino acid letters and corresponding energies.
-    """
-    energies = {'L': -4.92, 'I': -4.92, 'V': -4.04, 'F': -2.98, 'M': -2.35, 'W': -2.33, 'A': -1.81, 'C': -1.28,
-                'G': -0.94, 'Y': 0.14, 'T': 2.57, 'S': 3.40, 'H': 4.66, 'Q': 5.54, 'K': 5.55, 'N': 6.64, 'E': 6.81,
-                'D': 8.72, 'R': 14.92, 'P': 0.}
-    return energies
-
-
-def ngrams_apd():
-    """Function returning the most frequent 2-, 3- and 4-grams from all sequences in the `APD3
-    <http://aps.unmc.edu/AP/>`_, version August 2016 with 2727 sequences.
-    For all 2, 3 and 4grams, all possible ngrams were generated from all sequences and the top 50 most frequent
-    assembled into a list. Finally, leading and tailing spaces were striped and duplicates as well as ngrams containing
-    spaces were removed.
-
-    :return: numpy.array containing most frequent ngrams
-    """
-    return np.array(['AGK', 'CKI', 'RR', 'YGGG', 'LSGL', 'RG', 'YGGY', 'PRP', 'LGGG',
-                     'GV', 'GT', 'GS', 'GR', 'IAG', 'GG', 'GF', 'GC', 'GGYG', 'GA', 'GL',
-                     'GK', 'GI', 'IPC', 'KAA', 'LAK', 'GLGG', 'GGLG', 'CKIT', 'GAGK',
-                     'LLSG', 'LKK', 'FLP', 'LSG', 'SCK', 'LLS', 'GETC', 'VLG', 'GKLL',
-                     'LLG', 'C', 'KCKI', 'G', 'VGK', 'CSC', 'TKKC', 'GCS', 'GKA', 'IGK',
-                     'GESC', 'KVCY', 'KKL', 'KKI', 'KKC', 'LGGL', 'GLL', 'CGE', 'GGYC',
-                     'GLLS', 'GLF', 'AKK', 'GKAA', 'ESCV', 'GLP', 'CGES', 'PCGE', 'FL',
-                     'CGET', 'GLW', 'KGAA', 'KAAL', 'GGY', 'GGG', 'IKG', 'LKG', 'GGL',
-                     'CK', 'GTC', 'CG', 'SKKC', 'CS', 'CR', 'KC', 'AGKA', 'KA', 'KG',
-                     'LKCK', 'SCKL', 'KK', 'KI', 'KN', 'KL', 'SK', 'KV', 'SL', 'SC',
-                     'SG', 'AAA', 'VAK', 'AAL', 'AAK', 'GGGG', 'KNVA', 'GGGL', 'GYG',
-                     'LG', 'LA', 'LL', 'LK', 'LS', 'LP', 'GCSC', 'TC', 'GAA', 'AA', 'VA',
-                     'VC', 'AG', 'VG', 'AI', 'AK', 'VL', 'AL', 'TPGC', 'IK', 'IA', 'IG',
-                     'YGG', 'LGK', 'CSCK', 'GYGG', 'LGG', 'KGA'])
-
-
-def aa_formulas():
-    """
-    Function returning the molecular formulas of all amino acids. All amino acids are considered in the neutral form
-    (uncharged).
-    """
-    formulas = {'A': {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'S': 0},
-                'C': {'C': 3, 'H': 7, 'N': 1, 'O': 2, 'S': 1},
-                'D': {'C': 4, 'H': 7, 'N': 1, 'O': 4, 'S': 0},
-                'E': {'C': 5, 'H': 9, 'N': 1, 'O': 4, 'S': 0},
-                'F': {'C': 9, 'H': 11, 'N': 1, 'O': 2, 'S': 0},
-                'G': {'C': 2, 'H': 5, 'N': 1, 'O': 2, 'S': 0},
-                'H': {'C': 6, 'H': 9, 'N': 3, 'O': 2, 'S': 0},
-                'I': {'C': 6, 'H': 13, 'N': 1, 'O': 2, 'S': 0},
-                'K': {'C': 6, 'H': 14, 'N': 2, 'O': 2, 'S': 0},
-                'L': {'C': 6, 'H': 13, 'N': 1, 'O': 2, 'S': 0},
-                'M': {'C': 5, 'H': 11, 'N': 1, 'O': 2, 'S': 1},
-                'N': {'C': 4, 'H': 8, 'N': 2, 'O': 3, 'S': 0},
-                'P': {'C': 5, 'H': 9, 'N': 1, 'O': 2, 'S': 0},
-                'Q': {'C': 5, 'H': 10, 'N': 2, 'O': 3, 'S': 0},
-                'R': {'C': 6, 'H': 14, 'N': 4, 'O': 2, 'S': 0},
-                'S': {'C': 3, 'H': 7, 'N': 1, 'O': 3, 'S': 0},
-                'T': {'C': 4, 'H': 9, 'N': 1, 'O': 3, 'S': 0},
-                'V': {'C': 5, 'H': 11, 'N': 1, 'O': 2, 'S': 0},
-                'W': {'C': 11, 'H': 12, 'N': 2, 'O': 2, 'S': 0},
-                'Y': {'C': 9, 'H': 11, 'N': 1, 'O': 3, 'S': 0}
-                }
-    return formulas
--- a/cpt_helical_wheel/plotWheels/descriptors.py	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1097 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-.. currentmodule:: modlamp.descriptors
-
-.. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch>
-
-This module incorporates different classes to calculate peptide descriptor values. The following classes are available:
-
-=============================        ============================================================================
-Class                                Characteristics
-=============================        ============================================================================
-:py:class:`GlobalDescriptor`         Global one-dimensional peptide descriptors calculated from the AA sequence.
-:py:class:`PeptideDescriptor`        AA scale based global or convoluted descriptors (auto-/cross-correlated).
-=============================        ============================================================================
-
-.. seealso:: :class:`modlamp.core.BaseDescriptor` from which the classes in :mod:`modlamp.descriptors` inherit.
-"""
-
-import sys
-
-import numpy as np
-from scipy import stats
-from sklearn.externals.joblib import Parallel, delayed
-
-from plotWheels.core import BaseDescriptor, load_scale, count_aas, aa_weights, aa_energies, aa_formulas
-
-__author__ = "Alex Müller, Gisela Gabernet"
-__docformat__ = "restructuredtext en"
-
-
-def _one_autocorr(seq, window, scale):
-    """Private function used for calculating auto-correlated descriptors for 1 given sequence, window and an AA scale.
-    This function is used by the :py:func:`calculate_autocorr` method of :py:class:`PeptideDescriptor`.
-
-    :param seq: {str} amino acid sequence to calculate descriptor for
-    :param window: {int} correlation-window size
-    :param scale: {str} amino acid scale to be used to calculate descriptor
-    :return: {numpy.array} calculated descriptor data
-    """
-    try:
-        m = list()  # list of lists to store translated sequence values
-        for l in range(len(seq)):  # translate AA sequence into values
-            m.append(scale[str(seq[l])])
-        # auto-correlation in defined sequence window
-        seqdesc = list()
-        for dist in range(window):  # for all correlation distances
-            for val in range(len(scale['A'])):  # for all features of the descriptor scale
-                valsum = list()
-                cntr = 0.
-                for pos in range(len(seq)):  # for every position in the sequence
-                    if (pos + dist) < len(seq):  # check if corr distance is possible at that sequence position
-                        cntr += 1  # counter to scale sum
-                        valsum.append(m[pos][val] * m[pos + dist][val])
-                seqdesc.append(sum(valsum) / cntr)  # append scaled correlation distance values
-        return seqdesc
-    except ZeroDivisionError:
-        print("ERROR!\nThe chosen correlation window % i is larger than the sequence %s !" % (window, seq))
-
-
-def _one_crosscorr(seq, window, scale):
-    """Private function used for calculating cross-correlated descriptors for 1 given sequence, window and an AA scale.
-    This function is used by the :py:func:`calculate_crosscorr` method of :py:class:`PeptideDescriptor`.
-
-    :param seq: {str} amino acid sequence to calculate descriptor for
-    :param window: {int} correlation-window size
-    :param scale: {str} amino acid scale to be used to calculate descriptor
-    :return: {numpy.array} calculated descriptor data
-    """
-    try:
-        m = list()  # list of lists to store translated sequence values
-        for l in range(len(seq)):  # translate AA sequence into values
-            m.append(scale[str(seq[l])])
-        # auto-correlation in defined sequence window
-        seqdesc = list()
-        for val in range(len(scale['A'])):  # for all features of the descriptor scale
-            for cc in range(len(scale['A'])):  # for every feature cross correlation
-                if (val + cc) < len(scale['A']):  # check if corr distance is in range of the num of features
-                    for dist in range(window):  # for all correlation distances
-                        cntr = float()
-                        valsum = list()
-                        for pos in range(len(seq)):  # for every position in the sequence
-                            if (pos + dist) < len(seq):  # check if corr distance is possible at that sequence pos
-                                cntr += 1  # counter to scale sum
-                                valsum.append(m[pos][val] * m[pos + dist][val + cc])
-                        seqdesc.append(sum(valsum) / cntr)  # append scaled correlation distance values
-        return seqdesc
-    except ZeroDivisionError:
-        print("ERROR!\nThe chosen correlation window % i is larger than the sequence %s !" % (window, seq))
-
-
-def _one_arc(seq, modality, scale):
-    """ Privat function used for calculating arc descriptors for one sequence and AA scale. This function is used by
-    :py:func:`calculate_arc` method method of :py:class:`PeptideDescriptor`.
-
-    :param seq: {str} amino acid sequence to calculate descriptor for
-    :param scale: {str} amino acid scale to be used to calculate descriptor
-    :return: {numpy.array} calculated descriptor data
-    """
-    desc_mat = []
-    for aa in seq:
-        desc_mat.append(scale[aa])
-    desc_mat = np.asarray(desc_mat)
-
-    # Check descriptor dimension
-    desc_dim = desc_mat.shape[1]
-
-    # list to store descriptor values for all windows
-    allwindows_arc = []
-
-    if len(seq) > 18:
-        window = 18
-        # calculates number of windows in sequence
-        num_windows = len(seq) - window
-    else:
-        window = len(seq)
-        num_windows = 1
-
-    # loop through all windows
-    for j in range(num_windows):
-        # slices descriptor matrix into current window
-        window_mat = desc_mat[j:j + window, :]
-
-        # defines order of amino acids in helical projection
-        order = [0, 11, 4, 15, 8, 1, 12, 5, 16, 9, 2, 13, 6, 17, 10, 3, 14, 7]
-
-        # orders window descriptor matrix into helical projection order
-        ordered = []
-        for pos in order:
-            try:
-                ordered.append(window_mat[pos, :])
-            except:
-                # for sequences of len < 18 adding dummy vector with 2s, length of descriptor dimensions
-                ordered.append([2] * desc_dim)
-        ordered = np.asarray(ordered)
-
-        window_arc = []
-
-        # loop through pharmacophoric features
-        for m in range(desc_dim):
-            all_arcs = []  # stores all arcs that can be found of a pharmacophoric feature
-            arc = 0
-
-            for n in range(18):  # for all positions in helix, regardless of sequence length
-                if ordered[n, m] == 0:  # if position does not contain pharmacophoric feature
-                    all_arcs.append(arc)  # append previous arc to all arcs list
-                    arc = 0  # arc is initialized
-                elif ordered[n, m] == 1:  # if position contains pharmacophoric feature(PF), elongate arc by 20°
-                    arc += 20
-                elif ordered[n, m] == 2:  # if position doesn't contain amino acid:
-                    if ordered[n - 1, m] == 1:  # if previous position contained PF add 10°
-                        arc += 10
-                    elif ordered[n - 1, m] == 0:  # if previous position didn't contain PF don't add anything
-                        arc += 0
-                    elif ordered[
-                                n - 2, m] == 1:  # if previous position is empty then check second previous for PF
-                        arc += 10
-                    if n == 17:  # if we are at the last position check for position n=0 instead of next position.
-                        if ordered[0, m] == 1:  # if it contains PF add 10° extra
-                            arc += 10
-                    else:  # if next position contains PF add 10° extra
-                        if ordered[n + 1, m] == 1:
-                            arc += 10
-                        elif ordered[n + 1, m] == 0:
-                            arc += 0
-                        else:  # if next position is empty check for 2nd next position
-                            if n == 16:
-                                if ordered[0, m] == 1:
-                                    arc += 10
-                            else:
-                                if ordered[n + 2, m] == 1:
-                                    arc += 10
-
-            all_arcs.append(arc)
-            if not arc == 360:
-                arc0 = all_arcs.pop() + all_arcs[0]  # join first and last arc together
-                all_arcs = [arc0] + all_arcs[1:]
-
-            window_arc.append(np.max(all_arcs))  # append to window arcs the maximum arc of this PF
-        allwindows_arc.append(window_arc)  # append all PF arcs of this window
-
-    allwindows_arc = np.asarray(allwindows_arc)
-
-    if modality == 'max':
-        final_arc = np.max(allwindows_arc, axis=0)  # calculate maximum / mean arc along all windows
-    elif modality == 'mean':
-        final_arc = np.mean(allwindows_arc, axis=0)
-    else:
-        print('modality is unknown, please choose between "max" and "mean"\n.')
-        sys.exit()
-    return final_arc
-
-
-def _charge(seq, ph=7.0, amide=False):
-    """Calculates charge of a single sequence. The method used is first described by Bjellqvist. In the case of
-    amidation, the value for the  'Cterm' pKa is 15 (and Cterm is added to the pos_pks dictionary.
-    The pKa scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed).
-
-    **pos_pks** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
-
-    **neg_pks** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
-
-    :param ph: {float} pH at which to calculate peptide charge.
-    :param amide: {boolean} whether the sequences have an amidated C-terminus.
-    :return: {array} descriptor values in the attribute :py:attr:`descriptor
-    """
-
-    if amide:
-        pos_pks = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
-        neg_pks = {'Cterm': 15., 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
-    else:
-        pos_pks = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
-        neg_pks = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
-
-    aa_content = count_aas(seq, scale='absolute')
-    aa_content['Nterm'] = 1.0
-    aa_content['Cterm'] = 1.0
-    pos_charge = 0.0
-    for aa, pK in pos_pks.items():
-        c_r = 10 ** (pK - ph)
-        partial_charge = c_r / (c_r + 1.0)
-        pos_charge += aa_content[aa] * partial_charge
-    neg_charge = 0.0
-    for aa, pK in neg_pks.items():
-        c_r = 10 ** (ph - pK)
-        partial_charge = c_r / (c_r + 1.0)
-        neg_charge += aa_content[aa] * partial_charge
-    return round(pos_charge - neg_charge, 3)
-
-
-class GlobalDescriptor(BaseDescriptor):
-    """
-    Base class for global, non-amino acid scale dependant descriptors. The following descriptors can be calculated by
-    the **methods** linked below:
-
-    - `Sequence Length      <modlamp.html#modlamp.descriptors.GlobalDescriptor.length>`_
-    - `Molecular Formula    <modlamp.html#modlamp.descriptors.GlobalDescriptor.formula>`_
-    - `Molecular Weight     <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_MW>`_
-    - `Sequence Charge      <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_charge>`_
-    - `Charge Density       <modlamp.html#modlamp.descriptors.GlobalDescriptor.charge_density>`_
-    - `Isoelectric Point    <modlamp.html#modlamp.descriptors.GlobalDescriptor.isoelectric_point>`_
-    - `Instability Index    <modlamp.html#modlamp.descriptors.GlobalDescriptor.instability_index>`_
-    - `Aromaticity          <modlamp.html#modlamp.descriptors.GlobalDescriptor.aromaticity>`_
-    - `Aliphatic Index      <modlamp.html#modlamp.descriptors.GlobalDescriptor.aliphatic_index>`_
-    - `Boman Index          <modlamp.html#modlamp.descriptors.GlobalDescriptor.boman_index>`_
-    - `Hydrophobic Ratio    <modlamp.html#modlamp.descriptors.GlobalDescriptor.hydrophobic_ratio>`_
-    - `all of the above     <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_all>`_
-    """
-
-    def length(self, append=False):
-        """
-        Method to calculate the length (total AA count) of every sequence in the attribute :py:attr:`sequences`.
-
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of sequence lengths in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor(['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'])
-        >>> desc.length()
-        >>> desc.descriptor
-        array([[ 8.], [12.], [12.]])
-        """
-        desc = []
-        for seq in self.sequences:
-            desc.append(float(len(seq.strip())))
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('Length')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['Length']
-
-    def formula(self, amide=False, append=False):
-        """Method to calculate the molecular formula of every sequence in the attribute :py:attr:`sequences`.
-
-        :param amide: {boolean} whether the sequences are C-terminally amidated.
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of molecular formulas {str} in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor(['KADSFLSADGHSADFSLDKKLKERL', 'ERTILSDFPQWWFASLDFLNC', 'ACDEFGHIKLMNPQRSTVWY'])
-        >>> desc.formula(amide=True)
-        >>> for v in desc.descriptor:
-        ...     print(v[0])
-        C122 H197 N35 O39
-        C121 H168 N28 O33 S
-        C106 H157 N29 O30 S2
-
-        .. seealso:: :py:func:`modlamp.core.aa_formulas()`
-
-        .. versionadded:: v2.7.6
-        """
-        desc = []
-        formulas = aa_formulas()
-        for seq in self.sequences:
-            f = {'C': 0, 'H': 0, 'N': 0, 'O': 0, 'S': 0}
-            for aa in seq:  # loop over all AAs
-                for k in f.keys():
-                    f[k] += formulas[aa][k]
-
-            # substract H2O for every peptide bond
-            f['H'] -= 2 * (len(seq) - 1)
-            f['O'] -= (len(seq) - 1)
-
-            if amide:  # add C-terminal amide --> replace OH with NH2
-                f['O'] -= 1
-                f['H'] += 1
-                f['N'] += 1
-
-            if f['S'] != 0:
-                val = 'C%s H%s N%s O%s %s%s' % (f['C'], f['H'], f['N'], f['O'], 'S', f['S'])
-            else:
-                val = 'C%s H%s N%s O%s' % (f['C'], f['H'], f['N'], f['O'])
-
-            desc.append([val])
-
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('Formula')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['Formula']
-
-    def calculate_MW(self, amide=False, append=False):
-        """Method to calculate the molecular weight [g/mol] of every sequence in the attribute :py:attr:`sequences`.
-
-        :param amide: {boolean} whether the sequences are C-terminally amidated (subtracts 0.95 from the MW).
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('IAESFKGHIPL')
-        >>> desc.calculate_MW(amide=True)
-        >>> desc.descriptor
-        array([[ 1210.43]])
-
-        .. seealso:: :py:func:`modlamp.core.aa_weights()`
-
-        .. versionchanged:: v2.1.5 amide option added
-        """
-        desc = []
-        weights = aa_weights()
-        for seq in self.sequences:
-            mw = []
-            for aa in seq:  # sum over aa weights
-                mw.append(weights[aa])
-            desc.append(round(sum(mw) - 18.015 * (len(seq) - 1), 2))  # sum over AA MW and subtract H20 MW for every
-            # peptide bond
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if amide:  # if sequences are amidated, subtract 0.98 from calculated MW (OH - NH2)
-            desc = [d - 0.98 for d in desc]
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('MW')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['MW']
-
-    def calculate_charge(self, ph=7.0, amide=False, append=False):
-        """Method to overall charge of every sequence in the attribute :py:attr:`sequences`.
-
-        The method used is first described by Bjellqvist. In the case of amidation, the value for the 'Cterm' pKa is 15
-        (and Cterm is added to the pos_pKs dictionary.
-        The pKa scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed).
-
-        **pos_pKs** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
-
-        **neg_pKs** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
-
-        :param ph: {float} ph at which to calculate peptide charge.
-        :param amide: {boolean} whether the sequences have an amidated C-terminus.
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('KLAKFGKRSELVALSG')
-        >>> desc.calculate_charge(ph=7.4, amide=True)
-        >>> desc.descriptor
-        array([[ 3.989]])
-        """
-
-        desc = []
-        for seq in self.sequences:
-            desc.append(_charge(seq, ph, amide))  # calculate charge with helper function
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('Charge')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['Charge']
-
-    def charge_density(self, ph=7.0, amide=False, append=False):
-        """Method to calculate the charge density (charge / MW) of every sequences in the attributes :py:attr:`sequences`
-
-        :param ph: {float} pH at which to calculate peptide charge.
-        :param amide: {boolean} whether the sequences have an amidated C-terminus.
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`.
-        :Example:
-
-        >>> desc = GlobalDescriptor('GNSDLLIEQRTLLASDEF')
-        >>> desc.charge_density(ph=6, amide=True)
-        >>> desc.descriptor
-        array([[-0.00097119]])
-        """
-        self.calculate_charge(ph, amide)
-        charges = self.descriptor
-        self.calculate_MW(amide)
-        masses = self.descriptor
-        desc = charges / masses
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('ChargeDensity')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['ChargeDensity']
-
-    def isoelectric_point(self, amide=False, append=False):
-        """
-        Method to calculate the isoelectric point of every sequence in the attribute :py:attr:`sequences`.
-        The pK scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed).
-
-         **pos_pKs** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
-
-         **neg_pKs** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
-
-        :param amide: {boolean} whether the sequences have an amidated C-terminus.
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('KLFDIKFGHIPQRST')
-        >>> desc.isoelectric_point()
-        >>> desc.descriptor
-        array([[ 10.6796875]])
-        """
-        ph, ph1, ph2 = float(), float(), float()
-        desc = []
-        for seq in self.sequences:
-
-            # Bracket between ph1 and ph2
-            ph = 7.0
-            charge = _charge(seq, ph, amide)
-            if charge > 0.0:
-                ph1 = ph
-                charge1 = charge
-                while charge1 > 0.0:
-                    ph = ph1 + 1.0
-                    charge = _charge(seq, ph, amide)
-                    if charge > 0.0:
-                        ph1 = ph
-                        charge1 = charge
-                    else:
-                        ph2 = ph
-                        break
-            else:
-                ph2 = ph
-                charge2 = charge
-                while charge2 < 0.0:
-                    ph = ph2 - 1.0
-                    charge = _charge(seq, ph, amide)
-                    if charge < 0.0:
-                        ph2 = ph
-                        charge2 = charge
-                    else:
-                        ph1 = ph
-                        break
-            # Bisection
-            while ph2 - ph1 > 0.0001 and charge != 0.0:
-                ph = (ph1 + ph2) / 2.0
-                charge = _charge(seq, ph, amide)
-                if charge > 0.0:
-                    ph1 = ph
-                else:
-                    ph2 = ph
-            desc.append(ph)
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('pI')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['pI']
-
-    def instability_index(self, append=False):
-        """
-        Method to calculate the instability of every sequence in the attribute :py:attr:`sequences`.
-        The instability index is a prediction of protein stability based on the amino acid composition.
-        ([1] K. Guruprasad, B. V Reddy, M. W. Pandit, Protein Eng. 1990, 4, 155–161.)
-
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('LLASMNDLLAKRST')
-        >>> desc.instability_index()
-        >>> desc.descriptor
-        array([[ 63.95714286]])
-        """
-
-        desc = []
-        dimv = load_scale('instability')[1]
-        for seq in self.sequences:
-            stabindex = float()
-            for i in range(len(seq) - 1):
-                stabindex += dimv[seq[i]][seq[i+1]]
-            desc.append((10.0 / len(seq)) * stabindex)
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('InstabilityInd')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['InstabilityInd']
-
-    def aromaticity(self, append=False):
-        """
-        Method to calculate the aromaticity of every sequence in the attribute :py:attr:`sequences`.
-        According to Lobry, 1994, it is simply the relative frequency of Phe+Trp+Tyr.
-
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('GLFYWRFFLQRRFLYWW')
-        >>> desc.aromaticity()
-        >>> desc.descriptor
-        array([[ 0.52941176]])
-        """
-        desc = []
-        for seq in self.sequences:
-            f = seq.count('F')
-            w = seq.count('W')
-            y = seq.count('Y')
-            desc.append(float(f + w + y) / len(seq))
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('Aromaticity')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['Aromaticity']
-
-    def aliphatic_index(self, append=False):
-        """
-        Method to calculate the aliphatic index of every sequence in the attribute :py:attr:`sequences`.
-        According to Ikai, 1980, the aliphatic index is a measure of thermal stability of proteins and is dependant
-        on the relative volume occupied by aliphatic amino acids (A,I,L & V).
-        ([1] A. Ikai, J. Biochem. 1980, 88, 1895–1898.)
-
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('KWLKYLKKLAKLVK')
-        >>> desc.aliphatic_index()
-        >>> desc.descriptor
-        array([[ 139.28571429]])
-        """
-        desc = []
-        aa_dict = aa_weights()
-        for seq in self.sequences:
-            d = {aa: seq.count(aa) for aa in aa_dict.keys()}  # count aa
-            d = {k: (float(d[k]) / len(seq)) * 100 for k in d.keys()}  # get mole percent of all AA
-            desc.append(d['A'] + 2.9 * d['V'] + 3.9 * (d['I'] + d['L']))  # formula for calculating the AI (Ikai, 1980)
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('AliphaticInd')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['AliphaticInd']
-
-    def boman_index(self, append=False):
-        """Method to calculate the boman index of every sequence in the attribute :py:attr:`sequences`.
-        According to Boman, 2003, the boman index is a measure for protein-protein interactions and is calculated by
-        summing over all amino acid free energy of transfer [kcal/mol] between water and cyclohexane,[2] followed by
-        dividing by    sequence length.
-        ([1] H. G. Boman, D. Wade, I. a Boman, B. Wåhlin, R. B. Merrifield, *FEBS Lett*. **1989**, *259*, 103–106.
-        [2] A. Radzick, R. Wolfenden, *Biochemistry* **1988**, *27*, 1664–1670.)
-
-        .. seealso:: :py:func:`modlamp.core.aa_energies()`
-
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('GLFDIVKKVVGALGSL')
-        >>> desc.boman_index()
-        >>> desc.descriptor
-        array([[-1.011875]])
-        """
-        d = aa_energies()
-        desc = []
-        for seq in self.sequences:
-            val = []
-            for a in seq:
-                val.append(d[a])
-            desc.append(sum(val) / len(val))
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('BomanInd')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['BomanInd']
-
-    def hydrophobic_ratio(self, append=False):
-        """
-        Method to calculate the hydrophobic ratio of every sequence in the attribute :py:attr:`sequences`, which is the
-        relative frequency of the amino acids **A,C,F,I,L,M & V**.
-
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('VALLYWRTVLLAIII')
-        >>> desc.hydrophobic_ratio()
-        >>> desc.descriptor
-        array([[ 0.73333333]])
-        """
-        desc = []
-        aa_dict = aa_weights()
-        for seq in self.sequences:
-            pa = {aa: seq.count(aa) for aa in aa_dict.keys()}  # count aa
-            # formula for calculating the AI (Ikai, 1980):
-            desc.append((pa['A'] + pa['C'] + pa['F'] + pa['I'] + pa['L'] + pa['M'] + pa['V']) / float(len(seq)))
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            self.featurenames.append('HydrophRatio')
-        else:
-            self.descriptor = np.array(desc)
-            self.featurenames = ['HydrophRatio']
-
-    def calculate_all(self, ph=7.4, amide=True):
-        """Method combining all global descriptors and appending them into the feature matrix in the attribute
-        :py:attr:`descriptor`.
-
-        :param ph: {float} pH at which to calculate peptide charge
-        :param amide: {boolean} whether the sequences have an amidated C-terminus.
-        :return: array of descriptor values in the attribute :py:attr:`descriptor`
-        :Example:
-
-        >>> desc = GlobalDescriptor('AFGHFKLKKLFIFGHERT')
-        >>> desc.calculate_all(amide=True)
-        >>> desc.featurenames
-        ['Length', 'MW', 'ChargeDensity', 'pI', 'InstabilityInd', 'Aromaticity', 'AliphaticInd', 'BomanInd', 'HydRatio']
-        >>> desc.descriptor
-        array([[ 18.,  2.17559000e+03,   1.87167619e-03,   1.16757812e+01, ...  1.10555556e+00,   4.44444444e-01]])
-        >>> desc.save_descriptor('/path/to/outputfile.csv')  # save the descriptor data (with feature names header)
-        """
-
-        # This is a strange way of doing it. However, the append=True option excludes length and charge, no idea why!
-        fn = []
-        self.length()  # sequence length
-        l = self.descriptor
-        fn.extend(self.featurenames)
-        self.calculate_MW(amide=amide)  # molecular weight
-        mw = self.descriptor
-        fn.extend(self.featurenames)
-        self.calculate_charge(ph=ph, amide=amide)  # net charge
-        c = self.descriptor
-        fn.extend(self.featurenames)
-        self.charge_density(ph=ph, amide=amide)  # charge density
-        cd = self.descriptor
-        fn.extend(self.featurenames)
-        self.isoelectric_point(amide=amide)  # pI
-        pi = self.descriptor
-        fn.extend(self.featurenames)
-        self.instability_index()  # instability index
-        si = self.descriptor
-        fn.extend(self.featurenames)
-        self.aromaticity()  # global aromaticity
-        ar = self.descriptor
-        fn.extend(self.featurenames)
-        self.aliphatic_index()  # aliphatic index
-        ai = self.descriptor
-        fn.extend(self.featurenames)
-        self.boman_index()  # Boman index
-        bi = self.descriptor
-        fn.extend(self.featurenames)
-        self.hydrophobic_ratio()  # Hydrophobic ratio
-        hr = self.descriptor
-        fn.extend(self.featurenames)
-
-        self.descriptor = np.concatenate((l, mw, c, cd, pi, si, ar, ai, bi, hr), axis=1)
-        self.featurenames = fn
-
-
-class PeptideDescriptor(BaseDescriptor):
-    """Base class for peptide descriptors. The following **amino acid descriptor scales** are available for descriptor
-    calculation:
-
-    - **AASI**           (An amino acid selectivity index scale for helical antimicrobial peptides, *[1] D. Juretić, D. Vukicević, N. Ilić, N. Antcheva, A. Tossi, J. Chem. Inf. Model. 2009, 49, 2873–2882.*)
-    - **ABHPRK**         (modlabs inhouse physicochemical feature scale (Acidic, Basic, Hydrophobic, Polar, aRomatic, Kink-inducer)
-    - **argos**          (Argos hydrophobicity amino acid scale, *[2] Argos, P., Rao, J. K. M. & Hargrave, P. A., Eur. J. Biochem. 2005, 128, 565–575.*)
-    - **bulkiness**      (Amino acid side chain bulkiness scale, *[3] J. M. Zimmerman, N. Eliezer, R. Simha, J. Theor. Biol. 1968, 21, 170–201.*)
-    - **charge_phys**    (Amino acid charge at pH 7.0 - Hystidine charge +0.1.)
-    - **charge_acid**    (Amino acid charge at acidic pH - Hystidine charge +1.0.)
-    - **cougar**         (modlabs inhouse selection of global peptide descriptors)
-    - **eisenberg**      (the Eisenberg hydrophobicity consensus amino acid scale, *[4] D. Eisenberg, R. M. Weiss, T. C. Terwilliger, W. Wilcox, Faraday Symp. Chem. Soc. 1982, 17, 109.*)
-    - **Ez**             (potential that assesses energies of insertion of amino acid side chains into lipid bilayers, *[5] A. Senes, D. C. Chadi, P. B. Law, R. F. S. Walters, V. Nanda, W. F. DeGrado, J. Mol. Biol. 2007, 366, 436–448.*)
-    - **flexibility**    (amino acid side chain flexibilitiy scale, *[6] R. Bhaskaran, P. K. Ponnuswamy, Int. J. Pept. Protein Res. 1988, 32, 241–255.*)
-    - **grantham**       (amino acid side chain composition, polarity and molecular volume, *[8] Grantham, R. Science. 185, 862–864 (1974).*)
-    - **gravy**          (GRAVY hydrophobicity amino acid scale, *[9] J. Kyte, R. F. Doolittle, J. Mol. Biol. 1982, 157, 105–132.*)
-    - **hopp-woods**     (Hopp-Woods amino acid hydrophobicity scale,*[10] T. P. Hopp, K. R. Woods, Proc. Natl. Acad. Sci. 1981, 78, 3824–3828.*)
-    - **ISAECI**         (Isotropic Surface Area (ISA) and Electronic Charge Index (ECI) of amino acid side chains, *[11] E. R. Collantes, W. J. Dunn, J. Med. Chem. 1995, 38, 2705–2713.*)
-    - **janin**          (Janin hydrophobicity amino acid scale, *[12] J. L. Cornette, K. B. Cease, H. Margalit, J. L. Spouge, J. A. Berzofsky, C. DeLisi, J. Mol. Biol. 1987, 195, 659–685.*)
-    - **kytedoolittle**  (Kyte & Doolittle hydrophobicity amino acid scale, *[13] J. Kyte, R. F. Doolittle, J. Mol. Biol. 1982, 157, 105–132.*)
-    - **levitt_alpha**   (Levitt amino acid alpha-helix propensity scale, extracted from http://web.expasy.org/protscale. *[14] M. Levitt, Biochemistry 1978, 17, 4277-4285.*)
-    - **MSS**            (A graph-theoretical index that reflects topological shape and size of amino acid side chains, *[15] C. Raychaudhury, A. Banerjee, P. Bag, S. Roy, J. Chem. Inf. Comput. Sci. 1999, 39, 248–254.*)
-    - **MSW**            (Amino acid scale based on a PCA of the molecular surface based WHIM descriptor (MS-WHIM), extended to natural amino acids, *[16] A. Zaliani, E. Gancia, J. Chem. Inf. Comput. Sci 1999, 39, 525–533.*)
-    - **pepArc**         (modlabs pharmacophoric feature scale, dimensions are: hydrophobicity, polarity, positive charge, negative charge, proline.)
-    - **pepcats**        (modlabs pharmacophoric feature based PEPCATS scale, *[17] C. P. Koch, A. M. Perna, M. Pillong, N. K. Todoroff, P. Wrede, G. Folkers, J. A. Hiss, G. Schneider, PLoS Comput. Biol. 2013, 9, e1003088.*)
-    - **polarity**       (Amino acid polarity scale, *[18] J. M. Zimmerman, N. Eliezer, R. Simha, J. Theor. Biol. 1968, 21, 170–201.*)
-    - **PPCALI**         (modlabs inhouse scale derived from a PCA of 143 amino acid property scales, *[19] C. P. Koch, A. M. Perna, M. Pillong, N. K. Todoroff, P. Wrede, G. Folkers, J. A. Hiss, G. Schneider, PLoS Comput. Biol. 2013, 9, e1003088.*)
-    - **refractivity**   (Relative amino acid refractivity values, *[20] T. L. McMeekin, M. Wilensky, M. L. Groves, Biochem. Biophys. Res. Commun. 1962, 7, 151–156.*)
-    - **t_scale**        (A PCA derived scale based on amino acid side chain properties calculated with 6 different probes of the GRID program, *[21] M. Cocchi, E. Johansson, Quant. Struct. Act. Relationships 1993, 12, 1–8.*)
-    - **TM_tend**        (Amino acid transmembrane propensity scale, extracted from http://web.expasy.org/protscale, *[22] Zhao, G., London E. Protein Sci. 2006, 15, 1987-2001.*)
-    - **z3**             (The original three dimensional Z-scale, *[23] S. Hellberg, M. Sjöström, B. Skagerberg, S. Wold, J. Med. Chem. 1987, 30, 1126–1135.*)
-    - **z5**             (The extended five dimensional Z-scale, *[24] M. Sandberg, L. Eriksson, J. Jonsson, M. Sjöström, S. Wold, J. Med. Chem. 1998, 41, 2481–2491.*)
-
-    Further, amino acid scale independent methods can be calculated with help of the :class:`GlobalDescriptor` class.
-
-    """
-
-    def __init__(self, seqs, scalename='Eisenberg'):
-        """
-        :param seqs: a .fasta file with sequences, a list of sequences or a single sequence as string to calculate the
-            descriptor values for.
-        :param scalename: {str} name of the amino acid scale (one of the given list above) used to calculate the
-            descriptor values
-        :return: initialized attributes :py:attr:`sequences`, :py:attr:`names` and dictionary :py:attr:`scale` with
-            amino acid scale values of the scale name in :py:attr:`scalename`.
-        :Example:
-
-        >>> AMP = PeptideDescriptor('KLLKLLKKLLKLLK','pepcats')
-        >>> AMP.sequences
-        ['KLLKLLKKLLKLLK']
-        >>> seqs = PeptideDescriptor('/Path/to/file.fasta', 'eisenberg')  # load sequences from .fasta file
-        >>> seqs.sequences
-        ['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'...]
-        """
-        super(PeptideDescriptor, self).__init__(seqs)
-        self.scalename, self.scale = load_scale(scalename.lower())
-        self.all_moms = list()  # for passing hydrophobic moments to calculate_profile
-        self.all_globs = list()  # for passing global  to calculate_profile
-
-    def load_scale(self, scalename):
-        """Method to load amino acid values from a given scale
-
-        :param scalename: {str} name of the amino acid scale to be loaded.
-        :return: loaded amino acid scale values in a dictionary in the attribute :py:attr:`scale`.
-
-        .. seealso:: :func:`modlamp.core.load_scale()`
-        """
-        self.scalename, self.scale = load_scale(scalename.lower())
-
-    def calculate_autocorr(self, window, append=False):
-        """Method for auto-correlating the amino acid values for a given descriptor scale
-
-        :param window: {int} correlation window for descriptor calculation in a sliding window approach
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: calculated descriptor numpy.array in the attribute :py:attr:`descriptor`.
-        :Example:
-
-        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','PPCALI')
-        >>> AMP.calculate_autocorr(7)
-        >>> AMP.descriptor
-        array([[  1.28442339e+00,   1.29025116e+00,   1.03240901e+00, .... ]])
-        >>> AMP.descriptor.shape
-        (1, 133)
-
-        .. versionchanged:: v.2.3.0
-        """
-        desc = Parallel(n_jobs=-1)(delayed(_one_autocorr)(seq, window, self.scale) for seq in self.sequences)
-
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-        else:
-            self.descriptor = np.array(desc)
-
-    def calculate_crosscorr(self, window, append=False):
-        """Method for cross-correlating the amino acid values for a given descriptor scale
-
-        :param window: {int} correlation window for descriptor calculation in a sliding window approach
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: calculated descriptor numpy.array in the attribute :py:attr:`descriptor`.
-        :Example:
-
-        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','pepcats')
-        >>> AMP.calculate_crosscorr(7)
-        >>> AMP.descriptor
-        array([[ 0.6875    ,  0.46666667,  0.42857143,  0.61538462,  0.58333333, ... ]])
-        >>> AMP.descriptor.shape
-        (1, 147)
-        """
-        desc = Parallel(n_jobs=-1)(delayed(_one_crosscorr)(seq, window, self.scale) for seq in self.sequences)
-
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-        else:
-            self.descriptor = np.array(desc)
-
-    def calculate_moment(self, window=1000, angle=100, modality='max', append=False):
-        """Method for calculating the maximum or mean moment of the amino acid values for a given descriptor scale and
-        window.
-
-        :param window: {int} amino acid window in which to calculate the moment. If the sequence is shorter than the
-            window, the length of the sequence is taken. So if the default window of 1000 is chosen, for all sequences
-            shorter than 1000, the **global** hydrophobic moment will be calculated. Otherwise, the maximal
-            hydrophiobic moment for the chosen window size found in the sequence will be returned.
-        :param angle: {int} angle in which to calculate the moment. **100** for alpha helices, **180** for beta sheets.
-        :param modality: {'all', 'max' or 'mean'} Calculate respectively maximum or mean hydrophobic moment. If all,
-            moments for all windows are returned.
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: Calculated descriptor as a numpy.array in the attribute :py:attr:`descriptor` and all possible global
-            values in :py:attr:`all_moms` (needed for the :py:func:`calculate_profile` method)
-        :Example:
-
-        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL', 'eisenberg')
-        >>> AMP.calculate_moment()
-        >>> AMP.descriptor
-        array([[ 0.48790226]])
-        """
-        if self.scale['A'] == list:
-            print('\n Descriptor moment calculation is only possible for one dimensional descriptors.\n')
-
-        else:
-            desc = []
-            for seq in self.sequences:
-                wdw = min(window, len(seq))  # if sequence is shorter than window, take the whole sequence instead
-                mtrx = []
-                mwdw = []
-
-                for aa in range(len(seq)):
-                    mtrx.append(self.scale[str(seq[aa])])
-
-                for i in range(len(mtrx) - wdw + 1):
-                    mwdw.append(sum(mtrx[i:i + wdw], []))
-
-                mwdw = np.asarray(mwdw)
-                rads = angle * (np.pi / 180) * np.asarray(range(wdw))  # calculate actual moment (radial)
-                vcos = (mwdw * np.cos(rads)).sum(axis=1)
-                vsin = (mwdw * np.sin(rads)).sum(axis=1)
-                moms = np.sqrt(vsin ** 2 + vcos ** 2) / wdw
-
-                if modality == 'max':  # take window with maximal value
-                    moment = np.max(moms)
-                elif modality == 'mean':  # take average value over all windows
-                    moment = np.mean(moms)
-                elif modality == 'all':
-                    moment = moms
-                else:
-                    print('\nERROR!\nModality parameter is wrong, please choose between "all", "max" and "mean".\n')
-                    return
-                desc.append(moment)
-                self.all_moms.append(moms)
-
-            desc = np.asarray(desc).reshape(len(desc), 1)  # final descriptor array
-
-            if append:
-                self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            else:
-                self.descriptor = np.array(desc)
-
-    def calculate_global(self, window=1000, modality='max', append=False):
-        """Method for calculating a global / window averaging descriptor value of a given AA scale
-
-        :param window: {int} amino acid window in which to calculate the moment. If the sequence is shorter than the
-            window, the length of the sequence is taken.
-        :param modality: {'max' or 'mean'} Calculate respectively maximum or mean hydrophobic moment.
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: Calculated descriptor as a numpy.array in the attribute :py:attr:`descriptor` and all possible global
-            values in :py:attr:`all_globs` (needed for the :py:func:`calculate_profile` method)
-        :Example:
-
-        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','eisenberg')
-        >>> AMP.calculate_global(window=1000, modality='max')
-        >>> AMP.descriptor
-        array([[ 0.44875]])
-        """
-        desc = list()
-        for n, seq in enumerate(self.sequences):
-            wdw = min(window, len(seq))  # if sequence is shorter than window, take the whole sequence instead
-            mtrx = []
-            mwdw = []
-
-            for l in range(len(seq)):  # translate AA sequence into values
-                mtrx.append(self.scale[str(seq[l])])
-
-            for i in range(len(mtrx) - wdw + 1):
-                mwdw.append(sum(mtrx[i:i + wdw], []))  # list of all the values for the different windows
-
-            mwdw = np.asarray(mwdw)
-            glob = np.sum(mwdw, axis=1) / float(wdw)
-            outglob = float()
-
-            if modality in ['max', 'mean']:
-                if modality == 'max':
-                    outglob = np.max(glob)  # returned moment will be the maximum of all windows
-                elif modality == 'mean':
-                    outglob = np.mean(glob)  # returned moment will be the mean of all windows
-                else:
-                    print('Modality parameter is wrong, please choose between "max" and "mean"\n.')
-                    return
-            desc.append(outglob)
-            self.all_globs.append(glob)
-
-        desc = np.asarray(desc).reshape(len(desc), 1)
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-        else:
-            self.descriptor = np.array(desc)
-
-    def calculate_profile(self, prof_type='uH', window=7, append=False):
-        """Method for calculating hydrophobicity or hydrophobic moment profiles for given sequences and fitting for
-        slope and intercept. The hydrophobicity scale used is "eisenberg"
-
-        :param prof_type: prof_type of profile, available: 'H' for hydrophobicity or 'uH' for hydrophobic moment
-        :param window: {int} size of sliding window used (odd-numbered).
-        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
-            attribute :py:attr:`descriptor`.
-        :return: Fitted slope and intercept of calculated profile for every given sequence in the attribute
-            :py:attr:`descriptor`.
-        :Example:
-
-        >>> AMP = PeptideDescriptor('KLLKLLKKVVGALG','kytedoolittle')
-        >>> AMP.calculate_profile(prof_type='H')
-        >>> AMP.descriptor
-        array([[ 0.03731293,  0.19246599]])
-        """
-        if prof_type == 'uH':
-            self.calculate_moment(window=window)
-            y_vals = self.all_moms
-        elif prof_type == 'H':
-            self.calculate_global(window=window)
-            y_vals = self.all_globs
-        else:
-            print('prof_type parameter is unknown, choose "uH" for hydrophobic moment or "H" for hydrophobicity\n.')
-            sys.exit()
-
-        desc = list()
-        for n, seq in enumerate(self.sequences):
-            x_vals = range(len(seq))[int((window - 1) / 2):-int((window - 1) / 2)]
-            if len(seq) <= window:
-                slope, intercept, r_value, p_value, std_err = [0, 0, 0, 0, 0]
-            else:
-                slope, intercept, r_value, p_value, std_err = stats.linregress(x_vals, y_vals[n])
-            desc.append([slope, intercept])
-
-        if append:
-            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-        else:
-            self.descriptor = np.array(desc)
-
-    def calculate_arc(self, modality="max", append=False):
-        """ Method for calculating property arcs as seen in the helical wheel plot. Use for binary amino acid scales only.
-
-        :param modality: modality of the arc to calculate, to choose between "max" and "mean".
-        :param append: if true, append to current descriptor stored in the descriptor attribute.
-        :return: calculated descriptor as numpy.array in the descriptor attribute.
-
-        :Example:
-
-        >>> arc = PeptideDescriptor("KLLKLLKKLLKLLK", scalename="peparc")
-        >>> arc.calculate_arc(modality="max", append=False)
-        >>> arc.descriptor
-        array([[200, 160, 160,   0,   0]])
-        """
-        desc = Parallel(n_jobs=-1)(delayed(_one_arc)(seq, modality, self.scale) for seq in self.sequences)
-
-        # Converts each of the amino acids to descriptor vector
-        for seq in self.sequences:
-
-            # desc_mat = []
-            # for aa in seq:
-            #     desc_mat.append(self.scale[aa])
-            # desc_mat = np.asarray(desc_mat)
-			#
-            # # Check descriptor dimension
-            # desc_dim = desc_mat.shape[1]
-			#
-            # # list to store descriptor values for all windows
-            # allwindows_arc = []
-			#
-            # if len(seq) > 18:
-            #     window = 18
-            #     # calculates number of windows in sequence
-            #     num_windows = len(seq) - window
-            # else:
-            #     window = len(seq)
-            #     num_windows = 1
-			#
-            # # loop through all windows
-            # for j in range(num_windows):
-            #     # slices descriptor matrix into current window
-            #     window_mat = desc_mat[j:j + window, :]
-			#
-            #     # defines order of amino acids in helical projection
-            #     order = [0, 11, 4, 15, 8, 1, 12, 5, 16, 9, 2, 13, 6, 17, 10, 3, 14, 7]
-			#
-            #     # orders window descriptor matrix into helical projection order
-            #     ordered = []
-            #     for pos in order:
-            #         try:
-            #             ordered.append(window_mat[pos, :])
-            #         except:
-            #             # for sequences of len < 18 adding dummy vector with 2s, length of descriptor dimensions
-            #             ordered.append([2] * desc_dim)
-            #     ordered = np.asarray(ordered)
-			#
-            #     window_arc = []
-			#
-            #     # loop through pharmacophoric features
-            #     for m in range(desc_dim):
-            #         all_arcs = []  # stores all arcs that can be found of a pharmacophoric feature
-            #         arc = 0
-			#
-            #         for n in range(18):  # for all positions in helix, regardless of sequence length
-            #             if ordered[n, m] == 0:  # if position does not contain pharmacophoric feature
-            #                 all_arcs.append(arc)  # append previous arc to all arcs list
-            #                 arc = 0  # arc is initialized
-            #             elif ordered[n, m] == 1:  # if position contains pharmacophoric feature(PF), elongate arc by 20°
-            #                 arc += 20
-            #             elif ordered[n, m] == 2:  # if position doesn't contain amino acid:
-            #                 if ordered[n - 1, m] == 1:  # if previous position contained PF add 10°
-            #                     arc += 10
-            #                 elif ordered[n - 1, m] == 0:  # if previous position didn't contain PF don't add anything
-            #                     arc += 0
-            #                 elif ordered[
-            #                             n - 2, m] == 1:  # if previous position is empty then check second previous for PF
-            #                     arc += 10
-            #                 if n == 17:  # if we are at the last position check for position n=0 instead of next position.
-            #                     if ordered[0, m] == 1:  # if it contains PF add 10° extra
-            #                         arc += 10
-            #                 else:  # if next position contains PF add 10° extra
-            #                     if ordered[n + 1, m] == 1:
-            #                         arc += 10
-            #                     elif ordered[n + 1, m] == 0:
-            #                         arc += 0
-            #                     else:  # if next position is empty check for 2nd next position
-            #                         if n == 16:
-            #                             if ordered[0, m] == 1:
-            #                                 arc += 10
-            #                         else:
-            #                             if ordered[n + 2, m] == 1:
-            #                                 arc += 10
-			#
-            #         all_arcs.append(arc)
-            #         if not arc == 360:
-            #             arc0 = all_arcs.pop() + all_arcs[0]  # join first and last arc together
-            #             all_arcs = [arc0] + all_arcs[1:]
-			#
-            #         window_arc.append(np.max(all_arcs))  # append to window arcs the maximum arc of this PF
-            #     allwindows_arc.append(window_arc)  # append all PF arcs of this window
-			#
-            # allwindows_arc = np.asarray(allwindows_arc)
-			#
-            # if modality == 'max':
-            #     final_arc = np.max(allwindows_arc, axis=0)  # calculate maximum / mean arc along all windows
-            # elif modality == 'mean':
-            #     final_arc = np.mean(allwindows_arc, axis=0)
-            # else:
-            #     print('modality is unknown, please choose between "max" and "mean"\n.')
-            #     sys.exit()
-
-            if append:
-                self.descriptor = np.hstack((self.descriptor, np.array(desc)))
-            else:
-                self.descriptor = np.array(desc)
-
-
-
-
-
-
-
-
--- a/cpt_helical_wheel/plotWheels/helical_wheel.py	Tue Jul 05 05:21:34 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-import matplotlib
-matplotlib.use('Agg')
-
-import matplotlib.lines as lines
-import matplotlib.patches as patches
-import matplotlib.pyplot as plt
-#from mpl_toolkits.mplot3d import Axes3D
-import numpy as np
-from scipy.stats.kde import gaussian_kde
-
-from plotWheels.core import load_scale
-from plotWheels.descriptors import PeptideDescriptor
-
-def helical_wheel(sequence, colorcoding='rainbow', text_color=None,
-                  lineweights=True, filename=None, seq=False, moment=False,
-                  seqRange=1, t_size=32, rot=float(90), dpi=150, numbering=True):
-    """A function to project a given peptide sequence onto a helical wheel plot. It can be useful to illustrate the
-    properties of alpha-helices, like positioning of charged and hydrophobic residues along the sequence.
-
-    :param sequence: {str} the peptide sequence for which the helical wheel should be drawn
-    :param colorcoding: {str} the color coding to be used, available: *rainbow*, *charge*, *polar*, *simple*,
-        *amphipathic*, *custom_input*, *none*
-    :param lineweights: {boolean} defines whether connection lines decrease in thickness along the sequence
-    :param filename: {str} filename  where to save the plot. *default = None* --> show the plot
-    :param seq: {bool} whether the amino acid sequence should be plotted as a title
-    :param moment: {bool} whether the Eisenberg hydrophobic moment should be calculated and plotted
-    :param seqRange: {int} starting value of residue location in sequence
-    :param t_size: {int} text size
-    :param rot: {float} rotation by radians --> converted to degrees.
-    :param dpi: {int} dpi parameter for saved files
-    :return: a helical wheel projection plot of the given sequence (interactively or in **filename**)
-    :Example:
-
-    >>> helical_wheel('GLFDIVKKVVGALG')
-    >>> helical_wheel('KLLKLLKKLLKLLK', colorcoding='charge')
-    >>> helical_wheel('AKLWLKAGRGFGRG', colorcoding='none', lineweights=False)
-    >>> helical_wheel('ACDEFGHIKLMNPQRSTVWY')
-
-    .. image:: ../docs/static/wheel1.png
-        :height: 300px
-    .. image:: ../docs/static/wheel2.png
-        :height: 300px
-    .. image:: ../docs/static/wheel3.png
-        :height: 300px
-    .. image:: ../docs/static/wheel4.png
-        :height: 300px
-
-    .. versionadded:: v2.1.5
-    """
-    # color mappings
-    aa = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
-    if colorcoding == type(str):
-        f_rainbow = ['#3e3e28', '#ffcc33', '#b30047', '#b30047', '#ffcc33', '#3e3e28', '#80d4ff', '#ffcc33', '#0047b3',
-                    '#ffcc33', '#ffcc33', '#b366ff', '#29a329', '#b366ff', '#0047b3', '#ff66cc', '#ff66cc', '#ffcc33',
-                    '#ffcc33', '#ffcc33']
-        f_charge = ['#000000', '#000000', '#ff4d94', '#ff4d94', '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff',
-                    '#000000', '#000000', '#000000', '#000000', '#000000', '#80d4ff', '#000000', '#000000', '#000000',
-                    '#000000', '#000000']
-        f_polar = ['#000000', '#000000', '#80d4ff', '#80d4ff', '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff',
-                '#000000', '#000000', '#80d4ff', '#000000', '#80d4ff', '#80d4ff', '#80d4ff', '#80d4ff', '#000000',
-                '#000000', '#000000']
-        f_simple = ['#ffcc33', '#ffcc33', '#0047b3', '#0047b3', '#ffcc33', '#7f7f7f', '#0047b3', '#ffcc33', '#0047b3',
-                    '#ffcc33', '#ffcc33', '#0047b3', '#ffcc33', '#0047b3', '#0047b3', '#0047b3', '#0047b3', '#ffcc33',
-                    '#ffcc33', '#ffcc33']
-        f_none = ['#ffffff'] * 20
-        f_amphi = ['#ffcc33', '#29a329', '#b30047', '#b30047', '#f79318', '#80d4ff', '#0047b3', '#ffcc33', '#0047b3',
-                '#ffcc33', '#ffcc33', '#80d4ff', '#29a329', '#80d4ff', '#0047b3', '#80d4ff', '#80d4ff', '#ffcc33',
-                '#f79318', '#f79318']
-        t_rainbow = ['w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'w', 'k', 'k', 'k', 'k', 'k', 'w', 'k', 'k', 'k', 'k', 'k']
-        t_charge = ['w', 'w', 'k', 'k', 'w', 'w', 'k', 'w', 'k', 'w', 'w', 'w', 'w', 'w', 'k', 'w', 'w', 'w', 'w', 'w']
-        t_polar = ['w', 'w', 'k', 'k', 'w', 'w', 'k', 'w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'k', 'k', 'w', 'w', 'w']
-        t_simple = ['k', 'k', 'w', 'w', 'k', 'w', 'w', 'k', 'w', 'k', 'k', 'k', 'k', 'w', 'w', 'w', 'w', 'k', 'k', 'k']
-        t_none = ['k'] * 20
-        t_amphi = ['k', 'k', 'w', 'w', 'w', 'k', 'w', 'k', 'w', 'k', 'k', 'k', 'w', 'k', 'w', 'k', 'k', 'k', 'w', 'w']
-        d_eisberg = load_scale('eisenberg')[1]  # eisenberg hydrophobicity values for HM
-    else:
-        f_custom = colorcoding
-        t_custom = text_color
-        d_eisberg = load_scale('eisenberg')[1]
-
-    if lineweights:
-        lw = np.arange(0.1, 5.5, 5. / (len(sequence) - 1))  # line thickness array
-        lw = lw[::-1]  # inverse order
-    else:
-        lw = [2.] * (len(sequence) - 1)
-    # check which color coding to use
-    if colorcoding == type(str):
-        if colorcoding == 'rainbow':
-            df = dict(zip(aa, f_rainbow))
-            dt = dict(zip(aa, t_rainbow))
-        elif colorcoding == 'charge':
-            df = dict(zip(aa, f_charge))
-            dt = dict(zip(aa, t_charge))
-        elif colorcoding == 'polar':
-            df = dict(zip(aa, f_polar))
-            dt = dict(zip(aa, t_polar))
-        elif colorcoding == 'simple':
-            df = dict(zip(aa, f_simple))
-            dt = dict(zip(aa, t_simple))
-        elif colorcoding == 'none':
-            df = dict(zip(aa, f_none))
-            dt = dict(zip(aa, t_none))
-        elif colorcoding == 'amphipathic':
-            df = dict(zip(aa, f_amphi))
-            dt = dict(zip(aa, t_amphi))
-        else:
-            print("Unknown color coding, 'rainbow' used instead")
-            df = dict(zip(aa, f_rainbow))
-            dt = dict(zip(aa, t_rainbow))
-    else:
-        df = dict(zip(aa, f_custom))
-        dt = dict(zip(aa, t_custom))
-
-    # degree to radian
-    deg = np.arange(float(len(sequence))) * -100.
-    deg = [d + rot for d in deg]  # start at 270 degree in unit circle (on top)
-    rad = np.radians(deg)
-
-    # dict for coordinates and eisenberg values
-    d_hydro = dict(zip(rad, [0.] * len(rad)))
-
-    # create figure
-    fig = plt.figure(frameon=False, figsize=(10, 10))
-    ax = fig.add_subplot(111)
-    old = None
-    hm = list()
-
-    # iterate over sequence
-    for i, r in enumerate(rad):
-        new = (np.cos(r), np.sin(r))  # new AA coordinates
-        if i < 18:
-            # plot the connecting lines
-            if old is not None:
-                line = lines.Line2D((old[0], new[0]), (old[1], new[1]), transform=ax.transData, color='k',
-                                    linewidth=lw[i - 1])
-                line.set_zorder(1)  # 1 = level behind circles
-                ax.add_line(line)
-        elif 17 < i < 36:
-            line = lines.Line2D((old[0], new[0]), (old[1], new[1]), transform=ax.transData, color='k',
-                                linewidth=lw[i - 1])
-            line.set_zorder(1)  # 1 = level behind circles
-            ax.add_line(line)
-            new = (np.cos(r) * 1.2, np.sin(r) * 1.2)
-        elif i == 36:
-            line = lines.Line2D((old[0], new[0]), (old[1], new[1]), transform=ax.transData, color='k',
-                                linewidth=lw[i - 1])
-            line.set_zorder(1)  # 1 = level behind circles
-            ax.add_line(line)
-            new = (np.cos(r) * 1.4, np.sin(r) * 1.4)
-        else:
-            new = (np.cos(r) * 1.4, np.sin(r) * 1.4)
-
-        # plot circles
-        circ = patches.Circle(new, radius=0.125, transform=ax.transData, edgecolor='k', facecolor=df[sequence[i]])
-        circ.set_zorder(2)  # level in front of lines
-        ax.add_patch(circ)
-
-        # check if N- or C-terminus and add subscript, then plot AA letter
-        if numbering:
-            size = t_size
-            if i == 0:
-                ax.text(new[0], new[1], sequence[i] + '$_N$', va='center', ha='center', transform=ax.transData,
-                        size=size, color=dt[sequence[i]], fontweight='bold')
-            elif i == len(sequence) - 1:
-                ax.text(new[0], new[1], sequence[i] + '$_C$', va='center', ha='center', transform=ax.transData,
-                        size=size, color=dt[sequence[i]], fontweight='bold')
-            else:
-                seqRange += 1
-                ax.text(new[0], new[1], sequence[i] + '$_{'+str(seqRange)+'}$', va='center', ha='center', transform=ax.transData,
-                        size=size, color=dt[sequence[i]], fontweight='bold')
-
-            eb = d_eisberg[sequence[i]][0]  # eisenberg value for this AA
-            hm.append([eb * new[0], eb * new[1]])  # save eisenberg hydrophobicity vector value to later calculate HM
-
-            old = (np.cos(r), np.sin(r))  # save as previous coordinates
-
-        else:
-            size = t_size
-            if i == 0:
-                ax.text(new[0], new[1], sequence[i] + '$_N$', va='center', ha='center', transform=ax.transData,
-                        size=size, color=dt[sequence[i]], fontweight='bold')
-            elif i == len(sequence) - 1:
-                ax.text(new[0], new[1], sequence[i] + '$_C$', va='center', ha='center', transform=ax.transData,
-                        size=size, color=dt[sequence[i]], fontweight='bold')
-            else:
-                ax.text(new[0], new[1], sequence[i], va='center', ha='center', transform=ax.transData,
-                        size=size, color=dt[sequence[i]], fontweight='bold')
-
-            eb = d_eisberg[sequence[i]][0]  # eisenberg value for this AA
-            hm.append([eb * new[0], eb * new[1]])  # save eisenberg hydrophobicity vector value to later calculate HM
-
-            old = (np.cos(r), np.sin(r))  # save as previous coordinates
-
-    # draw hydrophobic moment arrow if moment option
-    if moment:
-        v_hm = np.sum(np.array(hm), 0)
-        x = .0333 * v_hm[0]
-        y = .0333 * v_hm[1]
-        ax.arrow(0., 0., x, y, head_width=0.04, head_length=0.03, transform=ax.transData,
-                 color='k', linewidth=6.)
-        desc = PeptideDescriptor(sequence)  # calculate hydrophobic moment
-        desc.calculate_moment()
-        if abs(x) < 0.2 and y > 0.:  # right positioning of HM text so arrow does not cover it
-            z = -0.2
-        else:
-            z = 0.2
-        plt.text(0., z, str(round(desc.descriptor[0][0], 3)), fontdict={'fontsize': 20, 'fontweight': 'bold',
-                                                                        'ha': 'center'})
-
-    # plot shape
-    if len(sequence) < 19:
-        ax.set_xlim(-1.2, 1.2)
-        ax.set_ylim(-1.2, 1.2)
-    else:
-        ax.set_xlim(-1.4, 1.4)
-        ax.set_ylim(-1.4, 1.4)
-    ax.spines['right'].set_visible(False)
-    ax.spines['top'].set_visible(False)
-    ax.spines['left'].set_visible(False)
-    ax.spines['bottom'].set_visible(False)
-    cur_axes = plt.gca()
-    cur_axes.axes.get_xaxis().set_visible(False)
-    cur_axes.axes.get_yaxis().set_visible(False)
-    plt.tight_layout()
-
-    if seq:
-        plt.title(sequence, fontweight='bold', fontsize=20)
-
-    # show or save plot
-    if filename:
-        plt.savefig(filename, dpi=dpi)
-    else:
-        plt.show()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generateHelicalWheel.py	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,131 @@
+##
+
+import argparse
+from plotWheels.helical_wheel import helical_wheel
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate Helical Wheel")
+    parser.add_argument("--sequence", dest="sequence", type=str)
+    parser.add_argument("--seqRange", dest="seqRange", type=int, default=1)
+    parser.add_argument("--t_size", dest="t_size", type=int, default=32)
+    parser.add_argument("--rotation", dest="rotation", type=int, default=90)
+    parser.add_argument(
+        "--numbering", action="store_true", help="numbering for helical wheel"
+    )
+    parser.add_argument(
+        "--output",
+        dest="output",
+        type=argparse.FileType("wb"),
+        default="_helicalwheel.png",
+    )  # dest="output",default="_helicalwheel.png")
+    #### circle colors
+    parser.add_argument("--f_A", dest="f_A", default="#ffcc33")
+    parser.add_argument("--f_C", dest="f_C", default="#b5b5b5")
+    parser.add_argument("--f_D", dest="f_D", default="#db270f")
+    parser.add_argument("--f_E", dest="f_E", default="#db270f")
+    parser.add_argument("--f_F", dest="f_F", default="#ffcc33")
+    parser.add_argument("--f_G", dest="f_G", default="#b5b5b5")
+    parser.add_argument("--f_H", dest="f_H", default="#12d5fc")
+    parser.add_argument("--f_I", dest="f_I", default="#ffcc33")
+    parser.add_argument("--f_K", dest="f_K", default="#12d5fc")
+    parser.add_argument("--f_L", dest="f_L", default="#ffcc33")
+    parser.add_argument("--f_M", dest="f_M", default="#ffcc33")
+    parser.add_argument("--f_N", dest="f_N", default="#b5b5b5")
+    parser.add_argument("--f_P", dest="f_P", default="#ffcc33")
+    parser.add_argument("--f_Q", dest="f_Q", default="#b5b5b5")
+    parser.add_argument("--f_R", dest="f_R", default="#12d5fc")
+    parser.add_argument("--f_S", dest="f_S", default="#b5b5b5")
+    parser.add_argument("--f_T", dest="f_T", default="#b5b5b5")
+    parser.add_argument("--f_V", dest="f_V", default="#ffcc33")
+    parser.add_argument("--f_W", dest="f_W", default="#ffcc33")
+    parser.add_argument("--f_Y", dest="f_Y", default="#b5b5b5")
+    ### text colors
+    parser.add_argument("--t_A", dest="t_A", default="k")
+    parser.add_argument("--t_C", dest="t_C", default="k")
+    parser.add_argument("--t_D", dest="t_D", default="w")
+    parser.add_argument("--t_E", dest="t_E", default="w")
+    parser.add_argument("--t_F", dest="t_F", default="k")
+    parser.add_argument("--t_G", dest="t_G", default="k")
+    parser.add_argument("--t_H", dest="t_H", default="k")
+    parser.add_argument("--t_I", dest="t_I", default="k")
+    parser.add_argument("--t_K", dest="t_K", default="k")
+    parser.add_argument("--t_L", dest="t_L", default="k")
+    parser.add_argument("--t_M", dest="t_M", default="k")
+    parser.add_argument("--t_N", dest="t_N", default="k")
+    parser.add_argument("--t_P", dest="t_P", default="k")
+    parser.add_argument("--t_Q", dest="t_Q", default="k")
+    parser.add_argument("--t_R", dest="t_R", default="k")
+    parser.add_argument("--t_S", dest="t_S", default="k")
+    parser.add_argument("--t_T", dest="t_T", default="k")
+    parser.add_argument("--t_V", dest="t_V", default="k")
+    parser.add_argument("--t_W", dest="t_W", default="k")
+    parser.add_argument("--t_Y", dest="t_Y", default="k")
+
+    args = parser.parse_args()
+
+    # print(type(args.output))
+
+    f_colors = [
+        args.f_A,
+        args.f_C,
+        args.f_D,
+        args.f_E,
+        args.f_F,
+        args.f_G,
+        args.f_H,
+        args.f_I,
+        args.f_K,
+        args.f_L,
+        args.f_M,
+        args.f_N,
+        args.f_P,
+        args.f_Q,
+        args.f_R,
+        args.f_S,
+        args.f_T,
+        args.f_V,
+        args.f_W,
+        args.f_Y,
+    ]
+
+    t_colors = [
+        args.t_A,
+        args.t_C,
+        args.t_D,
+        args.t_E,
+        args.t_F,
+        args.t_G,
+        args.t_H,
+        args.t_I,
+        args.t_K,
+        args.t_L,
+        args.t_M,
+        args.t_N,
+        args.t_P,
+        args.t_Q,
+        args.t_R,
+        args.t_S,
+        args.t_T,
+        args.t_V,
+        args.t_W,
+        args.t_Y,
+    ]
+
+    colors = [f_colors, t_colors]
+
+    tmp_file = "./tmp.png"
+
+    helical_wheel(
+        sequence=args.sequence,
+        colorcoding=colors[0],
+        text_color=colors[1],
+        seqRange=args.seqRange,
+        t_size=args.t_size,
+        rot=args.rotation,
+        numbering=args.numbering,
+        filename=tmp_file,
+    )
+
+    with open("tmp.png", "rb") as f:
+        for line in f:
+            args.output.write(line)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generateHelicalWheel.xml	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,387 @@
+<tool id="edu.tamu.cpt2.helicalWheel.generateHelicalWheel" name="Helical Wheel" version="1.0">
+    <description>Generate and Plot a Protein Helical Wheel</description>
+    <macros>
+        <import>cpt-macros.xml</import>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package">numpy</requirement>
+        <requirement type="package">pandas</requirement>
+        <requirement type="package" version="0.18.1">scikit-learn</requirement>
+        <requirement type="package">scipy</requirement>
+        <requirement type="package">matplotlib</requirement>
+    </expand>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/generateHelicalWheel.py'
+--sequence '$sequence'
+--seqRange '$seqRange'
+--t_size '$t_size'
+--rotation '$rotation'
+'$numbering'
+--f_A "$sec_B.f_A"
+--f_C "$sec_C.f_C"
+--f_D "$sec_D.f_D"
+--f_E "$sec_D.f_E"
+--f_F "$sec_B.f_F"
+--f_G "$sec_C.f_G"
+--f_H "$sec_E.f_H"
+--f_I "$sec_B.f_I"
+--f_K "$sec_E.f_K"
+--f_L "$sec_B.f_L"
+--f_M "$sec_B.f_M"
+--f_N "$sec_C.f_N"
+--f_P "$sec_B.f_P"
+--f_Q "$sec_C.f_Q"
+--f_R "$sec_E.f_R"
+--f_S "$sec_C.f_S"
+--f_T "$sec_C.f_T"
+--f_V "$sec_B.f_V"
+--f_W "$sec_B.f_W"
+--f_Y "$sec_C.f_Y"
+--t_A "$sec_B.t_A"
+--t_C "$sec_C.t_C"
+--t_D "$sec_D.t_D"
+--t_E "$sec_D.t_E"
+--t_F "$sec_B.t_F"
+--t_G "$sec_C.t_G"
+--t_H "$sec_E.t_H"
+--t_I "$sec_B.t_I"
+--t_K "$sec_E.t_K"
+--t_L "$sec_B.t_L"
+--t_M "$sec_B.t_M"
+--t_N "$sec_C.t_N"
+--t_P "$sec_B.t_P"
+--t_Q "$sec_C.t_Q"
+--t_R "$sec_E.t_R"
+--t_S "$sec_C.t_S"
+--t_T "$sec_C.t_T"
+--t_V "$sec_B.t_V"
+--t_W "$sec_B.t_W"
+--t_Y "$sec_C.t_Y"
+--output '$output'
+]]></command>
+    <inputs>
+        <param label="Paste in exact sequence to be plotted" name="sequence" type="text"/>
+        <param label="Label Start Number" name="seqRange" type="integer" value="1" help="starting residue number to use for labels"/>
+        <param label="Amino Acid Text Size" name="t_size" type="integer" value="32" help="Alters the Text Size. Default is 32"/>
+        <param label="Rotation" name="rotation" type="integer" value="90" help="Rotates the helical wheel. Default is 90"/>
+        <param label="Label Numbering Text" name="numbering" type="boolean" help="number schema subscripts" truevalue="--numbering" falsevalue=""/>
+        <section name="sec_B" title="nonpolar ; hydrophobic">
+            <param name="f_A" type="color" label="Color for A" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_A" type="color" label="Text color for A" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_F" type="color" label="Color for F" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_F" type="color" label="Text color for F" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_I" type="color" label="Color for I" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_I" type="color" label="Text color for I" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_L" type="color" label="Color for L" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_L" type="color" label="Text color for L" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_M" type="color" label="Color for M" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_M" type="color" label="Text color for M" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_P" type="color" label="Color for P" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_P" type="color" label="Text color for P" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_V" type="color" label="Color for V" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_V" type="color" label="Text color for V" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_W" type="color" label="Color for W" value="#ffcc33">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_W" type="color" label="Text color for W" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+        </section>
+        <section name="sec_C" title="polar ; uncharged">
+            <param name="f_C" type="color" label="Color for C" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_C" type="color" label="Text color for C" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_G" type="color" label="Color for G" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_G" type="color" label="Text color for G" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_N" type="color" label="Color for N" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_N" type="color" label="Text color for N" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_Q" type="color" label="Color for Q" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_Q" type="color" label="Text color for Q" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_S" type="color" label="Color for S" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_S" type="color" label="Text color for S" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_T" type="color" label="Color for T" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_T" type="color" label="Text color for T" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_Y" type="color" label="Color for Y" value="#b5b5b5">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_Y" type="color" label="Text color for Y" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+        </section>
+        <section name="sec_D" title="polar ; acidic (negatively charged)">
+            <param name="f_D" type="color" label="Color for D" value="#db270f">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_D" type="color" label="Text color for D" value="#FFFFFF">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_E" type="color" label="Color for E" value="#db270f">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_E" type="color" label="Text color for E" value="#FFFFFF">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+        </section>
+        <section name="sec_E" title="polar ; basic (positive charge)">
+            <param name="f_H" type="color" label="Color for H" value="#12d5fc">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_H" type="color" label="Text color for H" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_K" type="color" label="Color for K" value="#12d5fc">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_K" type="color" label="Text color for K" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="f_R" type="color" label="Color for R" value="#12d5fc">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="t_R" type="color" label="Text color for R" value="#000000">
+                <sanitizer>
+                    <valid initial="string.ascii_letters,string.digits">
+                        <add value="#"/>
+                    </valid>
+                </sanitizer>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data format="png" name="output" label="_helicalWheel.png"/>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+INPUT : Peptide Sequence
+PARAMETERS :
+primary parameters :
+> Paste in exact sequence to be plotted - Input Sequence of desired helical wheel plot
+> Label Start Number - Numerical value that represents the beginning of the sequence (default 1)
+> Amino Acid Text Size - Size of text for helical wheel (default 32)
+> Rotation - Degrees to rotate helical wheel (defaul 90)
+color parameters :
+> Background Color and Text Color Selections
+METHOD : Using the core features from the modlAMP python module, a helical wheel projection is constructed.
+OUTPUT : _helicalWheel.png
+NOTES : Peptide lengths longer than 36 residues will not properly graph.
+]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btx285</citation>
+        <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {C. Ross},
+            title = {CPT Galaxy Tools},
+            year = {2020-},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+		'$xmfa'
+	</token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+		'$sequences'
+	</token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+	    '$gff3_data'
+	</token>
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plotWheels/core.py	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,3228 @@
+# -*- coding: utf-8 -*-
+"""
+.. currentmodule:: modlamp.core
+
+.. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch>
+
+Core helper functions and classes for other modules. The two main classes are:
+
+=============================    =======================================================================================
+Class                            Characteristics
+=============================    =======================================================================================
+:py:class:`BaseSequence`         Base class inheriting to all sequence classes in the module :py:mod:`modlamp.sequences`
+:py:class:`BaseDescriptor`       Base class inheriting to the two descriptor classes in :py:mod:`modlamp.descriptors`
+=============================    =======================================================================================
+"""
+
+import os
+import random
+import re
+
+import numpy as np
+import pandas as pd
+import collections
+import operator
+from scipy.spatial import distance
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+from sklearn.utils import shuffle
+
+__author__ = "Alex Müller, Gisela Gabernet"
+__docformat__ = "restructuredtext en"
+
+
+class BaseSequence(object):
+    """Base class for sequence classes in the module :mod:`modlamp.sequences`.
+    It contains amino acid probabilities for different sequence generation classes.
+
+    The following amino acid probabilities are used: (extracted from the
+    `APD3 <http://aps.unmc.edu/AP/statistic/statistic.php>`_, March 17, 2016)
+
+    ===  ====    ======   =========    ==========
+    AA   rand    AMP      AMPnoCM      randnoCM
+    ===  ====    ======   =========    ==========
+    A    0.05    0.0766   0.0812275    0.05555555
+    C    0.05    0.071    0.0          0.0
+    D    0.05    0.026    0.0306275    0.05555555
+    E    0.05    0.0264   0.0310275    0.05555555
+    F    0.05    0.0405   0.0451275    0.05555555
+    G    0.05    0.1172   0.1218275    0.05555555
+    H    0.05    0.021    0.0256275    0.05555555
+    I    0.05    0.061    0.0656275    0.05555555
+    K    0.05    0.0958   0.1004275    0.05555555
+    L    0.05    0.0838   0.0884275    0.05555555
+    M    0.05    0.0123   0.0          0.0
+    N    0.05    0.0386   0.0432275    0.05555555
+    P    0.05    0.0463   0.0509275    0.05555555
+    Q    0.05    0.0251   0.0297275    0.05555555
+    R    0.05    0.0545   0.0591275    0.05555555
+    S    0.05    0.0613   0.0659275    0.05555555
+    T    0.05    0.0455   0.0501275    0.05555555
+    V    0.05    0.0572   0.0618275    0.05555555
+    W    0.05    0.0155   0.0201275    0.05555555
+    Y    0.05    0.0244   0.0290275    0.05555555
+    ===  ====    ======   =========    ==========
+
+    """
+
+    def __init__(self, seqnum, lenmin=7, lenmax=28):
+        """
+        :param seqnum: number of sequences to generate
+        :param lenmin: minimal length of the generated sequences
+        :param lenmax: maximal length of the generated sequences
+        :return: attributes :py:attr:`seqnum`, :py:attr:`lenmin` and :py:attr:`lenmax`.
+        :Example:
+
+        >>> b = BaseSequence(10, 7, 28)
+        >>> b.seqnum
+        10
+        >>> b.lenmin
+        7
+        >>> b.lenmax
+        28
+        """
+        self.sequences = list()
+        self.names = list()
+        self.lenmin = int(lenmin)
+        self.lenmax = int(lenmax)
+        self.seqnum = int(seqnum)
+
+        # AA classes:
+        self.AA_hyd = ["G", "A", "L", "I", "V"]
+        self.AA_basic = ["K", "R"]
+        self.AA_acidic = ["D", "E"]
+        self.AA_aroma = ["W", "Y", "F"]
+        self.AA_polar = ["S", "T", "Q", "N"]
+        # AA labels:
+        self.AAs = [
+            "A",
+            "C",
+            "D",
+            "E",
+            "F",
+            "G",
+            "H",
+            "I",
+            "K",
+            "L",
+            "M",
+            "N",
+            "P",
+            "Q",
+            "R",
+            "S",
+            "T",
+            "V",
+            "W",
+            "Y",
+        ]
+        # AA probability from the APD3 database:
+        self.prob_AMP = [
+            0.0766,
+            0.071,
+            0.026,
+            0.0264,
+            0.0405,
+            0.1172,
+            0.021,
+            0.061,
+            0.0958,
+            0.0838,
+            0.0123,
+            0.0386,
+            0.0463,
+            0.0251,
+            0.0545,
+            0.0613,
+            0.0455,
+            0.0572,
+            0.0155,
+            0.0244,
+        ]
+        # AA probability from the APD2 database without Cys and Met (synthesis reasons)
+        self.prob_AMPnoCM = [
+            0.081228,
+            0.0,
+            0.030627,
+            0.031027,
+            0.045128,
+            0.121828,
+            0.025627,
+            0.065628,
+            0.100428,
+            0.088428,
+            0.0,
+            0.043228,
+            0.050928,
+            0.029728,
+            0.059128,
+            0.065927,
+            0.050128,
+            0.061828,
+            0.020128,
+            0.029028,
+        ]
+        # equal AA probabilities:
+        self.prob = [
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+            0.05,
+        ]
+        # equal AA probabilities but 0 for Cys and Met:
+        self.prob_randnoCM = [
+            0.05555555555,
+            0.0,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.0,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+            0.05555555555,
+        ]
+
+        # AA probability from the linear CancerPPD peptides:
+        self.prob_ACP = [
+            0.14526966,
+            0.0,
+            0.00690031,
+            0.00780824,
+            0.06991102,
+            0.04957327,
+            0.01725077,
+            0.05647358,
+            0.27637552,
+            0.17759216,
+            0.00998729,
+            0.00798983,
+            0.01307427,
+            0.00381333,
+            0.02941711,
+            0.02651171,
+            0.0154349,
+            0.04013074,
+            0.0406755,
+            0.00581079,
+        ]
+
+        # AA probabilities for perfect amphipathic helix of different arc sizes
+        self.prob_amphihel = [
+            [
+                0.04545455,
+                0.0,
+                0.04545454,
+                0.04545455,
+                0.0,
+                0.04545455,
+                0.04545455,
+                0.0,
+                0.25,
+                0.0,
+                0.0,
+                0.04545454,
+                0.04545455,
+                0.04545454,
+                0.25,
+                0.04545454,
+                0.04545454,
+                0.0,
+                0.0,
+                0.04545454,
+            ],
+            [
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.16666667,
+                0.0,
+                0.0,
+                0.16666667,
+                0.0,
+                0.16666667,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.0,
+                0.16666667,
+                0.16666667,
+                (1.0 - 0.16666667 * 5),
+            ],
+        ]
+
+        # helical ACP AA probabilities, depending on the position of the AA in the helix.
+        self.prob_ACPhel = np.array(
+            [
+                [
+                    0.0483871,
+                    0.0,
+                    0.0,
+                    0.0483871,
+                    0.01612903,
+                    0.12903226,
+                    0.03225807,
+                    0.09677419,
+                    0.19354839,
+                    0.5,
+                    0.0483871,
+                    0.11290323,
+                    0.1,
+                    0.18518519,
+                    0.07843137,
+                    0.12,
+                    0.17073172,
+                    0.16666667,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.02439024,
+                    0.19444444,
+                ],
+                [
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.27419355,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.06451613,
+                    0.0,
+                    0.01612903,
+                    0.0483871,
+                    0.01612903,
+                    0.0,
+                    0.01851852,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.16129032,
+                    0.0483871,
+                    0.30645161,
+                    0.0,
+                    0.0483871,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.09677419,
+                    0.06666667,
+                    0.01851852,
+                    0.0,
+                    0.02,
+                    0.14634146,
+                    0.0,
+                ],
+                [
+                    0.64516129,
+                    0.0,
+                    0.17741936,
+                    0.14516129,
+                    0.0,
+                    0.01612903,
+                    0.25806452,
+                    0.11290323,
+                    0.06451613,
+                    0.08064516,
+                    0.22580645,
+                    0.03225807,
+                    0.06666667,
+                    0.2037037,
+                    0.1372549,
+                    0.1,
+                    0.0,
+                    0.05555556,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.03225807,
+                    0.0,
+                    0.0,
+                    0.20967742,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.16,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.0483871,
+                    0.11290323,
+                    0.01612903,
+                    0.08064516,
+                    0.33870968,
+                    0.27419355,
+                    0.0,
+                    0.0483871,
+                    0.14516129,
+                    0.06451613,
+                    0.03225807,
+                    0.06451613,
+                    0.18333333,
+                    0.0,
+                    0.0,
+                    0.1,
+                    0.26829268,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.03225807,
+                    0.01612903,
+                    0.12903226,
+                    0.12903226,
+                    0.0,
+                    0.38709677,
+                    0.33870968,
+                    0.0483871,
+                    0.03225807,
+                    0.41935484,
+                    0.08064516,
+                    0.0,
+                    0.03703704,
+                    0.29411765,
+                    0.04,
+                    0.02439024,
+                    0.02777778,
+                ],
+                [
+                    0.0483871,
+                    0.70967742,
+                    0.12903226,
+                    0.0483871,
+                    0.09677419,
+                    0.32258064,
+                    0.20967742,
+                    0.06451613,
+                    0.11290323,
+                    0.06451613,
+                    0.03225807,
+                    0.03225807,
+                    0.28333333,
+                    0.24074074,
+                    0.03921569,
+                    0.28,
+                    0.07317073,
+                    0.22222222,
+                ],
+                [
+                    0.0,
+                    0.01612903,
+                    0.01612903,
+                    0.0483871,
+                    0.01612903,
+                    0.03225807,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.03333333,
+                    0.0,
+                    0.01960784,
+                    0.02,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.03225807,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01960784,
+                    0.02,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.14516129,
+                    0.01612903,
+                    0.03225807,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.12962963,
+                    0.17647059,
+                    0.0,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.01851852,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.01612903,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.01612903,
+                    0.01612903,
+                    0.01612903,
+                    0.0,
+                    0.01851852,
+                    0.01960784,
+                    0.0,
+                    0.04878049,
+                    0.0,
+                ],
+                [
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.12903226,
+                    0.03225807,
+                    0.03225807,
+                    0.0483871,
+                    0.17741936,
+                    0.0,
+                    0.03225807,
+                    0.09677419,
+                    0.0483871,
+                    0.01666667,
+                    0.0,
+                    0.15686274,
+                    0.1,
+                    0.0,
+                    0.05555556,
+                ],
+                [
+                    0.01612903,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.0483871,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.01612903,
+                    0.01612903,
+                    0.11290323,
+                    0.0,
+                    0.01851852,
+                    0.03921569,
+                    0.02,
+                    0.0,
+                    0.05555556,
+                ],
+                [
+                    0.01612903,
+                    0.01612903,
+                    0.01612903,
+                    0.01612903,
+                    0.20967742,
+                    0.16129032,
+                    0.01612903,
+                    0.0483871,
+                    0.33870968,
+                    0.16129032,
+                    0.0,
+                    0.14516129,
+                    0.25,
+                    0.11111111,
+                    0.01960784,
+                    0.02,
+                    0.21951219,
+                    0.22222222,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.12903226,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.02439024,
+                    0.0,
+                ],
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.01612903,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                ],
+            ]
+        )
+
+    def save_fasta(self, filename, names=False):
+        """Method to save generated sequences in a ``.FASTA`` formatted file.
+
+        :param filename: output filename in which the sequences from :py:attr:`sequences` are safed in fasta format.
+        :param names: {bool} whether sequence names from :py:attr:`names` should be saved as sequence identifiers
+        :return: a FASTA formatted file containing the generated sequences
+        :Example:
+
+        >>> b = BaseSequence(2)
+        >>> b.sequences = ['KLLSLSLALDLLS', 'KLPERTVVNSSDF']
+        >>> b.names = ['Sequence1', 'Sequence2']
+        >>> b.save_fasta('/location/of/fasta/file.fasta', names=True)
+        """
+        if names:
+            save_fasta(filename, self.sequences, self.names)
+        else:
+            save_fasta(filename, self.sequences)
+
+    def mutate_AA(self, nr, prob):
+        """Method to mutate with **prob** probability a **nr** of positions per sequence randomly.
+
+        :param nr: number of mutations to perform per sequence
+        :param prob: probability of mutating a sequence
+        :return: mutated sequences in the attribute :py:attr:`sequences`.
+        :Example:
+
+        >>> b = BaseSequence(1)
+        >>> b.sequences = ['IAKAGRAIIK']
+        >>> b.mutate_AA(3, 1.)
+        >>> b.sequences
+        ['NAKAGRAWIK']
+        """
+        for s in range(len(self.sequences)):
+            # mutate: yes or no? prob = mutation probability
+            mutate = np.random.choice([1, 0], 1, p=[prob, 1 - float(prob)])
+            if mutate == 1:
+                seq = list(self.sequences[s])
+                cnt = 0
+                while cnt < nr:  # mutate "nr" AA
+                    seq[random.choice(range(len(seq)))] = random.choice(self.AAs)
+                    cnt += 1
+                self.sequences[s] = "".join(seq)
+
+    def filter_duplicates(self):
+        """Method to filter duplicates in the sequences from the class attribute :py:attr:`sequences`
+
+        :return: filtered sequences list in the attribute :py:attr:`sequences` and corresponding names.
+        :Example:
+
+        >>> b = BaseSequence(4)
+        >>> b.sequences = ['KLLKLLKKLLKLLK', 'KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK', 'KLAKLAKKLAKLAK']
+        >>> b.filter_duplicates()
+        >>> b.sequences
+        ['KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK']
+
+        .. versionadded:: v2.2.5
+        """
+        if not self.names:
+            self.names = ["Seq_" + str(i) for i in range(len(self.sequences))]
+        df = pd.DataFrame(
+            list(zip(self.sequences, self.names)), columns=["Sequences", "Names"]
+        )
+        df = df.drop_duplicates(
+            "Sequences", "first"
+        )  # keep first occurrence of duplicate
+        self.sequences = df["Sequences"].get_values().tolist()
+        self.names = df["Names"].get_values().tolist()
+
+    def keep_natural_aa(self):
+        """Method to filter out sequences that do not contain natural amino acids. If the sequence contains a character
+        that is not in ``['A','C','D,'E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y']``.
+
+        :return: filtered sequence list in the attribute :py:attr:`sequences`. The other attributes are also filtered
+            accordingly (if present).
+        :Example:
+
+        >>> b = BaseSequence(2)
+        >>> b.sequences = ['BBBsdflUasUJfBJ', 'GLFDIVKKVVGALGSL']
+        >>> b.keep_natural_aa()
+        >>> b.sequences
+        ['GLFDIVKKVVGALGSL']
+        """
+        natural_aa = [
+            "A",
+            "C",
+            "D",
+            "E",
+            "F",
+            "G",
+            "H",
+            "I",
+            "K",
+            "L",
+            "M",
+            "N",
+            "P",
+            "Q",
+            "R",
+            "S",
+            "T",
+            "V",
+            "W",
+            "Y",
+        ]
+
+        seqs = []
+        names = []
+
+        for i, s in enumerate(self.sequences):
+            seq = list(s.upper())
+            if all(c in natural_aa for c in seq):
+                seqs.append(s.upper())
+                if hasattr(self, "names") and self.names:
+                    names.append(self.names[i])
+
+        self.sequences = seqs
+        self.names = names
+
+    def filter_aa(self, amino_acids):
+        """Method to filter out corresponding names and descriptor values of sequences with given amino acids in the
+        argument list *aminoacids*.
+
+        :param amino_acids: {list} amino acids to be filtered
+        :return: filtered list of sequences names in the corresponding attributes.
+        :Example:
+
+        >>> b = BaseSequence(3)
+        >>> b.sequences = ['AAALLLIIIKKK', 'CCEERRT', 'LLVVIIFFFQQ']
+        >>> b.filter_aa(['C'])
+        >>> b.sequences
+        ['AAALLLIIIKKK', 'LLVVIIFFFQQ']
+        """
+
+        pattern = re.compile("|".join(amino_acids))
+        seqs = []
+        names = []
+
+        for i, s in enumerate(self.sequences):
+            if not pattern.search(s):
+                seqs.append(s)
+                if hasattr(self, "names") and self.names:
+                    names.append(self.names[i])
+
+        self.sequences = seqs
+        self.names = names
+
+    def clean(self):
+        """Method to clean / clear / empty the attributes :py:attr:`sequences` and :py:attr:`names`.
+
+        :return: freshly initialized, empty class attributes.
+        """
+        self.__init__(self.seqnum, self.lenmin, self.lenmax)
+
+
+class BaseDescriptor(object):
+    """
+    Base class inheriting to both peptide descriptor classes :py:class:`modlamp.descriptors.GlobalDescriptor` and
+    :py:class:`modlamp.descriptors.PeptideDescriptor`.
+    """
+
+    def __init__(self, seqs):
+        """
+        :param seqs: a ``.FASTA`` file with sequences, a list / array of sequences or a single sequence as string to
+            calculate the descriptor values for.
+        :return: initialized attributes :py:attr:`sequences` and :py:attr:`names`.
+        :Example:
+
+        >>> AMP = BaseDescriptor('KLLKLLKKLLKLLK','pepCATS')
+        >>> AMP.sequences
+        ['KLLKLLKKLLKLLK']
+        >>> seqs = BaseDescriptor('/Path/to/file.fasta', 'eisenberg')  # load sequences from .fasta file
+        >>> seqs.sequences
+        ['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'...]
+        """
+        if type(seqs) == list and seqs[0].isupper():
+            self.sequences = [s.strip() for s in seqs]
+            self.names = []
+        elif type(seqs) == np.ndarray and seqs[0].isupper():
+            self.sequences = [s.strip() for s in seqs.tolist()]
+            self.names = []
+        elif type(seqs) == str and seqs.isupper():
+            self.sequences = [seqs.strip()]
+            self.names = []
+        elif os.path.isfile(seqs):
+            if seqs.endswith(".fasta"):  # read .fasta file
+                self.sequences, self.names = read_fasta(seqs)
+            elif seqs.endswith(".csv"):  # read .csv file with sequences every line
+                with open(seqs) as f:
+                    self.sequences = list()
+                    cntr = 0
+                    self.names = []
+                    for line in f:
+                        if line.isupper():
+                            self.sequences.append(line.strip())
+                            self.names.append("seq_" + str(cntr))
+                            cntr += 1
+            else:
+                print("Sorry, currently only .fasta or .csv files can be read!")
+        else:
+            print(
+                "%s does not exist, is not a valid list of AA sequences or is not a valid sequence string"
+                % seqs
+            )
+
+        self.descriptor = np.array([[]])
+        self.target = np.array([], dtype="int")
+        self.scaler = None
+        self.featurenames = []
+
+    def read_fasta(self, filename):
+        """Method for loading sequences from a ``.FASTA`` formatted file into the attributes :py:attr:`sequences` and
+        :py:attr:`names`.
+
+        :param filename: {str} ``.FASTA`` file with sequences and headers to read
+        :return: {list} sequences in the attribute :py:attr:`sequences` with corresponding sequence names in
+            :py:attr:`names`.
+        """
+        self.sequences, self.names = read_fasta(filename)
+
+    def save_fasta(self, filename, names=False):
+        """Method for saving sequences from :py:attr:`sequences` to a ``.FASTA`` formatted file.
+
+        :param filename: {str} filename of the output ``.FASTA`` file
+        :param names: {bool} whether sequence names from self.names should be saved as sequence identifiers
+        :return: a FASTA formatted file containing the generated sequences
+        """
+        if names:
+            save_fasta(filename, self.sequences, self.names)
+        else:
+            save_fasta(filename, self.sequences)
+
+    def count_aa(self, scale="relative", average=False, append=False):
+        """Method for producing the amino acid distribution for the given sequences as a descriptor
+
+        :param scale: {'absolute' or 'relative'} defines whether counts or frequencies are given for each AA
+        :param average: {boolean} whether the averaged amino acid counts for all sequences should be returned
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: the amino acid distributions for every sequence individually in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> AMP = PeptideDescriptor('ACDEFGHIKLMNPQRSTVWY') # aa_count() does not depend on the descriptor scale
+        >>> AMP.count_aa()
+        >>> AMP.descriptor
+        array([[ 0.05,  0.05,  0.05,  0.05,  0.05,  0.05,  0.05,  0.05,  0.05, ... ]])
+        >>> AMP.descriptor.shape
+        (1, 20)
+
+        .. seealso:: :py:func:`modlamp.core.count_aa()`
+        """
+        desc = list()
+        for seq in self.sequences:
+            od = count_aas(seq, scale)
+            desc.append(list(od.values()))
+
+        desc = np.array(desc)
+        self.featurenames = list(od.keys())
+
+        if append:
+            self.descriptor = np.hstack((self.descriptor, desc))
+        elif average:
+            self.descriptor = np.mean(desc, axis=0)
+        else:
+            self.descriptor = desc
+
+    def count_ngrams(self, n):
+        """Method for producing n-grams of all sequences in self.sequences
+
+        :param n: {int or list of ints} defines whether counts or frequencies are given for each AA
+        :return: {dict} dictionary with n-grams as keys and their counts in the sequence as values in :py:attr:`descriptor`
+        :Example:
+
+        >>> D = PeptideDescriptor('GLLDFLSLAALSLDKLVKKGALS')
+        >>> D.count_ngrams([2, 3])
+        >>> D.descriptor
+        {'LS': 3, 'LD': 2, 'LSL': 2, 'AL': 2, ..., 'LVK': 1}
+
+        .. seealso:: :py:func:`modlamp.core.count_ngrams()`
+        """
+        ngrams = dict()
+        for seq in self.sequences:
+            d = count_ngrams(seq, n)
+            for k, v in d.items():
+                if k in ngrams.keys():
+                    ngrams[k] += v
+                else:
+                    ngrams[k] = v
+        self.descriptor = ngrams
+
+    def feature_scaling(self, stype="standard", fit=True):
+        """Method for feature scaling of the calculated descriptor matrix.
+
+        :param stype: {'standard' or 'minmax'} type of scaling to be used
+        :param fit: {boolean} defines whether the used scaler is first fitting on the data (True) or
+            whether the already fitted scaler in :py:attr:`scaler` should be used to transform (False).
+        :return: scaled descriptor values in :py:attr:`descriptor`
+        :Example:
+
+        >>> D.descriptor
+        array([[0.155],[0.34],[0.16235294],[-0.08842105],[0.116]])
+        >>> D.feature_scaling(type='minmax',fit=True)
+        array([[0.56818182],[1.],[0.5853447],[0.],[0.47714988]])
+        """
+        if stype in ["standard", "minmax"]:
+            if stype == "standard":
+                self.scaler = StandardScaler()
+            elif stype == "minmax":
+                self.scaler = MinMaxScaler()
+
+            if fit:
+                self.descriptor = self.scaler.fit_transform(self.descriptor)
+            else:
+                self.descriptor = self.scaler.transform(self.descriptor)
+        else:
+            print("Unknown scaler type!\nAvailable: 'standard', 'minmax'")
+
+    def feature_shuffle(self):
+        """Method for shuffling feature columns randomly.
+
+        :return: descriptor matrix with shuffled feature columns in :py:attr:`descriptor`
+        :Example:
+
+        >>> D.descriptor
+        array([[0.80685625,167.05234375,39.56818125,-0.26338667,155.16888667,33.48778]])
+        >>> D.feature_shuffle()
+        array([[155.16888667,-0.26338667,167.05234375,0.80685625,39.56818125,33.48778]])
+        """
+        self.descriptor = shuffle(self.descriptor.transpose()).transpose()
+
+    def sequence_order_shuffle(self):
+        """Method for shuffling sequence order in the attribute :py:attr:`sequences`.
+
+        :return: sequences in :py:attr:`sequences` with shuffled order in the list.
+        :Example:
+
+        >>> D.sequences
+        ['LILRALKGAARALKVA','VKIAKIALKIIKGLG','VGVRLIKGIGRVARGAI','LRGLRGVIRGGKAIVRVGK','GGKLVRLIARIGKGV']
+        >>> D.sequence_order_shuffle()
+        >>> D.sequences
+        ['VGVRLIKGIGRVARGAI','LILRALKGAARALKVA','LRGLRGVIRGGKAIVRVGK','GGKLVRLIARIGKGV','VKIAKIALKIIKGLG']
+        """
+        self.sequences = shuffle(self.sequences)
+
+    def random_selection(self, num):
+        """Method to randomly select a specified number of sequences (with names and descriptors if present) out of a given
+        descriptor instance.
+
+        :param num: {int} number of entries to be randomly selected
+        :return: updated instance
+        :Example:
+
+        >>> h = Helices(7, 28, 100)
+        >>> h.generate_helices()
+        >>> desc = PeptideDescriptor(h.sequences, 'eisenberg')
+        >>> desc.calculate_moment()
+        >>> len(desc.sequences)
+        100
+        >>> len(desc.descriptor)
+        100
+        >>> desc.random_selection(10)
+        >>> len(desc.descriptor)
+        10
+        >>> len(desc.descriptor)
+        10
+
+        .. versionadded:: v2.2.3
+        """
+
+        sel = np.random.choice(len(self.sequences), size=num, replace=False)
+        self.sequences = np.array(self.sequences)[sel].tolist()
+        if hasattr(self, "descriptor") and self.descriptor.size:
+            self.descriptor = self.descriptor[sel]
+        if hasattr(self, "names") and self.names:
+            self.names = np.array(self.names)[sel].tolist()
+        if hasattr(self, "target") and self.target.size:
+            self.target = self.target[sel]
+
+    def minmax_selection(self, iterations, distmetric="euclidean", seed=0):
+        """Method to select a specified number of sequences according to the minmax algorithm.
+
+        :param iterations: {int} Number of sequences to retrieve.
+        :param distmetric: Distance metric to calculate the distances between the sequences in descriptor space.
+            Choose from 'euclidean' or 'minkowsky'.
+        :param seed: {int} Set a random seed for numpy to pick the first sequence.
+        :return: updated instance
+
+        .. seealso:: **SciPy** http://docs.scipy.org/doc/scipy/reference/spatial.distance.html
+        """
+
+        # Storing M into pool, where selections get deleted
+        pool = self.descriptor  # Store pool where selections get deleted
+        minmaxidx = list()  # Store original indices of selections to return
+
+        # Randomly selecting first peptide into the sele
+        np.random.seed(seed)
+        idx = int(np.random.random_integers(0, len(pool), 1))
+        sele = pool[idx : idx + 1, :]
+        minmaxidx.append(
+            int(*np.where(np.all(self.descriptor == pool[idx : idx + 1, :], axis=1)))
+        )
+
+        # Deleting peptide in selection from pool
+        pool = np.delete(pool, idx, axis=0)
+
+        for i in range(iterations - 1):
+            # Calculating distance from sele to the rest of the peptides
+            dist = distance.cdist(pool, sele, distmetric)
+
+            # Choosing maximal distances for every sele instance
+            maxidx = np.argmax(dist, axis=0)
+            maxcols = np.max(dist, axis=0)
+
+            # Choosing minimal distance among the maximal distances
+            minmax = np.argmin(maxcols)
+            maxidx = int(maxidx[minmax])
+
+            # Adding it to selection and removing from pool
+            sele = np.append(sele, pool[maxidx : maxidx + 1, :], axis=0)
+            pool = np.delete(pool, maxidx, axis=0)
+            minmaxidx.append(
+                int(
+                    *np.where(
+                        np.all(self.descriptor == pool[maxidx : maxidx + 1, :], axis=1)
+                    )
+                )
+            )
+
+        self.sequences = np.array(self.sequences)[minmaxidx].tolist()
+        if hasattr(self, "descriptor") and self.descriptor.size:
+            self.descriptor = self.descriptor[minmaxidx]
+        if hasattr(self, "names") and self.names:
+            self.names = np.array(self.names)[minmaxidx].tolist()
+        if hasattr(self, "target") and self.target.size:
+            self.target = self.descriptor[minmaxidx]
+
+    def filter_sequences(self, sequences):
+        """Method to filter out entries for given sequences in *sequences* out of a descriptor instance. All
+        corresponding attribute values of these sequences (e.g. in :py:attr:`descriptor`, :py:attr:`name`) are deleted
+        as well. The method returns an updated descriptor instance.
+
+        :param sequences: {list} sequences to be filtered out of the whole instance, including corresponding data
+        :return: updated instance without filtered sequences
+        :Example:
+
+        >>> sequences = ['KLLKLLKKLLKLLK', 'ACDEFGHIK', 'GLFDIVKKVV', 'GLFDIVKKVVGALG', 'GLFDIVKKVVGALGSL']
+        >>> desc = PeptideDescriptor(sequences, 'pepcats')
+        >>> desc.calculate_crosscorr(7)
+        >>> len(desc.descriptor)
+        5
+        >>> desc.filter_sequences('KLLKLLKKLLKLLK')
+        >>> len(desc.descriptor)
+        4
+        >>> desc.sequences
+        ['ACDEFGHIK', 'GLFDIVKKVV', 'GLFDIVKKVVGALG', 'GLFDIVKKVVGALGSL']
+        """
+        indices = list()
+        if isinstance(
+            sequences, str
+        ):  # check if sequences is only one sequence string and convert it to a list
+            sequences = [sequences]
+        for s in sequences:  # get indices of queried sequences
+            indices.append(self.sequences.index(s))
+
+        self.sequences = np.delete(np.array(self.sequences), indices, 0).tolist()
+        if hasattr(self, "descriptor") and self.descriptor.size:
+            self.descriptor = np.delete(self.descriptor, indices, 0)
+        if hasattr(self, "names") and self.names:
+            self.names = np.delete(np.array(self.names), indices, 0).tolist()
+        if hasattr(self, "target") and self.target.size:
+            self.target = np.delete(self.target, indices, 0)
+
+    def filter_values(self, values, operator="=="):
+        """Method to filter the descriptor matrix in the attribute :py:attr:`descriptor` for a given list of values (same
+        size as the number of features in the descriptor matrix!) The operator option tells the method whether to
+        filter for values equal, lower, higher ect. to the given values in the *values* array.
+
+        :param values: {list} values to filter the attribute :py:attr:`descriptor` for
+        :param operator: {str} filter criterion, available the operators ``==``, ``<``, ``>``, ``<=``and ``>=``.
+        :return: descriptor matrix and updated sequences containing only entries with descriptor values given in
+            *values* in the corresponding attributes.
+        :Example:
+
+        >>> desc.descriptor  # desc = BaseDescriptor instance
+        array([[ 0.7666517 ],
+               [ 0.38373498]])
+        >>> desc.filter_values([0.5], '<')
+        >>> desc.descriptor
+        array([[ 0.38373498]])
+        """
+        dim = self.descriptor.shape[1]
+        for d in range(dim):  # for all the features in self.descriptor
+            if operator == "==":
+                indices = np.where(self.descriptor[:, d] == values[d])[0]
+            elif operator == "<":
+                indices = np.where(self.descriptor[:, d] < values[d])[0]
+            elif operator == ">":
+                indices = np.where(self.descriptor[:, d] > values[d])[0]
+            elif operator == "<=":
+                indices = np.where(self.descriptor[:, d] <= values[d])[0]
+            elif operator == ">=":
+                indices = np.where(self.descriptor[:, d] >= values[d])[0]
+            else:
+                raise KeyError(
+                    "available operators: ``==``, ``<``, ``>``, ``<=``and ``>=``"
+                )
+
+            # filter descriptor matrix, sequence list and names list according to obtained indices
+            self.sequences = np.array(self.sequences)[indices].tolist()
+            if hasattr(self, "descriptor") and self.descriptor.size:
+                self.descriptor = self.descriptor[indices]
+            if hasattr(self, "names") and self.names:
+                self.names = np.array(self.names)[indices].tolist()
+            if hasattr(self, "target") and self.target.size:
+                self.target = self.target[indices]
+
+    def filter_aa(self, amino_acids):
+        """Method to filter out corresponding names and descriptor values of sequences with given amino acids in the
+        argument list *aminoacids*.
+
+        :param amino_acids: list of amino acids to be filtered
+        :return: filtered list of sequences, descriptor values, target values and names in the corresponding attributes.
+        :Example:
+
+        >>> b = BaseSequence(3)
+        >>> b.sequences = ['AAALLLIIIKKK', 'CCEERRT', 'LLVVIIFFFQQ']
+        >>> b.filter_aa(['C'])
+        >>> b.sequences
+        ['AAALLLIIIKKK', 'LLVVIIFFFQQ']
+        """
+
+        pattern = re.compile("|".join(amino_acids))
+        seqs = []
+        desc = []
+        names = []
+        target = []
+
+        for i, s in enumerate(self.sequences):
+            if not pattern.search(s):
+                seqs.append(s)
+                if hasattr(self, "descriptor") and self.descriptor.size:
+                    desc.append(self.descriptor[i])
+                if hasattr(self, "names") and self.names:
+                    names.append(self.names[i])
+                if hasattr(self, "target") and self.target.size:
+                    target.append(self.target[i])
+
+        self.sequences = seqs
+        self.names = names
+        self.descriptor = np.array(desc)
+        self.target = np.array(target, dtype="int")
+
+    def filter_duplicates(self):
+        """Method to filter duplicates in the sequences from the class attribute :py:attr:`sequences`
+
+        :return: filtered sequences list in the attribute :py:attr:`sequences` and corresponding names.
+        :Example:
+
+        >>> b = BaseDescriptor(['KLLKLLKKLLKLLK', 'KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK', 'KLAKLAKKLAKLAK'])
+        >>> b.filter_duplicates()
+        >>> b.sequences
+        ['KLLKLLKKLLKLLK', 'KLAKLAKKLAKLAK']
+
+        .. versionadded:: v2.2.5
+        """
+        if not self.names:
+            self.names = ["Seq_" + str(i) for i in range(len(self.sequences))]
+        if not self.target:
+            self.target = [0] * len(self.sequences)
+        if not self.descriptor:
+            self.descriptor = np.zeros(len(self.sequences))
+        df = pd.DataFrame(
+            np.array([self.sequences, self.names, self.descriptor, self.target]).T,
+            columns=["Sequences", "Names", "Descriptor", "Target"],
+        )
+        df = df.drop_duplicates(
+            "Sequences", "first"
+        )  # keep first occurrence of duplicate
+        self.sequences = df["Sequences"].get_values().tolist()
+        self.names = df["Names"].get_values().tolist()
+        self.descriptor = df["Descriptor"].get_values()
+        self.target = df["Target"].get_values()
+
+    def keep_natural_aa(self):
+        """Method to filter out sequences that do not contain natural amino acids. If the sequence contains a character
+        that is not in ['A','C','D,'E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'].
+
+        :return: filtered sequence list in the attribute :py:attr:`sequences`. The other attributes are also filtered
+            accordingly (if present).
+        :Example:
+
+        >>> b = BaseSequence(2)
+        >>> b.sequences = ['BBBsdflUasUJfBJ', 'GLFDIVKKVVGALGSL']
+        >>> b.keep_natural_aa()
+        >>> b.sequences
+        ['GLFDIVKKVVGALGSL']
+        """
+
+        natural_aa = [
+            "A",
+            "C",
+            "D",
+            "E",
+            "F",
+            "G",
+            "H",
+            "I",
+            "K",
+            "L",
+            "M",
+            "N",
+            "P",
+            "Q",
+            "R",
+            "S",
+            "T",
+            "V",
+            "W",
+            "Y",
+        ]
+
+        seqs = []
+        desc = []
+        names = []
+        target = []
+
+        for i, s in enumerate(self.sequences):
+            seq = list(s.upper())
+            if all(c in natural_aa for c in seq):
+                seqs.append(s.upper())
+                if hasattr(self, "descriptor") and self.descriptor.size:
+                    desc.append(self.descriptor[i])
+                if hasattr(self, "names") and self.names:
+                    names.append(self.names[i])
+                if hasattr(self, "target") and self.target.size:
+                    target.append(self.target[i])
+
+        self.sequences = seqs
+        self.names = names
+        self.descriptor = np.array(desc)
+        self.target = np.array(target, dtype="int")
+
+    def load_descriptordata(
+        self, filename, delimiter=",", targets=False, skip_header=0
+    ):
+        """Method to load any data file with sequences and descriptor values and save it to a new insatnce of the
+        class :class:`modlamp.descriptors.PeptideDescriptor`.
+
+        .. note:: Headers are not considered. To skip initial lines in the file, use the *skip_header* option.
+
+        :param filename: {str} filename of the data file to be loaded
+        :param delimiter: {str} column delimiter
+        :param targets: {boolean} whether last column in the file contains a target class vector
+        :param skip_header: {int} number of initial lines to skip in the file
+        :return: loaded sequences, descriptor values and targets in the corresponding attributes.
+        """
+        data = np.genfromtxt(filename, delimiter=delimiter, skip_header=skip_header)
+        data = data[:, 1:]  # skip sequences as they are "nan" when read as float
+        seqs = np.genfromtxt(filename, delimiter=delimiter, dtype="str")
+        seqs = seqs[:, 0]
+        if targets:
+            self.target = np.array(data[:, -1], dtype="int")
+        self.sequences = seqs
+        self.descriptor = data
+
+    def save_descriptor(self, filename, delimiter=",", targets=None, header=None):
+        """Method to save the descriptor values to a .csv/.txt file
+
+        :param filename: filename of the output file
+        :param delimiter: column delimiter
+        :param targets: target class vector to be added to descriptor (same length as :py:attr:`sequences`)
+        :param header: {str} header to be written at the beginning of the file (if ``None``: feature names are taken)
+        :return: output file with peptide names and descriptor values
+        """
+        seqs = np.array(self.sequences, dtype="|S80")[:, np.newaxis]
+        ids = np.array(self.names, dtype="|S80")[:, np.newaxis]
+        if ids.shape == seqs.shape:
+            names = np.hstack((ids, seqs))
+        else:
+            names = seqs
+        if targets and len(targets) == len(self.sequences):
+            target = np.array(targets)[:, np.newaxis]
+            data = np.hstack((names, self.descriptor, target))
+        else:
+            data = np.hstack((names, self.descriptor))
+        if not header:
+            featurenames = [["Sequence"]] + self.featurenames
+            header = ", ".join([f[0] for f in featurenames])
+        np.savetxt(filename, data, delimiter=delimiter, fmt="%s", header=header)
+
+
+def load_scale(scalename):
+    """Method to load scale values for a given amino acid scale
+
+    :param scalename: amino acid scale name, for available scales see the
+        :class:`modlamp.descriptors.PeptideDescriptor()` documentation.
+    :return: amino acid scale values in dictionary format.
+    """
+    # predefined amino acid scales dictionary
+    scales = {
+        "aasi": {
+            "A": [1.89],
+            "C": [1.73],
+            "D": [3.13],
+            "E": [3.14],
+            "F": [1.53],
+            "G": [2.67],
+            "H": [3],
+            "I": [1.97],
+            "K": [2.28],
+            "L": [1.74],
+            "M": [2.5],
+            "N": [2.33],
+            "P": [0.22],
+            "Q": [3.05],
+            "R": [1.91],
+            "S": [2.14],
+            "T": [2.18],
+            "V": [2.37],
+            "W": [2],
+            "Y": [2.01],
+        },
+        "abhprk": {
+            "A": [0, 0, 0, 0, 0, 0],
+            "C": [0, 0, 0, 0, 0, 0],
+            "D": [1, 0, 0, 1, 0, 0],
+            "E": [1, 0, 0, 1, 0, 0],
+            "F": [0, 0, 1, 0, 1, 0],
+            "G": [0, 0, 0, 0, 0, 0],
+            "H": [0, 0, 0, 1, 1, 0],
+            "I": [0, 0, 1, 0, 0, 0],
+            "K": [0, 1, 0, 1, 0, 0],
+            "L": [0, 0, 1, 0, 0, 0],
+            "M": [0, 0, 1, 0, 0, 0],
+            "N": [0, 0, 0, 1, 0, 0],
+            "P": [0, 0, 0, 0, 0, 1],
+            "Q": [0, 0, 0, 1, 0, 0],
+            "R": [0, 1, 0, 1, 0, 0],
+            "S": [0, 0, 0, 1, 0, 0],
+            "T": [0, 0, 0, 1, 0, 0],
+            "V": [0, 0, 1, 0, 0, 0],
+            "W": [0, 0, 1, 0, 1, 0],
+            "Y": [0, 0, 0, 1, 1, 0],
+        },
+        "argos": {
+            "I": [0.77],
+            "F": [1.2],
+            "V": [0.14],
+            "L": [2.3],
+            "W": [0.07],
+            "M": [2.3],
+            "A": [0.64],
+            "G": [-0.48],
+            "C": [0.25],
+            "Y": [-0.41],
+            "P": [-0.31],
+            "T": [-0.13],
+            "S": [-0.25],
+            "H": [-0.87],
+            "E": [-0.94],
+            "N": [-0.89],
+            "Q": [-0.61],
+            "D": [-1],
+            "K": [-1],
+            "R": [-0.68],
+        },
+        "bulkiness": {
+            "A": [0.443],
+            "C": [0.551],
+            "D": [0.453],
+            "E": [0.557],
+            "F": [0.898],
+            "G": [0],
+            "H": [0.563],
+            "I": [0.985],
+            "K": [0.674],
+            "L": [0.985],
+            "M": [0.703],
+            "N": [0.516],
+            "P": [0.768],
+            "Q": [0.605],
+            "R": [0.596],
+            "S": [0.332],
+            "T": [0.677],
+            "V": [0.995],
+            "W": [1],
+            "Y": [0.801],
+        },
+        "charge_phys": {
+            "A": [0.0],
+            "C": [-0.1],
+            "D": [-1.0],
+            "E": [-1.0],
+            "F": [0.0],
+            "G": [0.0],
+            "H": [0.1],
+            "I": [0.0],
+            "K": [1.0],
+            "L": [0.0],
+            "M": [0.0],
+            "N": [0.0],
+            "P": [0.0],
+            "Q": [0.0],
+            "R": [1.0],
+            "S": [0.0],
+            "T": [0.0],
+            "V": [0.0],
+            "W": [0.0],
+            "Y": [0.0],
+        },
+        "charge_acid": {
+            "A": [0.0],
+            "C": [-0.1],
+            "D": [-1.0],
+            "E": [-1.0],
+            "F": [0.0],
+            "G": [0.0],
+            "H": [1.0],
+            "I": [0.0],
+            "K": [1.0],
+            "L": [0.0],
+            "M": [0.0],
+            "N": [0.0],
+            "P": [0.0],
+            "Q": [0.0],
+            "R": [1.0],
+            "S": [0.0],
+            "T": [0.0],
+            "V": [0.0],
+            "W": [0.0],
+            "Y": [0.0],
+        },
+        "cougar": {
+            "A": [0.25, 0.62, 1.89],
+            "C": [0.208, 0.29, 1.73],
+            "D": [0.875, -0.9, 3.13],
+            "E": [0.833, -0.74, 3.14],
+            "F": [0.042, 1.2, 1.53],
+            "G": [1, 0.48, 2.67],
+            "H": [0.083, -0.4, 3],
+            "I": [0.667, 1.4, 1.97],
+            "K": [0.708, -1.5, 2.28],
+            "L": [0.292, 1.1, 1.74],
+            "M": [0, 0.64, 2.5],
+            "N": [0.667, -0.78, 2.33],
+            "P": [0.875, 0.12, 0.22],
+            "Q": [0.792, -0.85, 3.05],
+            "R": [0.958, -2.5, 1.91],
+            "S": [0.875, -0.18, 2.14],
+            "T": [0.583, -0.05, 2.18],
+            "V": [0.375, 1.1, 2.37],
+            "W": [0.042, 0.81, 2],
+            "Y": [0.5, 0.26, 2.01],
+        },
+        "eisenberg": {
+            "I": [1.4],
+            "F": [1.2],
+            "V": [1.1],
+            "L": [1.1],
+            "W": [0.81],
+            "M": [0.64],
+            "A": [0.62],
+            "G": [0.48],
+            "C": [0.29],
+            "Y": [0.26],
+            "P": [0.12],
+            "T": [-0.05],
+            "S": [-0.18],
+            "H": [-0.4],
+            "E": [-0.74],
+            "N": [-0.78],
+            "Q": [-0.85],
+            "D": [-0.9],
+            "K": [-1.5],
+            "R": [-2.5],
+        },
+        "ez": {
+            "A": [-0.29, 10.22, 4.67],
+            "C": [0.95, 13.69, 5.77],
+            "D": [1.19, 14.25, 8.98],
+            "E": [1.3, 14.66, 4.16],
+            "F": [-0.8, 19.67, 7.12],
+            "G": [-0.01, 13.86, 6],
+            "H": [0.75, 12.26, 2.77],
+            "I": [-0.56, 14.34, 10.69],
+            "K": [1.66, 11.11, 2.09],
+            "L": [-0.64, 17.34, 8.61],
+            "M": [-0.28, 18.04, 7.13],
+            "N": [0.89, 12.78, 6.28],
+            "P": [0.83, 18.09, 3.53],
+            "Q": [1.21, 10.46, 2.59],
+            "R": [1.55, 9.34, 4.68],
+            "S": [0.1, 13.86, 6],
+            "T": [0.01, 13.86, 6],
+            "V": [-0.47, 11.35, 4.97],
+            "W": [-0.85, 11.65, 7.2],
+            "Y": [-0.42, 13.04, 6.2],
+        },
+        "flexibility": {
+            "A": [0.25],
+            "C": [0.208],
+            "D": [0.875],
+            "E": [0.833],
+            "F": [0.042],
+            "G": [1],
+            "H": [0.083],
+            "I": [0.667],
+            "K": [0.708],
+            "L": [0.292],
+            "M": [0.0],
+            "N": [0.667],
+            "P": [0.875],
+            "Q": [0.792],
+            "R": [0.958],
+            "S": [0.875],
+            "T": [0.583],
+            "V": [0.375],
+            "W": [0.042],
+            "Y": [0.5],
+        },
+        "grantham": {
+            "A": [0, 8.1, 31],
+            "C": [2.75, 5.5, 55],
+            "D": [1.38, 13.0, 54],
+            "E": [0.92, 12.3, 83],
+            "F": [0, 5.2, 132],
+            "G": [0.74, 9.0, 3],
+            "H": [0.58, 10.4, 96],
+            "I": [0, 5.2, 111],
+            "K": [0.33, 11.3, 119],
+            "L": [0, 4.9, 111],
+            "M": [0, 5.7, 105],
+            "N": [1.33, 11.6, 56],
+            "P": [0.39, 8.0, 32.5],
+            "Q": [0.89, 10.5, 85],
+            "R": [0.65, 10.5, 124],
+            "S": [1.42, 9.2, 32],
+            "T": [0.71, 8.6, 61],
+            "V": [0, 5.9, 84],
+            "W": [0.13, 5.4, 170],
+            "Y": [0.20, 6.2, 136],
+        },
+        "gravy": {
+            "I": [4.5],
+            "V": [4.2],
+            "L": [3.8],
+            "F": [2.8],
+            "C": [2.5],
+            "M": [1.9],
+            "A": [1.8],
+            "G": [-0.4],
+            "T": [-0.7],
+            "W": [-0.9],
+            "S": [-0.8],
+            "Y": [-1.3],
+            "P": [-1.6],
+            "H": [-3.2],
+            "E": [-3.5],
+            "Q": [-3.5],
+            "D": [-3.5],
+            "N": [-3.5],
+            "K": [-3.9],
+            "R": [-4.5],
+        },
+        "hopp-woods": {
+            "A": [-0.5],
+            "C": [-1],
+            "D": [3],
+            "E": [3],
+            "F": [-2.5],
+            "G": [0],
+            "H": [-0.5],
+            "I": [-1.8],
+            "K": [3],
+            "L": [-1.8],
+            "M": [-1.3],
+            "N": [0.2],
+            "P": [0],
+            "Q": [0.2],
+            "R": [3],
+            "S": [0.3],
+            "T": [-0.4],
+            "V": [-1.5],
+            "W": [-3.4],
+            "Y": [-2.3],
+        },
+        "isaeci": {
+            "A": [62.9, 0.05],
+            "C": [78.51, 0.15],
+            "D": [18.46, 1.25],
+            "E": [30.19, 1.31],
+            "F": [189.42, 0.14],
+            "G": [19.93, 0.02],
+            "H": [87.38, 0.56],
+            "I": [149.77, 0.09],
+            "K": [102.78, 0.53],
+            "L": [154.35, 0.1],
+            "M": [132.22, 0.34],
+            "N": [19.53, 1.36],
+            "P": [122.35, 0.16],
+            "Q": [17.87, 1.31],
+            "R": [52.98, 1.69],
+            "S": [19.75, 0.56],
+            "T": [59.44, 0.65],
+            "V": [120.91, 0.07],
+            "W": [179.16, 1.08],
+            "Y": [132.16, 0.72],
+        },
+        "janin": {
+            "I": [1.2],
+            "F": [0.87],
+            "V": [1],
+            "L": [0.87],
+            "W": [0.59],
+            "M": [0.73],
+            "A": [0.59],
+            "G": [0.59],
+            "C": [1.4],
+            "Y": [-0.4],
+            "P": [-0.26],
+            "T": [-0.12],
+            "S": [0.02],
+            "H": [0.02],
+            "E": [-0.83],
+            "N": [-0.55],
+            "Q": [-0.83],
+            "D": [-0.69],
+            "K": [-2.4],
+            "R": [-1.8],
+        },
+        "kytedoolittle": {
+            "I": [1.7],
+            "F": [1.1],
+            "V": [1.6],
+            "L": [1.4],
+            "W": [-0.14],
+            "M": [0.8],
+            "A": [0.77],
+            "G": [0.03],
+            "C": [1],
+            "Y": [-0.27],
+            "P": [-0.37],
+            "T": [-0.07],
+            "S": [-0.1],
+            "H": [-0.91],
+            "E": [-1],
+            "N": [-1],
+            "Q": [-1],
+            "D": [-1],
+            "K": [-1.1],
+            "R": [-1.3],
+        },
+        "levitt_alpha": {
+            "A": [1.29],
+            "C": [1.11],
+            "D": [1.04],
+            "E": [1.44],
+            "F": [1.07],
+            "G": [0.56],
+            "H": [1.22],
+            "I": [0.97],
+            "K": [1.23],
+            "L": [1.3],
+            "M": [1.47],
+            "N": [0.9],
+            "P": [0.52],
+            "Q": [1.27],
+            "R": [0.96],
+            "S": [0.82],
+            "T": [0.82],
+            "V": [0.91],
+            "W": [0.99],
+            "Y": [0.72],
+        },
+        "mss": {
+            "A": [13.02],
+            "C": [23.7067],
+            "D": [22.02],
+            "E": [20.0233],
+            "F": [23.5288],
+            "G": [1.01],
+            "H": [23.5283],
+            "I": [22.3611],
+            "K": [18.9756],
+            "L": [19.6944],
+            "M": [21.92],
+            "N": [21.8567],
+            "P": [19.0242],
+            "Q": [19.9689],
+            "R": [19.0434],
+            "S": [18.3533],
+            "T": [22.3567],
+            "V": [21.0267],
+            "W": [26.1975],
+            "Y": [24.1954],
+        },
+        "msw": {
+            "A": [-0.73, 0.2, -0.62],
+            "C": [-0.66, 0.26, -0.27],
+            "D": [0.11, -1, -0.96],
+            "E": [0.24, -0.39, -0.04],
+            "F": [0.76, 0.85, -0.34],
+            "G": [-0.31, -0.28, -0.75],
+            "H": [0.84, 0.67, -0.78],
+            "I": [-0.91, 0.83, -0.25],
+            "K": [-0.51, 0.08, 0.6],
+            "L": [-0.74, 0.72, -0.16],
+            "M": [-0.7, 1, -0.32],
+            "N": [0.14, 0.2, -0.66],
+            "P": [-0.43, 0.73, -0.6],
+            "Q": [0.3, 1, -0.3],
+            "R": [-0.22, 0.27, 1],
+            "S": [-0.8, 0.61, -1],
+            "T": [-0.58, 0.85, -0.89],
+            "V": [-1, 0.79, -0.58],
+            "W": [1, 0.98, -0.47],
+            "Y": [0.97, 0.66, -0.16],
+        },
+        "pepcats": {
+            "A": [1, 0, 0, 0, 0, 0],
+            "C": [1, 0, 1, 1, 0, 0],
+            "D": [0, 0, 1, 0, 0, 1],
+            "E": [0, 0, 1, 0, 0, 1],
+            "F": [1, 1, 0, 0, 0, 0],
+            "G": [0, 0, 0, 0, 0, 0],
+            "H": [1, 1, 0, 1, 1, 0],
+            "I": [1, 0, 0, 0, 0, 0],
+            "K": [1, 0, 0, 1, 1, 0],
+            "L": [1, 0, 0, 0, 0, 0],
+            "M": [1, 0, 1, 0, 0, 0],
+            "N": [0, 0, 1, 1, 0, 0],
+            "P": [1, 0, 0, 0, 0, 0],
+            "Q": [0, 0, 1, 1, 0, 0],
+            "R": [1, 0, 0, 1, 1, 0],
+            "S": [0, 0, 1, 1, 0, 0],
+            "T": [0, 0, 1, 1, 0, 0],
+            "V": [1, 0, 0, 0, 0, 0],
+            "W": [1, 1, 0, 1, 0, 0],
+            "Y": [1, 1, 1, 1, 0, 0],
+        },
+        "peparc": {
+            "A": [1, 0, 0, 0, 0],
+            "C": [0, 1, 0, 0, 0],
+            "D": [0, 1, 0, 1, 0],
+            "E": [0, 1, 0, 1, 0],
+            "F": [1, 0, 0, 0, 0],
+            "G": [0, 0, 0, 0, 0],
+            "H": [0, 1, 1, 0, 0],
+            "I": [1, 0, 0, 0, 0],
+            "K": [0, 1, 1, 0, 0],
+            "L": [1, 0, 0, 0, 0],
+            "M": [1, 0, 0, 0, 0],
+            "N": [0, 1, 0, 0, 0],
+            "P": [0, 0, 0, 0, 1],
+            "Q": [0, 1, 0, 0, 0],
+            "R": [0, 1, 1, 0, 0],
+            "S": [0, 1, 0, 0, 0],
+            "T": [0, 1, 0, 0, 0],
+            "V": [1, 0, 0, 0, 0],
+            "W": [1, 0, 0, 0, 0],
+            "Y": [1, 0, 0, 0, 0],
+        },
+        "polarity": {
+            "A": [0.395],
+            "C": [0.074],
+            "D": [1.0],
+            "E": [0.914],
+            "F": [0.037],
+            "G": [0.506],
+            "H": [0.679],
+            "I": [0.037],
+            "K": [0.79],
+            "L": [0.0],
+            "M": [0.099],
+            "N": [0.827],
+            "P": [0.383],
+            "Q": [0.691],
+            "R": [0.691],
+            "S": [0.531],
+            "T": [0.457],
+            "V": [0.123],
+            "W": [0.062],
+            "Y": [0.16],
+        },
+        "ppcali": {
+            "A": [
+                0.070781,
+                0.036271,
+                2.042,
+                0.083272,
+                0.69089,
+                0.15948,
+                -0.80893,
+                0.24698,
+                0.86525,
+                0.68563,
+                -0.24665,
+                0.61314,
+                -0.53343,
+                -0.50878,
+                -1.3646,
+                2.2679,
+                -1.5644,
+                -0.75043,
+                -0.65875,
+            ],
+            "C": [
+                0.61013,
+                -0.93043,
+                -0.85983,
+                -2.2704,
+                1.5877,
+                -2.0066,
+                -0.30314,
+                1.2544,
+                -0.2832,
+                -1.2844,
+                -0.73449,
+                -0.11235,
+                -0.41152,
+                -0.0050164,
+                0.28307,
+                0.20522,
+                -0.021084,
+                -0.15627,
+                -0.32689,
+            ],
+            "D": [
+                -1.3215,
+                0.24063,
+                -0.032754,
+                -0.37863,
+                1.2051,
+                1.0001,
+                2.1827,
+                0.19212,
+                -0.60529,
+                0.37639,
+                -0.46451,
+                -0.46788,
+                1.4077,
+                -2.1661,
+                0.72604,
+                -0.12332,
+                -0.8243,
+                -0.082989,
+                0.053476,
+            ],
+            "E": [
+                -0.87713,
+                1.4905,
+                1.0755,
+                0.35944,
+                1.567,
+                0.41365,
+                1.0944,
+                0.72634,
+                -0.74957,
+                0.038939,
+                0.075057,
+                0.78637,
+                -1.4543,
+                1.6667,
+                -0.097439,
+                -0.24293,
+                1.7687,
+                0.36174,
+                -0.11585,
+            ],
+            "F": [
+                1.3557,
+                -0.10336,
+                -0.4309,
+                0.41269,
+                -0.083356,
+                0.83783,
+                0.095381,
+                -0.65222,
+                -0.3119,
+                0.43293,
+                -1.0011,
+                -0.66855,
+                -0.10242,
+                1.2066,
+                2.6234,
+                1.9981,
+                -0.25016,
+                0.71979,
+                0.21569,
+            ],
+            "G": [
+                -1.0818,
+                -2.1561,
+                0.77082,
+                -0.92747,
+                -1.0748,
+                1.7997,
+                -1.3708,
+                1.279,
+                -1.2098,
+                0.46065,
+                0.43076,
+                0.20037,
+                -0.2302,
+                0.2646,
+                0.57149,
+                -0.68432,
+                0.19341,
+                -0.061606,
+                -0.08071,
+            ],
+            "H": [
+                -0.050161,
+                0.69246,
+                -0.88397,
+                -0.64601,
+                0.24622,
+                0.10487,
+                -1.1317,
+                -2.3661,
+                -0.89918,
+                0.46391,
+                -0.62359,
+                2.5478,
+                -0.34737,
+                -0.52062,
+                0.17522,
+                -0.88648,
+                -0.4755,
+                0.023187,
+                -0.28261,
+            ],
+            "I": [
+                1.4829,
+                -0.46435,
+                0.50189,
+                0.55724,
+                -0.51535,
+                -0.29914,
+                0.97236,
+                -0.15793,
+                -0.98246,
+                -0.54347,
+                0.97806,
+                0.37577,
+                1.618,
+                0.62323,
+                -0.59359,
+                -0.35483,
+                -0.085017,
+                0.55825,
+                -2.7542,
+            ],
+            "K": [
+                -0.85344,
+                1.529,
+                0.27747,
+                0.32993,
+                -1.1786,
+                -0.16633,
+                -1.0459,
+                0.44621,
+                0.41027,
+                -2.5318,
+                0.91329,
+                0.53385,
+                0.61417,
+                -1.111,
+                1.1323,
+                0.95105,
+                0.76769,
+                -0.016115,
+                0.054995,
+            ],
+            "L": [
+                1.2857,
+                0.039488,
+                1.5378,
+                0.87969,
+                -0.21419,
+                0.40389,
+                -0.20426,
+                -0.14351,
+                0.61024,
+                -1.1927,
+                -2.2149,
+                -0.84248,
+                -0.5061,
+                -0.48548,
+                0.10791,
+                -2.1503,
+                -0.12006,
+                -0.60222,
+                0.26546,
+            ],
+            "M": [
+                1.137,
+                0.64388,
+                0.13724,
+                -0.2988,
+                1.2288,
+                0.24981,
+                -1.6427,
+                -0.75868,
+                -0.54902,
+                1.0571,
+                1.272,
+                -1.9104,
+                0.70919,
+                -0.93575,
+                -0.6314,
+                -0.079654,
+                1.634,
+                -0.0021923,
+                0.49825,
+            ],
+            "N": [
+                -1.084,
+                -0.176,
+                -0.47062,
+                -0.92245,
+                -0.32953,
+                0.74278,
+                0.34551,
+                -1.4605,
+                0.25219,
+                -1.2107,
+                -0.59978,
+                -0.79183,
+                1.3268,
+                1.9839,
+                -1.6137,
+                0.5333,
+                0.033889,
+                -1.0331,
+                0.83019,
+            ],
+            "P": [
+                -1.1823,
+                -1.6911,
+                -1.1331,
+                3.073,
+                1.1942,
+                -0.93426,
+                -0.72985,
+                -0.042441,
+                -0.19264,
+                -0.21603,
+                -0.1239,
+                0.054016,
+                0.15241,
+                -0.019691,
+                -0.20543,
+                0.10206,
+                0.07671,
+                -0.081968,
+                0.20348,
+            ],
+            "Q": [
+                -0.57747,
+                0.97452,
+                -0.077547,
+                -0.0033488,
+                0.17184,
+                -0.52537,
+                -0.27362,
+                -0.1366,
+                0.2057,
+                -0.013066,
+                1.8834,
+                -1.2736,
+                -0.84991,
+                1.0445,
+                0.69027,
+                -1.2866,
+                -2.6776,
+                0.1683,
+                0.086105,
+            ],
+            "R": [
+                -0.62245,
+                1.545,
+                -0.61966,
+                0.19057,
+                -1.7485,
+                -1.3909,
+                -0.47526,
+                1.3938,
+                -0.84556,
+                1.7344,
+                -1.6516,
+                -0.52678,
+                0.6791,
+                0.24374,
+                -0.62551,
+                -0.0028271,
+                -0.053884,
+                0.14926,
+                -0.17232,
+            ],
+            "S": [
+                -0.86409,
+                -0.77147,
+                0.38542,
+                -0.59389,
+                -0.53313,
+                -0.47585,
+                0.31966,
+                -0.89716,
+                1.8029,
+                0.26431,
+                -0.23173,
+                -0.37626,
+                -0.47349,
+                -0.42878,
+                -0.47297,
+                -0.079826,
+                0.57043,
+                3.2057,
+                -0.18413,
+            ],
+            "T": [
+                -0.33027,
+                -0.57447,
+                0.18653,
+                -0.28941,
+                -0.62681,
+                -1.0737,
+                0.80363,
+                -0.59525,
+                1.8786,
+                1.3971,
+                0.63929,
+                0.21281,
+                -0.067048,
+                0.096271,
+                1.323,
+                -0.36173,
+                1.2261,
+                -2.2771,
+                -0.65412,
+            ],
+            "V": [
+                1.1675,
+                -0.61554,
+                0.95405,
+                0.11662,
+                -0.74473,
+                -1.1482,
+                1.1309,
+                0.12079,
+                -0.77171,
+                0.18597,
+                0.93442,
+                1.201,
+                0.3826,
+                -0.091573,
+                -0.31269,
+                0.074367,
+                -0.22946,
+                0.24322,
+                2.9836,
+            ],
+            "W": [
+                1.1881,
+                0.43789,
+                -1.7915,
+                0.138,
+                0.43088,
+                1.6467,
+                -0.11987,
+                1.7369,
+                2.0818,
+                0.33122,
+                0.31829,
+                1.1586,
+                0.67649,
+                0.30819,
+                -0.55772,
+                -0.54491,
+                -0.17969,
+                0.24477,
+                0.38674,
+            ],
+            "Y": [
+                0.54671,
+                -0.1468,
+                -1.5688,
+                0.19001,
+                -1.2736,
+                0.66162,
+                1.1614,
+                -0.18614,
+                -0.70654,
+                -0.43634,
+                0.44775,
+                -0.71366,
+                -2.5907,
+                -1.1649,
+                -1.1576,
+                0.66572,
+                0.21019,
+                -0.61016,
+                -0.34844,
+            ],
+        },
+        "refractivity": {
+            "A": [0.102045615],
+            "C": [0.841053374],
+            "D": [0.282153774],
+            "E": [0.405831178],
+            "F": [0.691276746],
+            "G": [0],
+            "H": [0.512814484],
+            "I": [0.448154244],
+            "K": [0.50058782],
+            "L": [0.441570656],
+            "M": [0.508817305],
+            "N": [0.282153774],
+            "P": [0.256995062],
+            "Q": [0.405831178],
+            "R": [0.626851634],
+            "S": [0.149306372],
+            "T": [0.258876087],
+            "V": [0.327298378],
+            "W": [1],
+            "Y": [0.741359041],
+        },
+        "t_scale": {
+            "A": [-8.4, -8.01, -3.73, -3.65, -6.12, -1.59, 1.56],
+            "C": [-2.44, -1.96, 0.93, -2.35, 1.31, 2.29, -1.52],
+            "D": [-6.84, -0.94, 17.68, -0.03, 3.44, 9.07, 4.32],
+            "E": [-6.5, 16.2, 17.28, 3.11, -4.75, -2.54, 4.72],
+            "F": [21.59, -5.73, 1.03, -3.3, 2.64, -5.02, 1.7],
+            "G": [-8.48, -10.37, -5.14, -6.51, -11.84, -3.6, 2.01],
+            "H": [15.28, -3.67, 6.72, -6.38, 4.12, -1.55, -2.85],
+            "I": [-2.97, 4.64, -0.77, 11, 3.26, -4.36, -7.88],
+            "K": [2.7, 13.46, -14.03, -2.55, 2.77, 0.15, 3.19],
+            "L": [2.61, 5.96, 1.97, 2.59, -4.77, -4.84, -5.44],
+            "M": [3.38, 12.43, -4.77, 0.45, -1.55, -0.6, 3.26],
+            "N": [-3.11, -1.22, 6.26, -9.38, 9.94, 7.66, -4.81],
+            "P": [-5.35, -9.07, -1.52, -8.79, -8.73, 4.29, -9.91],
+            "Q": [-5.31, 15.64, 8.44, 1.03, -4.32, -4.4, -0.52],
+            "R": [-2.27, 18.9, -18.24, -3.47, 3.03, 6.64, 0.45],
+            "S": [-15.88, -11.21, -2.44, -3.61, 3.46, -0.37, 8.98],
+            "T": [-17.81, -13.64, -5.19, 10.57, 6.91, -4.43, 3.49],
+            "V": [-5.8, -6.15, -2.26, 9.87, 5.28, -1.49, -7.54],
+            "W": [21.68, -8.78, -2.53, 15.53, -8.15, 11.98, 3.23],
+            "Y": [23.9, -6.47, 0.31, -4.14, 4.08, -7.28, 3.59],
+        },
+        "tm_tend": {
+            "A": [0.38],
+            "C": [-0.3],
+            "D": [-3.27],
+            "E": [-2.9],
+            "F": [1.98],
+            "G": [-0.19],
+            "H": [-1.44],
+            "I": [1.97],
+            "K": [-3.46],
+            "L": [1.82],
+            "M": [1.4],
+            "N": [-1.62],
+            "P": [-1.44],
+            "Q": [-1.84],
+            "R": [-2.57],
+            "S": [-0.53],
+            "T": [-0.32],
+            "V": [1.46],
+            "W": [1.53],
+            "Y": [0.49],
+        },
+        "z3": {
+            "A": [0.07, -1.73, 0.09],
+            "C": [0.71, -0.97, 4.13],
+            "D": [3.64, 1.13, 2.36],
+            "E": [3.08, 0.39, -0.07],
+            "F": [-4.92, 1.3, 0.45],
+            "G": [2.23, -5.36, 0.3],
+            "H": [2.41, 1.74, 1.11],
+            "I": [-4.44, -1.68, -1.03],
+            "K": [2.84, 1.41, -3.14],
+            "L": [-4.19, -1.03, -0.98],
+            "M": [-2.49, -0.27, -0.41],
+            "N": [3.22, 1.45, 0.84],
+            "P": [-1.22, 0.88, 2.23],
+            "Q": [2.18, 0.53, -1.14],
+            "R": [2.88, 2.52, -3.44],
+            "S": [1.96, -1.63, 0.57],
+            "T": [0.92, -2.09, -1.4],
+            "V": [-2.69, -2.53, -1.29],
+            "W": [-4.75, 3.65, 0.85],
+            "Y": [-1.39, 2.32, 0.01],
+        },
+        "z5": {
+            "A": [0.24, -2.32, 0.6, -0.14, 1.3],
+            "C": [0.84, -1.67, 3.71, 0.18, -2.65],
+            "D": [3.98, 0.93, 1.93, -2.46, 0.75],
+            "E": [3.11, 0.26, -0.11, -3.04, -0.25],
+            "F": [-4.22, 1.94, 1.06, 0.54, -0.62],
+            "G": [2.05, -4.06, 0.36, -0.82, -0.38],
+            "H": [2.47, 1.95, 0.26, 3.9, 0.09],
+            "I": [-3.89, -1.73, -1.71, -0.84, 0.26],
+            "K": [2.29, 0.89, -2.49, 1.49, 0.31],
+            "L": [-4.28, -1.3, -1.49, -0.72, 0.84],
+            "M": [-2.85, -0.22, 0.47, 1.94, -0.98],
+            "N": [3.05, 1.62, 1.04, -1.15, 1.61],
+            "P": [-1.66, 0.27, 1.84, 0.7, 2],
+            "Q": [1.75, 0.5, -1.44, -1.34, 0.66],
+            "R": [3.52, 2.5, -3.5, 1.99, -0.17],
+            "S": [2.39, -1.07, 1.15, -1.39, 0.67],
+            "T": [0.75, -2.18, -1.12, -1.46, -0.4],
+            "V": [-2.59, -2.64, -1.54, -0.85, -0.02],
+            "W": [-4.36, 3.94, 0.59, 3.44, -1.59],
+            "Y": [-2.54, 2.44, 0.43, 0.04, -1.47],
+        },
+    }
+    if scalename == "all":
+        d = {
+            "I": [],
+            "F": [],
+            "V": [],
+            "L": [],
+            "W": [],
+            "M": [],
+            "A": [],
+            "G": [],
+            "C": [],
+            "Y": [],
+            "P": [],
+            "T": [],
+            "S": [],
+            "H": [],
+            "E": [],
+            "N": [],
+            "Q": [],
+            "D": [],
+            "K": [],
+            "R": [],
+        }
+        for scale in scales.keys():
+            for k, v in scales[scale].items():
+                d[k].extend(v)
+        return "all", d
+
+    elif scalename == "instability":
+        d = {
+            "A": {
+                "A": 1.0,
+                "C": 44.94,
+                "E": 1.0,
+                "D": -7.49,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": -7.49,
+                "K": 1.0,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 1.0,
+                "P": 20.26,
+                "S": 1.0,
+                "R": 1.0,
+                "T": 1.0,
+                "W": 1.0,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "C": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 20.26,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 33.6,
+                "K": 1.0,
+                "M": 33.6,
+                "L": 20.26,
+                "N": 1.0,
+                "Q": -6.54,
+                "P": 20.26,
+                "S": 1.0,
+                "R": 1.0,
+                "T": 33.6,
+                "W": 24.68,
+                "V": -6.54,
+                "Y": 1.0,
+            },
+            "E": {
+                "A": 1.0,
+                "C": 44.94,
+                "E": 33.6,
+                "D": 20.26,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 20.26,
+                "H": -6.54,
+                "K": 1.0,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 20.26,
+                "P": 20.26,
+                "S": 20.26,
+                "R": 1.0,
+                "T": 1.0,
+                "W": -14.03,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "D": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": 1.0,
+                "F": -6.54,
+                "I": 1.0,
+                "H": 1.0,
+                "K": -7.49,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 1.0,
+                "P": 1.0,
+                "S": 20.26,
+                "R": -6.54,
+                "T": -14.03,
+                "W": 1.0,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "G": {
+                "A": -7.49,
+                "C": 1.0,
+                "E": -6.54,
+                "D": 1.0,
+                "G": 13.34,
+                "F": 1.0,
+                "I": -7.49,
+                "H": 1.0,
+                "K": -7.49,
+                "M": 1.0,
+                "L": 1.0,
+                "N": -7.49,
+                "Q": 1.0,
+                "P": 1.0,
+                "S": 1.0,
+                "R": 1.0,
+                "T": -7.49,
+                "W": 13.34,
+                "V": 1.0,
+                "Y": -7.49,
+            },
+            "F": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 13.34,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 1.0,
+                "K": -14.03,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 1.0,
+                "P": 20.26,
+                "S": 1.0,
+                "R": 1.0,
+                "T": 1.0,
+                "W": 1.0,
+                "V": 1.0,
+                "Y": 33.601,
+            },
+            "I": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 44.94,
+                "D": 1.0,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 13.34,
+                "K": -7.49,
+                "M": 1.0,
+                "L": 20.26,
+                "N": 1.0,
+                "Q": 1.0,
+                "P": -1.88,
+                "S": 1.0,
+                "R": 1.0,
+                "T": 1.0,
+                "W": 1.0,
+                "V": -7.49,
+                "Y": 1.0,
+            },
+            "H": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": -9.37,
+                "F": -9.37,
+                "I": 44.94,
+                "H": 1.0,
+                "K": 24.68,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 24.68,
+                "Q": 1.0,
+                "P": -1.88,
+                "S": 1.0,
+                "R": 1.0,
+                "T": -6.54,
+                "W": -1.88,
+                "V": 1.0,
+                "Y": 44.94,
+            },
+            "K": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": -7.49,
+                "F": 1.0,
+                "I": -7.49,
+                "H": 1.0,
+                "K": 1.0,
+                "M": 33.6,
+                "L": -7.49,
+                "N": 1.0,
+                "Q": 24.64,
+                "P": -6.54,
+                "S": 1.0,
+                "R": 33.6,
+                "T": 1.0,
+                "W": 1.0,
+                "V": -7.49,
+                "Y": 1.0,
+            },
+            "M": {
+                "A": 13.34,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 58.28,
+                "K": 1.0,
+                "M": -1.88,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": -6.54,
+                "P": 44.94,
+                "S": 44.94,
+                "R": -6.54,
+                "T": -1.88,
+                "W": 1.0,
+                "V": 1.0,
+                "Y": 24.68,
+            },
+            "L": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 1.0,
+                "K": -7.49,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 33.6,
+                "P": 20.26,
+                "S": 1.0,
+                "R": 20.26,
+                "T": 1.0,
+                "W": 24.68,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "N": {
+                "A": 1.0,
+                "C": -1.88,
+                "E": 1.0,
+                "D": 1.0,
+                "G": -14.03,
+                "F": -14.03,
+                "I": 44.94,
+                "H": 1.0,
+                "K": 24.68,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": -6.54,
+                "P": -1.88,
+                "S": 1.0,
+                "R": 1.0,
+                "T": -7.49,
+                "W": -9.37,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "Q": {
+                "A": 1.0,
+                "C": -6.54,
+                "E": 20.26,
+                "D": 20.26,
+                "G": 1.0,
+                "F": -6.54,
+                "I": 1.0,
+                "H": 1.0,
+                "K": 1.0,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 20.26,
+                "P": 20.26,
+                "S": 44.94,
+                "R": 1.0,
+                "T": 1.0,
+                "W": 1.0,
+                "V": -6.54,
+                "Y": -6.54,
+            },
+            "P": {
+                "A": 20.26,
+                "C": -6.54,
+                "E": 18.38,
+                "D": -6.54,
+                "G": 1.0,
+                "F": 20.26,
+                "I": 1.0,
+                "H": 1.0,
+                "K": 1.0,
+                "M": -6.54,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 20.26,
+                "P": 20.26,
+                "S": 20.26,
+                "R": -6.54,
+                "T": 1.0,
+                "W": -1.88,
+                "V": 20.26,
+                "Y": 1.0,
+            },
+            "S": {
+                "A": 1.0,
+                "C": 33.6,
+                "E": 20.26,
+                "D": 1.0,
+                "G": 1.0,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 1.0,
+                "K": 1.0,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 20.26,
+                "P": 44.94,
+                "S": 20.26,
+                "R": 20.26,
+                "T": 1.0,
+                "W": 1.0,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "R": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": -7.49,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 20.26,
+                "K": 1.0,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 13.34,
+                "Q": 20.26,
+                "P": 20.26,
+                "S": 44.94,
+                "R": 58.28,
+                "T": 1.0,
+                "W": 58.28,
+                "V": 1.0,
+                "Y": -6.54,
+            },
+            "T": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 20.26,
+                "D": 1.0,
+                "G": -7.49,
+                "F": 13.34,
+                "I": 1.0,
+                "H": 1.0,
+                "K": 1.0,
+                "M": 1.0,
+                "L": 1.0,
+                "N": -14.03,
+                "Q": -6.54,
+                "P": 1.0,
+                "S": 1.0,
+                "R": 1.0,
+                "T": 1.0,
+                "W": -14.03,
+                "V": 1.0,
+                "Y": 1.0,
+            },
+            "W": {
+                "A": -14.03,
+                "C": 1.0,
+                "E": 1.0,
+                "D": 1.0,
+                "G": -9.37,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 24.68,
+                "K": 1.0,
+                "M": 24.68,
+                "L": 13.34,
+                "N": 13.34,
+                "Q": 1.0,
+                "P": 1.0,
+                "S": 1.0,
+                "R": 1.0,
+                "T": -14.03,
+                "W": 1.0,
+                "V": -7.49,
+                "Y": 1.0,
+            },
+            "V": {
+                "A": 1.0,
+                "C": 1.0,
+                "E": 1.0,
+                "D": -14.03,
+                "G": -7.49,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 1.0,
+                "K": -1.88,
+                "M": 1.0,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 1.0,
+                "P": 20.26,
+                "S": 1.0,
+                "R": 1.0,
+                "T": -7.49,
+                "W": 1.0,
+                "V": 1.0,
+                "Y": -6.54,
+            },
+            "Y": {
+                "A": 24.68,
+                "C": 1.0,
+                "E": -6.54,
+                "D": 24.68,
+                "G": -7.49,
+                "F": 1.0,
+                "I": 1.0,
+                "H": 13.34,
+                "K": 1.0,
+                "M": 44.94,
+                "L": 1.0,
+                "N": 1.0,
+                "Q": 1.0,
+                "P": 13.34,
+                "S": 1.0,
+                "R": -15.91,
+                "T": -7.49,
+                "W": -9.37,
+                "V": 1.0,
+                "Y": 13.34,
+            },
+        }
+        return "instability", d
+
+    else:
+        return scalename, scales[scalename]
+
+
+def read_fasta(inputfile):
+    """Method for loading sequences from a FASTA formatted file into :py:attr:`sequences` & :py:attr:`names`.
+    This method is used by the base class :class:`modlamp.descriptors.PeptideDescriptor` if the input is a FASTA file.
+
+    :param inputfile: .fasta file with sequences and headers to read
+    :return: list of sequences in the attribute :py:attr:`sequences` with corresponding sequence names in
+        :py:attr:`names`.
+    """
+    names = list()  # list for storing names
+    sequences = list()  # list for storing sequences
+    seq = str()
+    with open(inputfile) as f:
+        all = f.readlines()
+        last = all[-1]
+        for line in all:
+            if line.startswith(">"):
+                names.append(
+                    line.split(" ")[0][1:].strip()
+                )  # add FASTA name without description as molecule name
+                sequences.append(seq.strip())
+                seq = str()
+            elif line == last:
+                seq += line.strip()  # remove potential white space
+                sequences.append(seq.strip())
+            else:
+                seq += line.strip()  # remove potential white space
+    return sequences[1:], names
+
+
+def save_fasta(filename, sequences, names=None):
+    """Method for saving sequences in the instance :py:attr:`sequences` to a file in FASTA format.
+
+    :param filename: {str} output filename (ending .fasta)
+    :param sequences: {list} sequences to be saved to file
+    :param names: {list} whether sequence names from self.names should be saved as sequence identifiers
+    :return: a FASTA formatted file containing the generated sequences
+    """
+    if os.path.exists(filename):
+        os.remove(filename)  # remove outputfile, it it exists
+
+    with open(filename, "w") as o:
+        for n, seq in enumerate(sequences):
+            if names:
+                o.write(">" + str(names[n]) + "\n")
+            else:
+                o.write(">Seq_" + str(n) + "\n")
+            o.write(seq + "\n")
+
+
+def aa_weights():
+    """Function holding molecular weight data on all natural amino acids.
+
+    :return: dictionary with amino acid letters and corresponding weights
+
+    .. versionadded:: v2.4.1
+    """
+    weights = {
+        "A": 89.093,
+        "C": 121.158,
+        "D": 133.103,
+        "E": 147.129,
+        "F": 165.189,
+        "G": 75.067,
+        "H": 155.155,
+        "I": 131.173,
+        "K": 146.188,
+        "L": 131.173,
+        "M": 149.211,
+        "N": 132.118,
+        "P": 115.131,
+        "Q": 146.145,
+        "R": 174.20,
+        "S": 105.093,
+        "T": 119.119,
+        "V": 117.146,
+        "W": 204.225,
+        "Y": 181.189,
+    }
+    return weights
+
+
+def count_aas(seq, scale="relative"):
+    """Function to count the amino acids occuring in a given sequence.
+
+    :param seq: {str} amino acid sequence
+    :param scale: {'absolute' or 'relative'} defines whether counts or frequencies are given for each AA
+    :return: {dict} dictionary with amino acids as keys and their counts in the sequence as values.
+    """
+    if seq == "":  # error if len(seq) == 0
+        seq = " "
+    aas = [
+        "A",
+        "C",
+        "D",
+        "E",
+        "F",
+        "G",
+        "H",
+        "I",
+        "K",
+        "L",
+        "M",
+        "N",
+        "P",
+        "Q",
+        "R",
+        "S",
+        "T",
+        "V",
+        "W",
+        "Y",
+    ]
+    scl = 1.0
+    if scale == "relative":
+        scl = len(seq)
+    aa = {a: (float(seq.count(a)) / scl) for a in aas}
+    aa = collections.OrderedDict(sorted(list(aa.items())))
+    return aa
+
+
+def count_ngrams(seq, n):
+    """Function to count the n-grams of an amino acid sequence. N can be one integer or a list of integers
+
+    :param seq: {str} amino acid sequence
+    :param n: {int or list of ints} defines whether counts or frequencies are given for each AA
+    :return: {dict} dictionary with n-grams as keys and their counts in the sequence as values.
+    """
+    if seq == "":
+        seq = " "
+    if isinstance(n, int):
+        n = [n]
+    ngrams = list()
+    for i in n:
+        ngrams.extend([seq[j : j + i] for j in range(len(seq) - (i - 1))])
+    counts = {g: (seq.count(g)) for g in set(ngrams)}
+    counts = collections.OrderedDict(
+        sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
+    )
+    return counts
+
+
+def aa_energies():
+    """Function holding free energies of transfer between cyclohexane and water for all natural amino acids.
+    H. G. Boman, D. Wade, I. a Boman, B. Wåhlin, R. B. Merrifield, *FEBS Lett*. **1989**, *259*, 103–106.
+
+    :return: dictionary with amino acid letters and corresponding energies.
+    """
+    energies = {
+        "L": -4.92,
+        "I": -4.92,
+        "V": -4.04,
+        "F": -2.98,
+        "M": -2.35,
+        "W": -2.33,
+        "A": -1.81,
+        "C": -1.28,
+        "G": -0.94,
+        "Y": 0.14,
+        "T": 2.57,
+        "S": 3.40,
+        "H": 4.66,
+        "Q": 5.54,
+        "K": 5.55,
+        "N": 6.64,
+        "E": 6.81,
+        "D": 8.72,
+        "R": 14.92,
+        "P": 0.0,
+    }
+    return energies
+
+
+def ngrams_apd():
+    """Function returning the most frequent 2-, 3- and 4-grams from all sequences in the `APD3
+    <http://aps.unmc.edu/AP/>`_, version August 2016 with 2727 sequences.
+    For all 2, 3 and 4grams, all possible ngrams were generated from all sequences and the top 50 most frequent
+    assembled into a list. Finally, leading and tailing spaces were striped and duplicates as well as ngrams containing
+    spaces were removed.
+
+    :return: numpy.array containing most frequent ngrams
+    """
+    return np.array(
+        [
+            "AGK",
+            "CKI",
+            "RR",
+            "YGGG",
+            "LSGL",
+            "RG",
+            "YGGY",
+            "PRP",
+            "LGGG",
+            "GV",
+            "GT",
+            "GS",
+            "GR",
+            "IAG",
+            "GG",
+            "GF",
+            "GC",
+            "GGYG",
+            "GA",
+            "GL",
+            "GK",
+            "GI",
+            "IPC",
+            "KAA",
+            "LAK",
+            "GLGG",
+            "GGLG",
+            "CKIT",
+            "GAGK",
+            "LLSG",
+            "LKK",
+            "FLP",
+            "LSG",
+            "SCK",
+            "LLS",
+            "GETC",
+            "VLG",
+            "GKLL",
+            "LLG",
+            "C",
+            "KCKI",
+            "G",
+            "VGK",
+            "CSC",
+            "TKKC",
+            "GCS",
+            "GKA",
+            "IGK",
+            "GESC",
+            "KVCY",
+            "KKL",
+            "KKI",
+            "KKC",
+            "LGGL",
+            "GLL",
+            "CGE",
+            "GGYC",
+            "GLLS",
+            "GLF",
+            "AKK",
+            "GKAA",
+            "ESCV",
+            "GLP",
+            "CGES",
+            "PCGE",
+            "FL",
+            "CGET",
+            "GLW",
+            "KGAA",
+            "KAAL",
+            "GGY",
+            "GGG",
+            "IKG",
+            "LKG",
+            "GGL",
+            "CK",
+            "GTC",
+            "CG",
+            "SKKC",
+            "CS",
+            "CR",
+            "KC",
+            "AGKA",
+            "KA",
+            "KG",
+            "LKCK",
+            "SCKL",
+            "KK",
+            "KI",
+            "KN",
+            "KL",
+            "SK",
+            "KV",
+            "SL",
+            "SC",
+            "SG",
+            "AAA",
+            "VAK",
+            "AAL",
+            "AAK",
+            "GGGG",
+            "KNVA",
+            "GGGL",
+            "GYG",
+            "LG",
+            "LA",
+            "LL",
+            "LK",
+            "LS",
+            "LP",
+            "GCSC",
+            "TC",
+            "GAA",
+            "AA",
+            "VA",
+            "VC",
+            "AG",
+            "VG",
+            "AI",
+            "AK",
+            "VL",
+            "AL",
+            "TPGC",
+            "IK",
+            "IA",
+            "IG",
+            "YGG",
+            "LGK",
+            "CSCK",
+            "GYGG",
+            "LGG",
+            "KGA",
+        ]
+    )
+
+
+def aa_formulas():
+    """
+    Function returning the molecular formulas of all amino acids. All amino acids are considered in the neutral form
+    (uncharged).
+    """
+    formulas = {
+        "A": {"C": 3, "H": 7, "N": 1, "O": 2, "S": 0},
+        "C": {"C": 3, "H": 7, "N": 1, "O": 2, "S": 1},
+        "D": {"C": 4, "H": 7, "N": 1, "O": 4, "S": 0},
+        "E": {"C": 5, "H": 9, "N": 1, "O": 4, "S": 0},
+        "F": {"C": 9, "H": 11, "N": 1, "O": 2, "S": 0},
+        "G": {"C": 2, "H": 5, "N": 1, "O": 2, "S": 0},
+        "H": {"C": 6, "H": 9, "N": 3, "O": 2, "S": 0},
+        "I": {"C": 6, "H": 13, "N": 1, "O": 2, "S": 0},
+        "K": {"C": 6, "H": 14, "N": 2, "O": 2, "S": 0},
+        "L": {"C": 6, "H": 13, "N": 1, "O": 2, "S": 0},
+        "M": {"C": 5, "H": 11, "N": 1, "O": 2, "S": 1},
+        "N": {"C": 4, "H": 8, "N": 2, "O": 3, "S": 0},
+        "P": {"C": 5, "H": 9, "N": 1, "O": 2, "S": 0},
+        "Q": {"C": 5, "H": 10, "N": 2, "O": 3, "S": 0},
+        "R": {"C": 6, "H": 14, "N": 4, "O": 2, "S": 0},
+        "S": {"C": 3, "H": 7, "N": 1, "O": 3, "S": 0},
+        "T": {"C": 4, "H": 9, "N": 1, "O": 3, "S": 0},
+        "V": {"C": 5, "H": 11, "N": 1, "O": 2, "S": 0},
+        "W": {"C": 11, "H": 12, "N": 2, "O": 2, "S": 0},
+        "Y": {"C": 9, "H": 11, "N": 1, "O": 3, "S": 0},
+    }
+    return formulas
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plotWheels/descriptors.py	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,1183 @@
+# -*- coding: utf-8 -*-
+"""
+.. currentmodule:: modlamp.descriptors
+
+.. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch>
+
+This module incorporates different classes to calculate peptide descriptor values. The following classes are available:
+
+=============================        ============================================================================
+Class                                Characteristics
+=============================        ============================================================================
+:py:class:`GlobalDescriptor`         Global one-dimensional peptide descriptors calculated from the AA sequence.
+:py:class:`PeptideDescriptor`        AA scale based global or convoluted descriptors (auto-/cross-correlated).
+=============================        ============================================================================
+
+.. seealso:: :class:`modlamp.core.BaseDescriptor` from which the classes in :mod:`modlamp.descriptors` inherit.
+"""
+
+import sys
+
+import numpy as np
+from scipy import stats
+from sklearn.externals.joblib import Parallel, delayed
+
+from plotWheels.core import (
+    BaseDescriptor,
+    load_scale,
+    count_aas,
+    aa_weights,
+    aa_energies,
+    aa_formulas,
+)
+
+__author__ = "Alex Müller, Gisela Gabernet"
+__docformat__ = "restructuredtext en"
+
+
+def _one_autocorr(seq, window, scale):
+    """Private function used for calculating auto-correlated descriptors for 1 given sequence, window and an AA scale.
+    This function is used by the :py:func:`calculate_autocorr` method of :py:class:`PeptideDescriptor`.
+
+    :param seq: {str} amino acid sequence to calculate descriptor for
+    :param window: {int} correlation-window size
+    :param scale: {str} amino acid scale to be used to calculate descriptor
+    :return: {numpy.array} calculated descriptor data
+    """
+    try:
+        m = list()  # list of lists to store translated sequence values
+        for l in range(len(seq)):  # translate AA sequence into values
+            m.append(scale[str(seq[l])])
+        # auto-correlation in defined sequence window
+        seqdesc = list()
+        for dist in range(window):  # for all correlation distances
+            for val in range(
+                len(scale["A"])
+            ):  # for all features of the descriptor scale
+                valsum = list()
+                cntr = 0.0
+                for pos in range(len(seq)):  # for every position in the sequence
+                    if (pos + dist) < len(
+                        seq
+                    ):  # check if corr distance is possible at that sequence position
+                        cntr += 1  # counter to scale sum
+                        valsum.append(m[pos][val] * m[pos + dist][val])
+                seqdesc.append(
+                    sum(valsum) / cntr
+                )  # append scaled correlation distance values
+        return seqdesc
+    except ZeroDivisionError:
+        print(
+            "ERROR!\nThe chosen correlation window % i is larger than the sequence %s !"
+            % (window, seq)
+        )
+
+
+def _one_crosscorr(seq, window, scale):
+    """Private function used for calculating cross-correlated descriptors for 1 given sequence, window and an AA scale.
+    This function is used by the :py:func:`calculate_crosscorr` method of :py:class:`PeptideDescriptor`.
+
+    :param seq: {str} amino acid sequence to calculate descriptor for
+    :param window: {int} correlation-window size
+    :param scale: {str} amino acid scale to be used to calculate descriptor
+    :return: {numpy.array} calculated descriptor data
+    """
+    try:
+        m = list()  # list of lists to store translated sequence values
+        for l in range(len(seq)):  # translate AA sequence into values
+            m.append(scale[str(seq[l])])
+        # auto-correlation in defined sequence window
+        seqdesc = list()
+        for val in range(len(scale["A"])):  # for all features of the descriptor scale
+            for cc in range(len(scale["A"])):  # for every feature cross correlation
+                if (val + cc) < len(
+                    scale["A"]
+                ):  # check if corr distance is in range of the num of features
+                    for dist in range(window):  # for all correlation distances
+                        cntr = float()
+                        valsum = list()
+                        for pos in range(
+                            len(seq)
+                        ):  # for every position in the sequence
+                            if (pos + dist) < len(
+                                seq
+                            ):  # check if corr distance is possible at that sequence pos
+                                cntr += 1  # counter to scale sum
+                                valsum.append(m[pos][val] * m[pos + dist][val + cc])
+                        seqdesc.append(
+                            sum(valsum) / cntr
+                        )  # append scaled correlation distance values
+        return seqdesc
+    except ZeroDivisionError:
+        print(
+            "ERROR!\nThe chosen correlation window % i is larger than the sequence %s !"
+            % (window, seq)
+        )
+
+
+def _one_arc(seq, modality, scale):
+    """Privat function used for calculating arc descriptors for one sequence and AA scale. This function is used by
+    :py:func:`calculate_arc` method method of :py:class:`PeptideDescriptor`.
+
+    :param seq: {str} amino acid sequence to calculate descriptor for
+    :param scale: {str} amino acid scale to be used to calculate descriptor
+    :return: {numpy.array} calculated descriptor data
+    """
+    desc_mat = []
+    for aa in seq:
+        desc_mat.append(scale[aa])
+    desc_mat = np.asarray(desc_mat)
+
+    # Check descriptor dimension
+    desc_dim = desc_mat.shape[1]
+
+    # list to store descriptor values for all windows
+    allwindows_arc = []
+
+    if len(seq) > 18:
+        window = 18
+        # calculates number of windows in sequence
+        num_windows = len(seq) - window
+    else:
+        window = len(seq)
+        num_windows = 1
+
+    # loop through all windows
+    for j in range(num_windows):
+        # slices descriptor matrix into current window
+        window_mat = desc_mat[j : j + window, :]
+
+        # defines order of amino acids in helical projection
+        order = [0, 11, 4, 15, 8, 1, 12, 5, 16, 9, 2, 13, 6, 17, 10, 3, 14, 7]
+
+        # orders window descriptor matrix into helical projection order
+        ordered = []
+        for pos in order:
+            try:
+                ordered.append(window_mat[pos, :])
+            except:
+                # for sequences of len < 18 adding dummy vector with 2s, length of descriptor dimensions
+                ordered.append([2] * desc_dim)
+        ordered = np.asarray(ordered)
+
+        window_arc = []
+
+        # loop through pharmacophoric features
+        for m in range(desc_dim):
+            all_arcs = (
+                []
+            )  # stores all arcs that can be found of a pharmacophoric feature
+            arc = 0
+
+            for n in range(
+                18
+            ):  # for all positions in helix, regardless of sequence length
+                if (
+                    ordered[n, m] == 0
+                ):  # if position does not contain pharmacophoric feature
+                    all_arcs.append(arc)  # append previous arc to all arcs list
+                    arc = 0  # arc is initialized
+                elif (
+                    ordered[n, m] == 1
+                ):  # if position contains pharmacophoric feature(PF), elongate arc by 20°
+                    arc += 20
+                elif ordered[n, m] == 2:  # if position doesn't contain amino acid:
+                    if (
+                        ordered[n - 1, m] == 1
+                    ):  # if previous position contained PF add 10°
+                        arc += 10
+                    elif (
+                        ordered[n - 1, m] == 0
+                    ):  # if previous position didn't contain PF don't add anything
+                        arc += 0
+                    elif (
+                        ordered[n - 2, m] == 1
+                    ):  # if previous position is empty then check second previous for PF
+                        arc += 10
+                    if (
+                        n == 17
+                    ):  # if we are at the last position check for position n=0 instead of next position.
+                        if ordered[0, m] == 1:  # if it contains PF add 10° extra
+                            arc += 10
+                    else:  # if next position contains PF add 10° extra
+                        if ordered[n + 1, m] == 1:
+                            arc += 10
+                        elif ordered[n + 1, m] == 0:
+                            arc += 0
+                        else:  # if next position is empty check for 2nd next position
+                            if n == 16:
+                                if ordered[0, m] == 1:
+                                    arc += 10
+                            else:
+                                if ordered[n + 2, m] == 1:
+                                    arc += 10
+
+            all_arcs.append(arc)
+            if not arc == 360:
+                arc0 = all_arcs.pop() + all_arcs[0]  # join first and last arc together
+                all_arcs = [arc0] + all_arcs[1:]
+
+            window_arc.append(
+                np.max(all_arcs)
+            )  # append to window arcs the maximum arc of this PF
+        allwindows_arc.append(window_arc)  # append all PF arcs of this window
+
+    allwindows_arc = np.asarray(allwindows_arc)
+
+    if modality == "max":
+        final_arc = np.max(
+            allwindows_arc, axis=0
+        )  # calculate maximum / mean arc along all windows
+    elif modality == "mean":
+        final_arc = np.mean(allwindows_arc, axis=0)
+    else:
+        print('modality is unknown, please choose between "max" and "mean"\n.')
+        sys.exit()
+    return final_arc
+
+
+def _charge(seq, ph=7.0, amide=False):
+    """Calculates charge of a single sequence. The method used is first described by Bjellqvist. In the case of
+    amidation, the value for the  'Cterm' pKa is 15 (and Cterm is added to the pos_pks dictionary.
+    The pKa scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed).
+
+    **pos_pks** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
+
+    **neg_pks** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
+
+    :param ph: {float} pH at which to calculate peptide charge.
+    :param amide: {boolean} whether the sequences have an amidated C-terminus.
+    :return: {array} descriptor values in the attribute :py:attr:`descriptor
+    """
+
+    if amide:
+        pos_pks = {"Nterm": 9.38, "K": 10.67, "R": 12.10, "H": 6.04}
+        neg_pks = {"Cterm": 15.0, "D": 3.71, "E": 4.15, "C": 8.14, "Y": 10.10}
+    else:
+        pos_pks = {"Nterm": 9.38, "K": 10.67, "R": 12.10, "H": 6.04}
+        neg_pks = {"Cterm": 2.15, "D": 3.71, "E": 4.15, "C": 8.14, "Y": 10.10}
+
+    aa_content = count_aas(seq, scale="absolute")
+    aa_content["Nterm"] = 1.0
+    aa_content["Cterm"] = 1.0
+    pos_charge = 0.0
+    for aa, pK in pos_pks.items():
+        c_r = 10 ** (pK - ph)
+        partial_charge = c_r / (c_r + 1.0)
+        pos_charge += aa_content[aa] * partial_charge
+    neg_charge = 0.0
+    for aa, pK in neg_pks.items():
+        c_r = 10 ** (ph - pK)
+        partial_charge = c_r / (c_r + 1.0)
+        neg_charge += aa_content[aa] * partial_charge
+    return round(pos_charge - neg_charge, 3)
+
+
+class GlobalDescriptor(BaseDescriptor):
+    """
+    Base class for global, non-amino acid scale dependant descriptors. The following descriptors can be calculated by
+    the **methods** linked below:
+
+    - `Sequence Length      <modlamp.html#modlamp.descriptors.GlobalDescriptor.length>`_
+    - `Molecular Formula    <modlamp.html#modlamp.descriptors.GlobalDescriptor.formula>`_
+    - `Molecular Weight     <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_MW>`_
+    - `Sequence Charge      <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_charge>`_
+    - `Charge Density       <modlamp.html#modlamp.descriptors.GlobalDescriptor.charge_density>`_
+    - `Isoelectric Point    <modlamp.html#modlamp.descriptors.GlobalDescriptor.isoelectric_point>`_
+    - `Instability Index    <modlamp.html#modlamp.descriptors.GlobalDescriptor.instability_index>`_
+    - `Aromaticity          <modlamp.html#modlamp.descriptors.GlobalDescriptor.aromaticity>`_
+    - `Aliphatic Index      <modlamp.html#modlamp.descriptors.GlobalDescriptor.aliphatic_index>`_
+    - `Boman Index          <modlamp.html#modlamp.descriptors.GlobalDescriptor.boman_index>`_
+    - `Hydrophobic Ratio    <modlamp.html#modlamp.descriptors.GlobalDescriptor.hydrophobic_ratio>`_
+    - `all of the above     <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_all>`_
+    """
+
+    def length(self, append=False):
+        """
+        Method to calculate the length (total AA count) of every sequence in the attribute :py:attr:`sequences`.
+
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of sequence lengths in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor(['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'])
+        >>> desc.length()
+        >>> desc.descriptor
+        array([[ 8.], [12.], [12.]])
+        """
+        desc = []
+        for seq in self.sequences:
+            desc.append(float(len(seq.strip())))
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("Length")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["Length"]
+
+    def formula(self, amide=False, append=False):
+        """Method to calculate the molecular formula of every sequence in the attribute :py:attr:`sequences`.
+
+        :param amide: {boolean} whether the sequences are C-terminally amidated.
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of molecular formulas {str} in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor(['KADSFLSADGHSADFSLDKKLKERL', 'ERTILSDFPQWWFASLDFLNC', 'ACDEFGHIKLMNPQRSTVWY'])
+        >>> desc.formula(amide=True)
+        >>> for v in desc.descriptor:
+        ...     print(v[0])
+        C122 H197 N35 O39
+        C121 H168 N28 O33 S
+        C106 H157 N29 O30 S2
+
+        .. seealso:: :py:func:`modlamp.core.aa_formulas()`
+
+        .. versionadded:: v2.7.6
+        """
+        desc = []
+        formulas = aa_formulas()
+        for seq in self.sequences:
+            f = {"C": 0, "H": 0, "N": 0, "O": 0, "S": 0}
+            for aa in seq:  # loop over all AAs
+                for k in f.keys():
+                    f[k] += formulas[aa][k]
+
+            # substract H2O for every peptide bond
+            f["H"] -= 2 * (len(seq) - 1)
+            f["O"] -= len(seq) - 1
+
+            if amide:  # add C-terminal amide --> replace OH with NH2
+                f["O"] -= 1
+                f["H"] += 1
+                f["N"] += 1
+
+            if f["S"] != 0:
+                val = "C%s H%s N%s O%s %s%s" % (
+                    f["C"],
+                    f["H"],
+                    f["N"],
+                    f["O"],
+                    "S",
+                    f["S"],
+                )
+            else:
+                val = "C%s H%s N%s O%s" % (f["C"], f["H"], f["N"], f["O"])
+
+            desc.append([val])
+
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("Formula")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["Formula"]
+
+    def calculate_MW(self, amide=False, append=False):
+        """Method to calculate the molecular weight [g/mol] of every sequence in the attribute :py:attr:`sequences`.
+
+        :param amide: {boolean} whether the sequences are C-terminally amidated (subtracts 0.95 from the MW).
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('IAESFKGHIPL')
+        >>> desc.calculate_MW(amide=True)
+        >>> desc.descriptor
+        array([[ 1210.43]])
+
+        .. seealso:: :py:func:`modlamp.core.aa_weights()`
+
+        .. versionchanged:: v2.1.5 amide option added
+        """
+        desc = []
+        weights = aa_weights()
+        for seq in self.sequences:
+            mw = []
+            for aa in seq:  # sum over aa weights
+                mw.append(weights[aa])
+            desc.append(
+                round(sum(mw) - 18.015 * (len(seq) - 1), 2)
+            )  # sum over AA MW and subtract H20 MW for every
+            # peptide bond
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if (
+            amide
+        ):  # if sequences are amidated, subtract 0.98 from calculated MW (OH - NH2)
+            desc = [d - 0.98 for d in desc]
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("MW")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["MW"]
+
+    def calculate_charge(self, ph=7.0, amide=False, append=False):
+        """Method to overall charge of every sequence in the attribute :py:attr:`sequences`.
+
+        The method used is first described by Bjellqvist. In the case of amidation, the value for the 'Cterm' pKa is 15
+        (and Cterm is added to the pos_pKs dictionary.
+        The pKa scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed).
+
+        **pos_pKs** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
+
+        **neg_pKs** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
+
+        :param ph: {float} ph at which to calculate peptide charge.
+        :param amide: {boolean} whether the sequences have an amidated C-terminus.
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('KLAKFGKRSELVALSG')
+        >>> desc.calculate_charge(ph=7.4, amide=True)
+        >>> desc.descriptor
+        array([[ 3.989]])
+        """
+
+        desc = []
+        for seq in self.sequences:
+            desc.append(
+                _charge(seq, ph, amide)
+            )  # calculate charge with helper function
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("Charge")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["Charge"]
+
+    def charge_density(self, ph=7.0, amide=False, append=False):
+        """Method to calculate the charge density (charge / MW) of every sequences in the attributes :py:attr:`sequences`
+
+        :param ph: {float} pH at which to calculate peptide charge.
+        :param amide: {boolean} whether the sequences have an amidated C-terminus.
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`.
+        :Example:
+
+        >>> desc = GlobalDescriptor('GNSDLLIEQRTLLASDEF')
+        >>> desc.charge_density(ph=6, amide=True)
+        >>> desc.descriptor
+        array([[-0.00097119]])
+        """
+        self.calculate_charge(ph, amide)
+        charges = self.descriptor
+        self.calculate_MW(amide)
+        masses = self.descriptor
+        desc = charges / masses
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("ChargeDensity")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["ChargeDensity"]
+
+    def isoelectric_point(self, amide=False, append=False):
+        """
+        Method to calculate the isoelectric point of every sequence in the attribute :py:attr:`sequences`.
+        The pK scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed).
+
+         **pos_pKs** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04}
+
+         **neg_pKs** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10}
+
+        :param amide: {boolean} whether the sequences have an amidated C-terminus.
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('KLFDIKFGHIPQRST')
+        >>> desc.isoelectric_point()
+        >>> desc.descriptor
+        array([[ 10.6796875]])
+        """
+        ph, ph1, ph2 = float(), float(), float()
+        desc = []
+        for seq in self.sequences:
+
+            # Bracket between ph1 and ph2
+            ph = 7.0
+            charge = _charge(seq, ph, amide)
+            if charge > 0.0:
+                ph1 = ph
+                charge1 = charge
+                while charge1 > 0.0:
+                    ph = ph1 + 1.0
+                    charge = _charge(seq, ph, amide)
+                    if charge > 0.0:
+                        ph1 = ph
+                        charge1 = charge
+                    else:
+                        ph2 = ph
+                        break
+            else:
+                ph2 = ph
+                charge2 = charge
+                while charge2 < 0.0:
+                    ph = ph2 - 1.0
+                    charge = _charge(seq, ph, amide)
+                    if charge < 0.0:
+                        ph2 = ph
+                        charge2 = charge
+                    else:
+                        ph1 = ph
+                        break
+            # Bisection
+            while ph2 - ph1 > 0.0001 and charge != 0.0:
+                ph = (ph1 + ph2) / 2.0
+                charge = _charge(seq, ph, amide)
+                if charge > 0.0:
+                    ph1 = ph
+                else:
+                    ph2 = ph
+            desc.append(ph)
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("pI")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["pI"]
+
+    def instability_index(self, append=False):
+        """
+        Method to calculate the instability of every sequence in the attribute :py:attr:`sequences`.
+        The instability index is a prediction of protein stability based on the amino acid composition.
+        ([1] K. Guruprasad, B. V Reddy, M. W. Pandit, Protein Eng. 1990, 4, 155–161.)
+
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('LLASMNDLLAKRST')
+        >>> desc.instability_index()
+        >>> desc.descriptor
+        array([[ 63.95714286]])
+        """
+
+        desc = []
+        dimv = load_scale("instability")[1]
+        for seq in self.sequences:
+            stabindex = float()
+            for i in range(len(seq) - 1):
+                stabindex += dimv[seq[i]][seq[i + 1]]
+            desc.append((10.0 / len(seq)) * stabindex)
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("InstabilityInd")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["InstabilityInd"]
+
+    def aromaticity(self, append=False):
+        """
+        Method to calculate the aromaticity of every sequence in the attribute :py:attr:`sequences`.
+        According to Lobry, 1994, it is simply the relative frequency of Phe+Trp+Tyr.
+
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('GLFYWRFFLQRRFLYWW')
+        >>> desc.aromaticity()
+        >>> desc.descriptor
+        array([[ 0.52941176]])
+        """
+        desc = []
+        for seq in self.sequences:
+            f = seq.count("F")
+            w = seq.count("W")
+            y = seq.count("Y")
+            desc.append(float(f + w + y) / len(seq))
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("Aromaticity")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["Aromaticity"]
+
+    def aliphatic_index(self, append=False):
+        """
+        Method to calculate the aliphatic index of every sequence in the attribute :py:attr:`sequences`.
+        According to Ikai, 1980, the aliphatic index is a measure of thermal stability of proteins and is dependant
+        on the relative volume occupied by aliphatic amino acids (A,I,L & V).
+        ([1] A. Ikai, J. Biochem. 1980, 88, 1895–1898.)
+
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('KWLKYLKKLAKLVK')
+        >>> desc.aliphatic_index()
+        >>> desc.descriptor
+        array([[ 139.28571429]])
+        """
+        desc = []
+        aa_dict = aa_weights()
+        for seq in self.sequences:
+            d = {aa: seq.count(aa) for aa in aa_dict.keys()}  # count aa
+            d = {
+                k: (float(d[k]) / len(seq)) * 100 for k in d.keys()
+            }  # get mole percent of all AA
+            desc.append(
+                d["A"] + 2.9 * d["V"] + 3.9 * (d["I"] + d["L"])
+            )  # formula for calculating the AI (Ikai, 1980)
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("AliphaticInd")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["AliphaticInd"]
+
+    def boman_index(self, append=False):
+        """Method to calculate the boman index of every sequence in the attribute :py:attr:`sequences`.
+        According to Boman, 2003, the boman index is a measure for protein-protein interactions and is calculated by
+        summing over all amino acid free energy of transfer [kcal/mol] between water and cyclohexane,[2] followed by
+        dividing by    sequence length.
+        ([1] H. G. Boman, D. Wade, I. a Boman, B. Wåhlin, R. B. Merrifield, *FEBS Lett*. **1989**, *259*, 103–106.
+        [2] A. Radzick, R. Wolfenden, *Biochemistry* **1988**, *27*, 1664–1670.)
+
+        .. seealso:: :py:func:`modlamp.core.aa_energies()`
+
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('GLFDIVKKVVGALGSL')
+        >>> desc.boman_index()
+        >>> desc.descriptor
+        array([[-1.011875]])
+        """
+        d = aa_energies()
+        desc = []
+        for seq in self.sequences:
+            val = []
+            for a in seq:
+                val.append(d[a])
+            desc.append(sum(val) / len(val))
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("BomanInd")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["BomanInd"]
+
+    def hydrophobic_ratio(self, append=False):
+        """
+        Method to calculate the hydrophobic ratio of every sequence in the attribute :py:attr:`sequences`, which is the
+        relative frequency of the amino acids **A,C,F,I,L,M & V**.
+
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('VALLYWRTVLLAIII')
+        >>> desc.hydrophobic_ratio()
+        >>> desc.descriptor
+        array([[ 0.73333333]])
+        """
+        desc = []
+        aa_dict = aa_weights()
+        for seq in self.sequences:
+            pa = {aa: seq.count(aa) for aa in aa_dict.keys()}  # count aa
+            # formula for calculating the AI (Ikai, 1980):
+            desc.append(
+                (pa["A"] + pa["C"] + pa["F"] + pa["I"] + pa["L"] + pa["M"] + pa["V"])
+                / float(len(seq))
+            )
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            self.featurenames.append("HydrophRatio")
+        else:
+            self.descriptor = np.array(desc)
+            self.featurenames = ["HydrophRatio"]
+
+    def calculate_all(self, ph=7.4, amide=True):
+        """Method combining all global descriptors and appending them into the feature matrix in the attribute
+        :py:attr:`descriptor`.
+
+        :param ph: {float} pH at which to calculate peptide charge
+        :param amide: {boolean} whether the sequences have an amidated C-terminus.
+        :return: array of descriptor values in the attribute :py:attr:`descriptor`
+        :Example:
+
+        >>> desc = GlobalDescriptor('AFGHFKLKKLFIFGHERT')
+        >>> desc.calculate_all(amide=True)
+        >>> desc.featurenames
+        ['Length', 'MW', 'ChargeDensity', 'pI', 'InstabilityInd', 'Aromaticity', 'AliphaticInd', 'BomanInd', 'HydRatio']
+        >>> desc.descriptor
+        array([[ 18.,  2.17559000e+03,   1.87167619e-03,   1.16757812e+01, ...  1.10555556e+00,   4.44444444e-01]])
+        >>> desc.save_descriptor('/path/to/outputfile.csv')  # save the descriptor data (with feature names header)
+        """
+
+        # This is a strange way of doing it. However, the append=True option excludes length and charge, no idea why!
+        fn = []
+        self.length()  # sequence length
+        l = self.descriptor
+        fn.extend(self.featurenames)
+        self.calculate_MW(amide=amide)  # molecular weight
+        mw = self.descriptor
+        fn.extend(self.featurenames)
+        self.calculate_charge(ph=ph, amide=amide)  # net charge
+        c = self.descriptor
+        fn.extend(self.featurenames)
+        self.charge_density(ph=ph, amide=amide)  # charge density
+        cd = self.descriptor
+        fn.extend(self.featurenames)
+        self.isoelectric_point(amide=amide)  # pI
+        pi = self.descriptor
+        fn.extend(self.featurenames)
+        self.instability_index()  # instability index
+        si = self.descriptor
+        fn.extend(self.featurenames)
+        self.aromaticity()  # global aromaticity
+        ar = self.descriptor
+        fn.extend(self.featurenames)
+        self.aliphatic_index()  # aliphatic index
+        ai = self.descriptor
+        fn.extend(self.featurenames)
+        self.boman_index()  # Boman index
+        bi = self.descriptor
+        fn.extend(self.featurenames)
+        self.hydrophobic_ratio()  # Hydrophobic ratio
+        hr = self.descriptor
+        fn.extend(self.featurenames)
+
+        self.descriptor = np.concatenate((l, mw, c, cd, pi, si, ar, ai, bi, hr), axis=1)
+        self.featurenames = fn
+
+
+class PeptideDescriptor(BaseDescriptor):
+    """Base class for peptide descriptors. The following **amino acid descriptor scales** are available for descriptor
+    calculation:
+
+    - **AASI**           (An amino acid selectivity index scale for helical antimicrobial peptides, *[1] D. Juretić, D. Vukicević, N. Ilić, N. Antcheva, A. Tossi, J. Chem. Inf. Model. 2009, 49, 2873–2882.*)
+    - **ABHPRK**         (modlabs inhouse physicochemical feature scale (Acidic, Basic, Hydrophobic, Polar, aRomatic, Kink-inducer)
+    - **argos**          (Argos hydrophobicity amino acid scale, *[2] Argos, P., Rao, J. K. M. & Hargrave, P. A., Eur. J. Biochem. 2005, 128, 565–575.*)
+    - **bulkiness**      (Amino acid side chain bulkiness scale, *[3] J. M. Zimmerman, N. Eliezer, R. Simha, J. Theor. Biol. 1968, 21, 170–201.*)
+    - **charge_phys**    (Amino acid charge at pH 7.0 - Hystidine charge +0.1.)
+    - **charge_acid**    (Amino acid charge at acidic pH - Hystidine charge +1.0.)
+    - **cougar**         (modlabs inhouse selection of global peptide descriptors)
+    - **eisenberg**      (the Eisenberg hydrophobicity consensus amino acid scale, *[4] D. Eisenberg, R. M. Weiss, T. C. Terwilliger, W. Wilcox, Faraday Symp. Chem. Soc. 1982, 17, 109.*)
+    - **Ez**             (potential that assesses energies of insertion of amino acid side chains into lipid bilayers, *[5] A. Senes, D. C. Chadi, P. B. Law, R. F. S. Walters, V. Nanda, W. F. DeGrado, J. Mol. Biol. 2007, 366, 436–448.*)
+    - **flexibility**    (amino acid side chain flexibilitiy scale, *[6] R. Bhaskaran, P. K. Ponnuswamy, Int. J. Pept. Protein Res. 1988, 32, 241–255.*)
+    - **grantham**       (amino acid side chain composition, polarity and molecular volume, *[8] Grantham, R. Science. 185, 862–864 (1974).*)
+    - **gravy**          (GRAVY hydrophobicity amino acid scale, *[9] J. Kyte, R. F. Doolittle, J. Mol. Biol. 1982, 157, 105–132.*)
+    - **hopp-woods**     (Hopp-Woods amino acid hydrophobicity scale,*[10] T. P. Hopp, K. R. Woods, Proc. Natl. Acad. Sci. 1981, 78, 3824–3828.*)
+    - **ISAECI**         (Isotropic Surface Area (ISA) and Electronic Charge Index (ECI) of amino acid side chains, *[11] E. R. Collantes, W. J. Dunn, J. Med. Chem. 1995, 38, 2705–2713.*)
+    - **janin**          (Janin hydrophobicity amino acid scale, *[12] J. L. Cornette, K. B. Cease, H. Margalit, J. L. Spouge, J. A. Berzofsky, C. DeLisi, J. Mol. Biol. 1987, 195, 659–685.*)
+    - **kytedoolittle**  (Kyte & Doolittle hydrophobicity amino acid scale, *[13] J. Kyte, R. F. Doolittle, J. Mol. Biol. 1982, 157, 105–132.*)
+    - **levitt_alpha**   (Levitt amino acid alpha-helix propensity scale, extracted from http://web.expasy.org/protscale. *[14] M. Levitt, Biochemistry 1978, 17, 4277-4285.*)
+    - **MSS**            (A graph-theoretical index that reflects topological shape and size of amino acid side chains, *[15] C. Raychaudhury, A. Banerjee, P. Bag, S. Roy, J. Chem. Inf. Comput. Sci. 1999, 39, 248–254.*)
+    - **MSW**            (Amino acid scale based on a PCA of the molecular surface based WHIM descriptor (MS-WHIM), extended to natural amino acids, *[16] A. Zaliani, E. Gancia, J. Chem. Inf. Comput. Sci 1999, 39, 525–533.*)
+    - **pepArc**         (modlabs pharmacophoric feature scale, dimensions are: hydrophobicity, polarity, positive charge, negative charge, proline.)
+    - **pepcats**        (modlabs pharmacophoric feature based PEPCATS scale, *[17] C. P. Koch, A. M. Perna, M. Pillong, N. K. Todoroff, P. Wrede, G. Folkers, J. A. Hiss, G. Schneider, PLoS Comput. Biol. 2013, 9, e1003088.*)
+    - **polarity**       (Amino acid polarity scale, *[18] J. M. Zimmerman, N. Eliezer, R. Simha, J. Theor. Biol. 1968, 21, 170–201.*)
+    - **PPCALI**         (modlabs inhouse scale derived from a PCA of 143 amino acid property scales, *[19] C. P. Koch, A. M. Perna, M. Pillong, N. K. Todoroff, P. Wrede, G. Folkers, J. A. Hiss, G. Schneider, PLoS Comput. Biol. 2013, 9, e1003088.*)
+    - **refractivity**   (Relative amino acid refractivity values, *[20] T. L. McMeekin, M. Wilensky, M. L. Groves, Biochem. Biophys. Res. Commun. 1962, 7, 151–156.*)
+    - **t_scale**        (A PCA derived scale based on amino acid side chain properties calculated with 6 different probes of the GRID program, *[21] M. Cocchi, E. Johansson, Quant. Struct. Act. Relationships 1993, 12, 1–8.*)
+    - **TM_tend**        (Amino acid transmembrane propensity scale, extracted from http://web.expasy.org/protscale, *[22] Zhao, G., London E. Protein Sci. 2006, 15, 1987-2001.*)
+    - **z3**             (The original three dimensional Z-scale, *[23] S. Hellberg, M. Sjöström, B. Skagerberg, S. Wold, J. Med. Chem. 1987, 30, 1126–1135.*)
+    - **z5**             (The extended five dimensional Z-scale, *[24] M. Sandberg, L. Eriksson, J. Jonsson, M. Sjöström, S. Wold, J. Med. Chem. 1998, 41, 2481–2491.*)
+
+    Further, amino acid scale independent methods can be calculated with help of the :class:`GlobalDescriptor` class.
+
+    """
+
+    def __init__(self, seqs, scalename="Eisenberg"):
+        """
+        :param seqs: a .fasta file with sequences, a list of sequences or a single sequence as string to calculate the
+            descriptor values for.
+        :param scalename: {str} name of the amino acid scale (one of the given list above) used to calculate the
+            descriptor values
+        :return: initialized attributes :py:attr:`sequences`, :py:attr:`names` and dictionary :py:attr:`scale` with
+            amino acid scale values of the scale name in :py:attr:`scalename`.
+        :Example:
+
+        >>> AMP = PeptideDescriptor('KLLKLLKKLLKLLK','pepcats')
+        >>> AMP.sequences
+        ['KLLKLLKKLLKLLK']
+        >>> seqs = PeptideDescriptor('/Path/to/file.fasta', 'eisenberg')  # load sequences from .fasta file
+        >>> seqs.sequences
+        ['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'...]
+        """
+        super(PeptideDescriptor, self).__init__(seqs)
+        self.scalename, self.scale = load_scale(scalename.lower())
+        self.all_moms = list()  # for passing hydrophobic moments to calculate_profile
+        self.all_globs = list()  # for passing global  to calculate_profile
+
+    def load_scale(self, scalename):
+        """Method to load amino acid values from a given scale
+
+        :param scalename: {str} name of the amino acid scale to be loaded.
+        :return: loaded amino acid scale values in a dictionary in the attribute :py:attr:`scale`.
+
+        .. seealso:: :func:`modlamp.core.load_scale()`
+        """
+        self.scalename, self.scale = load_scale(scalename.lower())
+
+    def calculate_autocorr(self, window, append=False):
+        """Method for auto-correlating the amino acid values for a given descriptor scale
+
+        :param window: {int} correlation window for descriptor calculation in a sliding window approach
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: calculated descriptor numpy.array in the attribute :py:attr:`descriptor`.
+        :Example:
+
+        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','PPCALI')
+        >>> AMP.calculate_autocorr(7)
+        >>> AMP.descriptor
+        array([[  1.28442339e+00,   1.29025116e+00,   1.03240901e+00, .... ]])
+        >>> AMP.descriptor.shape
+        (1, 133)
+
+        .. versionchanged:: v.2.3.0
+        """
+        desc = Parallel(n_jobs=-1)(
+            delayed(_one_autocorr)(seq, window, self.scale) for seq in self.sequences
+        )
+
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+        else:
+            self.descriptor = np.array(desc)
+
+    def calculate_crosscorr(self, window, append=False):
+        """Method for cross-correlating the amino acid values for a given descriptor scale
+
+        :param window: {int} correlation window for descriptor calculation in a sliding window approach
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: calculated descriptor numpy.array in the attribute :py:attr:`descriptor`.
+        :Example:
+
+        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','pepcats')
+        >>> AMP.calculate_crosscorr(7)
+        >>> AMP.descriptor
+        array([[ 0.6875    ,  0.46666667,  0.42857143,  0.61538462,  0.58333333, ... ]])
+        >>> AMP.descriptor.shape
+        (1, 147)
+        """
+        desc = Parallel(n_jobs=-1)(
+            delayed(_one_crosscorr)(seq, window, self.scale) for seq in self.sequences
+        )
+
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+        else:
+            self.descriptor = np.array(desc)
+
+    def calculate_moment(self, window=1000, angle=100, modality="max", append=False):
+        """Method for calculating the maximum or mean moment of the amino acid values for a given descriptor scale and
+        window.
+
+        :param window: {int} amino acid window in which to calculate the moment. If the sequence is shorter than the
+            window, the length of the sequence is taken. So if the default window of 1000 is chosen, for all sequences
+            shorter than 1000, the **global** hydrophobic moment will be calculated. Otherwise, the maximal
+            hydrophiobic moment for the chosen window size found in the sequence will be returned.
+        :param angle: {int} angle in which to calculate the moment. **100** for alpha helices, **180** for beta sheets.
+        :param modality: {'all', 'max' or 'mean'} Calculate respectively maximum or mean hydrophobic moment. If all,
+            moments for all windows are returned.
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: Calculated descriptor as a numpy.array in the attribute :py:attr:`descriptor` and all possible global
+            values in :py:attr:`all_moms` (needed for the :py:func:`calculate_profile` method)
+        :Example:
+
+        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL', 'eisenberg')
+        >>> AMP.calculate_moment()
+        >>> AMP.descriptor
+        array([[ 0.48790226]])
+        """
+        if self.scale["A"] == list:
+            print(
+                "\n Descriptor moment calculation is only possible for one dimensional descriptors.\n"
+            )
+
+        else:
+            desc = []
+            for seq in self.sequences:
+                wdw = min(
+                    window, len(seq)
+                )  # if sequence is shorter than window, take the whole sequence instead
+                mtrx = []
+                mwdw = []
+
+                for aa in range(len(seq)):
+                    mtrx.append(self.scale[str(seq[aa])])
+
+                for i in range(len(mtrx) - wdw + 1):
+                    mwdw.append(sum(mtrx[i : i + wdw], []))
+
+                mwdw = np.asarray(mwdw)
+                rads = (
+                    angle * (np.pi / 180) * np.asarray(range(wdw))
+                )  # calculate actual moment (radial)
+                vcos = (mwdw * np.cos(rads)).sum(axis=1)
+                vsin = (mwdw * np.sin(rads)).sum(axis=1)
+                moms = np.sqrt(vsin**2 + vcos**2) / wdw
+
+                if modality == "max":  # take window with maximal value
+                    moment = np.max(moms)
+                elif modality == "mean":  # take average value over all windows
+                    moment = np.mean(moms)
+                elif modality == "all":
+                    moment = moms
+                else:
+                    print(
+                        '\nERROR!\nModality parameter is wrong, please choose between "all", "max" and "mean".\n'
+                    )
+                    return
+                desc.append(moment)
+                self.all_moms.append(moms)
+
+            desc = np.asarray(desc).reshape(len(desc), 1)  # final descriptor array
+
+            if append:
+                self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            else:
+                self.descriptor = np.array(desc)
+
+    def calculate_global(self, window=1000, modality="max", append=False):
+        """Method for calculating a global / window averaging descriptor value of a given AA scale
+
+        :param window: {int} amino acid window in which to calculate the moment. If the sequence is shorter than the
+            window, the length of the sequence is taken.
+        :param modality: {'max' or 'mean'} Calculate respectively maximum or mean hydrophobic moment.
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: Calculated descriptor as a numpy.array in the attribute :py:attr:`descriptor` and all possible global
+            values in :py:attr:`all_globs` (needed for the :py:func:`calculate_profile` method)
+        :Example:
+
+        >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','eisenberg')
+        >>> AMP.calculate_global(window=1000, modality='max')
+        >>> AMP.descriptor
+        array([[ 0.44875]])
+        """
+        desc = list()
+        for n, seq in enumerate(self.sequences):
+            wdw = min(
+                window, len(seq)
+            )  # if sequence is shorter than window, take the whole sequence instead
+            mtrx = []
+            mwdw = []
+
+            for l in range(len(seq)):  # translate AA sequence into values
+                mtrx.append(self.scale[str(seq[l])])
+
+            for i in range(len(mtrx) - wdw + 1):
+                mwdw.append(
+                    sum(mtrx[i : i + wdw], [])
+                )  # list of all the values for the different windows
+
+            mwdw = np.asarray(mwdw)
+            glob = np.sum(mwdw, axis=1) / float(wdw)
+            outglob = float()
+
+            if modality in ["max", "mean"]:
+                if modality == "max":
+                    outglob = np.max(
+                        glob
+                    )  # returned moment will be the maximum of all windows
+                elif modality == "mean":
+                    outglob = np.mean(
+                        glob
+                    )  # returned moment will be the mean of all windows
+                else:
+                    print(
+                        'Modality parameter is wrong, please choose between "max" and "mean"\n.'
+                    )
+                    return
+            desc.append(outglob)
+            self.all_globs.append(glob)
+
+        desc = np.asarray(desc).reshape(len(desc), 1)
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+        else:
+            self.descriptor = np.array(desc)
+
+    def calculate_profile(self, prof_type="uH", window=7, append=False):
+        """Method for calculating hydrophobicity or hydrophobic moment profiles for given sequences and fitting for
+        slope and intercept. The hydrophobicity scale used is "eisenberg"
+
+        :param prof_type: prof_type of profile, available: 'H' for hydrophobicity or 'uH' for hydrophobic moment
+        :param window: {int} size of sliding window used (odd-numbered).
+        :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the
+            attribute :py:attr:`descriptor`.
+        :return: Fitted slope and intercept of calculated profile for every given sequence in the attribute
+            :py:attr:`descriptor`.
+        :Example:
+
+        >>> AMP = PeptideDescriptor('KLLKLLKKVVGALG','kytedoolittle')
+        >>> AMP.calculate_profile(prof_type='H')
+        >>> AMP.descriptor
+        array([[ 0.03731293,  0.19246599]])
+        """
+        if prof_type == "uH":
+            self.calculate_moment(window=window)
+            y_vals = self.all_moms
+        elif prof_type == "H":
+            self.calculate_global(window=window)
+            y_vals = self.all_globs
+        else:
+            print(
+                'prof_type parameter is unknown, choose "uH" for hydrophobic moment or "H" for hydrophobicity\n.'
+            )
+            sys.exit()
+
+        desc = list()
+        for n, seq in enumerate(self.sequences):
+            x_vals = range(len(seq))[int((window - 1) / 2) : -int((window - 1) / 2)]
+            if len(seq) <= window:
+                slope, intercept, r_value, p_value, std_err = [0, 0, 0, 0, 0]
+            else:
+                slope, intercept, r_value, p_value, std_err = stats.linregress(
+                    x_vals, y_vals[n]
+                )
+            desc.append([slope, intercept])
+
+        if append:
+            self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+        else:
+            self.descriptor = np.array(desc)
+
+    def calculate_arc(self, modality="max", append=False):
+        """Method for calculating property arcs as seen in the helical wheel plot. Use for binary amino acid scales only.
+
+        :param modality: modality of the arc to calculate, to choose between "max" and "mean".
+        :param append: if true, append to current descriptor stored in the descriptor attribute.
+        :return: calculated descriptor as numpy.array in the descriptor attribute.
+
+        :Example:
+
+        >>> arc = PeptideDescriptor("KLLKLLKKLLKLLK", scalename="peparc")
+        >>> arc.calculate_arc(modality="max", append=False)
+        >>> arc.descriptor
+        array([[200, 160, 160,   0,   0]])
+        """
+        desc = Parallel(n_jobs=-1)(
+            delayed(_one_arc)(seq, modality, self.scale) for seq in self.sequences
+        )
+
+        # Converts each of the amino acids to descriptor vector
+        for seq in self.sequences:
+
+            # desc_mat = []
+            # for aa in seq:
+            #     desc_mat.append(self.scale[aa])
+            # desc_mat = np.asarray(desc_mat)
+            #
+            # # Check descriptor dimension
+            # desc_dim = desc_mat.shape[1]
+            #
+            # # list to store descriptor values for all windows
+            # allwindows_arc = []
+            #
+            # if len(seq) > 18:
+            #     window = 18
+            #     # calculates number of windows in sequence
+            #     num_windows = len(seq) - window
+            # else:
+            #     window = len(seq)
+            #     num_windows = 1
+            #
+            # # loop through all windows
+            # for j in range(num_windows):
+            #     # slices descriptor matrix into current window
+            #     window_mat = desc_mat[j:j + window, :]
+            #
+            #     # defines order of amino acids in helical projection
+            #     order = [0, 11, 4, 15, 8, 1, 12, 5, 16, 9, 2, 13, 6, 17, 10, 3, 14, 7]
+            #
+            #     # orders window descriptor matrix into helical projection order
+            #     ordered = []
+            #     for pos in order:
+            #         try:
+            #             ordered.append(window_mat[pos, :])
+            #         except:
+            #             # for sequences of len < 18 adding dummy vector with 2s, length of descriptor dimensions
+            #             ordered.append([2] * desc_dim)
+            #     ordered = np.asarray(ordered)
+            #
+            #     window_arc = []
+            #
+            #     # loop through pharmacophoric features
+            #     for m in range(desc_dim):
+            #         all_arcs = []  # stores all arcs that can be found of a pharmacophoric feature
+            #         arc = 0
+            #
+            #         for n in range(18):  # for all positions in helix, regardless of sequence length
+            #             if ordered[n, m] == 0:  # if position does not contain pharmacophoric feature
+            #                 all_arcs.append(arc)  # append previous arc to all arcs list
+            #                 arc = 0  # arc is initialized
+            #             elif ordered[n, m] == 1:  # if position contains pharmacophoric feature(PF), elongate arc by 20°
+            #                 arc += 20
+            #             elif ordered[n, m] == 2:  # if position doesn't contain amino acid:
+            #                 if ordered[n - 1, m] == 1:  # if previous position contained PF add 10°
+            #                     arc += 10
+            #                 elif ordered[n - 1, m] == 0:  # if previous position didn't contain PF don't add anything
+            #                     arc += 0
+            #                 elif ordered[
+            #                             n - 2, m] == 1:  # if previous position is empty then check second previous for PF
+            #                     arc += 10
+            #                 if n == 17:  # if we are at the last position check for position n=0 instead of next position.
+            #                     if ordered[0, m] == 1:  # if it contains PF add 10° extra
+            #                         arc += 10
+            #                 else:  # if next position contains PF add 10° extra
+            #                     if ordered[n + 1, m] == 1:
+            #                         arc += 10
+            #                     elif ordered[n + 1, m] == 0:
+            #                         arc += 0
+            #                     else:  # if next position is empty check for 2nd next position
+            #                         if n == 16:
+            #                             if ordered[0, m] == 1:
+            #                                 arc += 10
+            #                         else:
+            #                             if ordered[n + 2, m] == 1:
+            #                                 arc += 10
+            #
+            #         all_arcs.append(arc)
+            #         if not arc == 360:
+            #             arc0 = all_arcs.pop() + all_arcs[0]  # join first and last arc together
+            #             all_arcs = [arc0] + all_arcs[1:]
+            #
+            #         window_arc.append(np.max(all_arcs))  # append to window arcs the maximum arc of this PF
+            #     allwindows_arc.append(window_arc)  # append all PF arcs of this window
+            #
+            # allwindows_arc = np.asarray(allwindows_arc)
+            #
+            # if modality == 'max':
+            #     final_arc = np.max(allwindows_arc, axis=0)  # calculate maximum / mean arc along all windows
+            # elif modality == 'mean':
+            #     final_arc = np.mean(allwindows_arc, axis=0)
+            # else:
+            #     print('modality is unknown, please choose between "max" and "mean"\n.')
+            #     sys.exit()
+
+            if append:
+                self.descriptor = np.hstack((self.descriptor, np.array(desc)))
+            else:
+                self.descriptor = np.array(desc)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plotWheels/helical_wheel.py	Mon Jun 05 02:44:43 2023 +0000
@@ -0,0 +1,562 @@
+import matplotlib
+
+matplotlib.use("Agg")
+
+import matplotlib.lines as lines
+import matplotlib.patches as patches
+import matplotlib.pyplot as plt
+
+# from mpl_toolkits.mplot3d import Axes3D
+import numpy as np
+from scipy.stats.kde import gaussian_kde
+
+from plotWheels.core import load_scale
+from plotWheels.descriptors import PeptideDescriptor
+
+
+def helical_wheel(
+    sequence,
+    colorcoding="rainbow",
+    text_color=None,
+    lineweights=True,
+    filename=None,
+    seq=False,
+    moment=False,
+    seqRange=1,
+    t_size=32,
+    rot=float(90),
+    dpi=150,
+    numbering=True,
+):
+    """A function to project a given peptide sequence onto a helical wheel plot. It can be useful to illustrate the
+    properties of alpha-helices, like positioning of charged and hydrophobic residues along the sequence.
+
+    :param sequence: {str} the peptide sequence for which the helical wheel should be drawn
+    :param colorcoding: {str} the color coding to be used, available: *rainbow*, *charge*, *polar*, *simple*,
+        *amphipathic*, *custom_input*, *none*
+    :param lineweights: {boolean} defines whether connection lines decrease in thickness along the sequence
+    :param filename: {str} filename  where to save the plot. *default = None* --> show the plot
+    :param seq: {bool} whether the amino acid sequence should be plotted as a title
+    :param moment: {bool} whether the Eisenberg hydrophobic moment should be calculated and plotted
+    :param seqRange: {int} starting value of residue location in sequence
+    :param t_size: {int} text size
+    :param rot: {float} rotation by radians --> converted to degrees.
+    :param dpi: {int} dpi parameter for saved files
+    :return: a helical wheel projection plot of the given sequence (interactively or in **filename**)
+    :Example:
+
+    >>> helical_wheel('GLFDIVKKVVGALG')
+    >>> helical_wheel('KLLKLLKKLLKLLK', colorcoding='charge')
+    >>> helical_wheel('AKLWLKAGRGFGRG', colorcoding='none', lineweights=False)
+    >>> helical_wheel('ACDEFGHIKLMNPQRSTVWY')
+
+    .. image:: ../docs/static/wheel1.png
+        :height: 300px
+    .. image:: ../docs/static/wheel2.png
+        :height: 300px
+    .. image:: ../docs/static/wheel3.png
+        :height: 300px
+    .. image:: ../docs/static/wheel4.png
+        :height: 300px
+
+    .. versionadded:: v2.1.5
+    """
+    # color mappings
+    aa = [
+        "A",
+        "C",
+        "D",
+        "E",
+        "F",
+        "G",
+        "H",
+        "I",
+        "K",
+        "L",
+        "M",
+        "N",
+        "P",
+        "Q",
+        "R",
+        "S",
+        "T",
+        "V",
+        "W",
+        "Y",
+    ]
+    if colorcoding == type(str):
+        f_rainbow = [
+            "#3e3e28",
+            "#ffcc33",
+            "#b30047",
+            "#b30047",
+            "#ffcc33",
+            "#3e3e28",
+            "#80d4ff",
+            "#ffcc33",
+            "#0047b3",
+            "#ffcc33",
+            "#ffcc33",
+            "#b366ff",
+            "#29a329",
+            "#b366ff",
+            "#0047b3",
+            "#ff66cc",
+            "#ff66cc",
+            "#ffcc33",
+            "#ffcc33",
+            "#ffcc33",
+        ]
+        f_charge = [
+            "#000000",
+            "#000000",
+            "#ff4d94",
+            "#ff4d94",
+            "#000000",
+            "#000000",
+            "#80d4ff",
+            "#000000",
+            "#80d4ff",
+            "#000000",
+            "#000000",
+            "#000000",
+            "#000000",
+            "#000000",
+            "#80d4ff",
+            "#000000",
+            "#000000",
+            "#000000",
+            "#000000",
+            "#000000",
+        ]
+        f_polar = [
+            "#000000",
+            "#000000",
+            "#80d4ff",
+            "#80d4ff",
+            "#000000",
+            "#000000",
+            "#80d4ff",
+            "#000000",
+            "#80d4ff",
+            "#000000",
+            "#000000",
+            "#80d4ff",
+            "#000000",
+            "#80d4ff",
+            "#80d4ff",
+            "#80d4ff",
+            "#80d4ff",
+            "#000000",
+            "#000000",
+            "#000000",
+        ]
+        f_simple = [
+            "#ffcc33",
+            "#ffcc33",
+            "#0047b3",
+            "#0047b3",
+            "#ffcc33",
+            "#7f7f7f",
+            "#0047b3",
+            "#ffcc33",
+            "#0047b3",
+            "#ffcc33",
+            "#ffcc33",
+            "#0047b3",
+            "#ffcc33",
+            "#0047b3",
+            "#0047b3",
+            "#0047b3",
+            "#0047b3",
+            "#ffcc33",
+            "#ffcc33",
+            "#ffcc33",
+        ]
+        f_none = ["#ffffff"] * 20
+        f_amphi = [
+            "#ffcc33",
+            "#29a329",
+            "#b30047",
+            "#b30047",
+            "#f79318",
+            "#80d4ff",
+            "#0047b3",
+            "#ffcc33",
+            "#0047b3",
+            "#ffcc33",
+            "#ffcc33",
+            "#80d4ff",
+            "#29a329",
+            "#80d4ff",
+            "#0047b3",
+            "#80d4ff",
+            "#80d4ff",
+            "#ffcc33",
+            "#f79318",
+            "#f79318",
+        ]
+        t_rainbow = [
+            "w",
+            "k",
+            "w",
+            "w",
+            "k",
+            "w",
+            "k",
+            "k",
+            "w",
+            "k",
+            "k",
+            "k",
+            "k",
+            "k",
+            "w",
+            "k",
+            "k",
+            "k",
+            "k",
+            "k",
+        ]
+        t_charge = [
+            "w",
+            "w",
+            "k",
+            "k",
+            "w",
+            "w",
+            "k",
+            "w",
+            "k",
+            "w",
+            "w",
+            "w",
+            "w",
+            "w",
+            "k",
+            "w",
+            "w",
+            "w",
+            "w",
+            "w",
+        ]
+        t_polar = [
+            "w",
+            "w",
+            "k",
+            "k",
+            "w",
+            "w",
+            "k",
+            "w",
+            "k",
+            "w",
+            "w",
+            "k",
+            "w",
+            "k",
+            "k",
+            "k",
+            "k",
+            "w",
+            "w",
+            "w",
+        ]
+        t_simple = [
+            "k",
+            "k",
+            "w",
+            "w",
+            "k",
+            "w",
+            "w",
+            "k",
+            "w",
+            "k",
+            "k",
+            "k",
+            "k",
+            "w",
+            "w",
+            "w",
+            "w",
+            "k",
+            "k",
+            "k",
+        ]
+        t_none = ["k"] * 20
+        t_amphi = [
+            "k",
+            "k",
+            "w",
+            "w",
+            "w",
+            "k",
+            "w",
+            "k",
+            "w",
+            "k",
+            "k",
+            "k",
+            "w",
+            "k",
+            "w",
+            "k",
+            "k",
+            "k",
+            "w",
+            "w",
+        ]
+        d_eisberg = load_scale("eisenberg")[1]  # eisenberg hydrophobicity values for HM
+    else:
+        f_custom = colorcoding
+        t_custom = text_color
+        d_eisberg = load_scale("eisenberg")[1]
+
+    if lineweights:
+        lw = np.arange(0.1, 5.5, 5.0 / (len(sequence) - 1))  # line thickness array
+        lw = lw[::-1]  # inverse order
+    else:
+        lw = [2.0] * (len(sequence) - 1)
+    # check which color coding to use
+    if colorcoding == type(str):
+        if colorcoding == "rainbow":
+            df = dict(zip(aa, f_rainbow))
+            dt = dict(zip(aa, t_rainbow))
+        elif colorcoding == "charge":
+            df = dict(zip(aa, f_charge))
+            dt = dict(zip(aa, t_charge))
+        elif colorcoding == "polar":
+            df = dict(zip(aa, f_polar))
+            dt = dict(zip(aa, t_polar))
+        elif colorcoding == "simple":
+            df = dict(zip(aa, f_simple))
+            dt = dict(zip(aa, t_simple))
+        elif colorcoding == "none":
+            df = dict(zip(aa, f_none))
+            dt = dict(zip(aa, t_none))
+        elif colorcoding == "amphipathic":
+            df = dict(zip(aa, f_amphi))
+            dt = dict(zip(aa, t_amphi))
+        else:
+            print("Unknown color coding, 'rainbow' used instead")
+            df = dict(zip(aa, f_rainbow))
+            dt = dict(zip(aa, t_rainbow))
+    else:
+        df = dict(zip(aa, f_custom))
+        dt = dict(zip(aa, t_custom))
+
+    # degree to radian
+    deg = np.arange(float(len(sequence))) * -100.0
+    deg = [d + rot for d in deg]  # start at 270 degree in unit circle (on top)
+    rad = np.radians(deg)
+
+    # dict for coordinates and eisenberg values
+    d_hydro = dict(zip(rad, [0.0] * len(rad)))
+
+    # create figure
+    fig = plt.figure(frameon=False, figsize=(10, 10))
+    ax = fig.add_subplot(111)
+    old = None
+    hm = list()
+
+    # iterate over sequence
+    for i, r in enumerate(rad):
+        new = (np.cos(r), np.sin(r))  # new AA coordinates
+        if i < 18:
+            # plot the connecting lines
+            if old is not None:
+                line = lines.Line2D(
+                    (old[0], new[0]),
+                    (old[1], new[1]),
+                    transform=ax.transData,
+                    color="k",
+                    linewidth=lw[i - 1],
+                )
+                line.set_zorder(1)  # 1 = level behind circles
+                ax.add_line(line)
+        elif 17 < i < 36:
+            line = lines.Line2D(
+                (old[0], new[0]),
+                (old[1], new[1]),
+                transform=ax.transData,
+                color="k",
+                linewidth=lw[i - 1],
+            )
+            line.set_zorder(1)  # 1 = level behind circles
+            ax.add_line(line)
+            new = (np.cos(r) * 1.2, np.sin(r) * 1.2)
+        elif i == 36:
+            line = lines.Line2D(
+                (old[0], new[0]),
+                (old[1], new[1]),
+                transform=ax.transData,
+                color="k",
+                linewidth=lw[i - 1],
+            )
+            line.set_zorder(1)  # 1 = level behind circles
+            ax.add_line(line)
+            new = (np.cos(r) * 1.4, np.sin(r) * 1.4)
+        else:
+            new = (np.cos(r) * 1.4, np.sin(r) * 1.4)
+
+        # plot circles
+        circ = patches.Circle(
+            new,
+            radius=0.125,
+            transform=ax.transData,
+            edgecolor="k",
+            facecolor=df[sequence[i]],
+        )
+        circ.set_zorder(2)  # level in front of lines
+        ax.add_patch(circ)
+
+        # check if N- or C-terminus and add subscript, then plot AA letter
+        if numbering:
+            size = t_size
+            if i == 0:
+                ax.text(
+                    new[0],
+                    new[1],
+                    sequence[i] + "$_N$",
+                    va="center",
+                    ha="center",
+                    transform=ax.transData,
+                    size=size,
+                    color=dt[sequence[i]],
+                    fontweight="bold",
+                )
+            elif i == len(sequence) - 1:
+                ax.text(
+                    new[0],
+                    new[1],
+                    sequence[i] + "$_C$",
+                    va="center",
+                    ha="center",
+                    transform=ax.transData,
+                    size=size,
+                    color=dt[sequence[i]],
+                    fontweight="bold",
+                )
+            else:
+                seqRange += 1
+                ax.text(
+                    new[0],
+                    new[1],
+                    sequence[i] + "$_{" + str(seqRange) + "}$",
+                    va="center",
+                    ha="center",
+                    transform=ax.transData,
+                    size=size,
+                    color=dt[sequence[i]],
+                    fontweight="bold",
+                )
+
+            eb = d_eisberg[sequence[i]][0]  # eisenberg value for this AA
+            hm.append(
+                [eb * new[0], eb * new[1]]
+            )  # save eisenberg hydrophobicity vector value to later calculate HM
+
+            old = (np.cos(r), np.sin(r))  # save as previous coordinates
+
+        else:
+            size = t_size
+            if i == 0:
+                ax.text(
+                    new[0],
+                    new[1],
+                    sequence[i] + "$_N$",
+                    va="center",
+                    ha="center",
+                    transform=ax.transData,
+                    size=size,
+                    color=dt[sequence[i]],
+                    fontweight="bold",
+                )
+            elif i == len(sequence) - 1:
+                ax.text(
+                    new[0],
+                    new[1],
+                    sequence[i] + "$_C$",
+                    va="center",
+                    ha="center",
+                    transform=ax.transData,
+                    size=size,
+                    color=dt[sequence[i]],
+                    fontweight="bold",
+                )
+            else:
+                ax.text(
+                    new[0],
+                    new[1],
+                    sequence[i],
+                    va="center",
+                    ha="center",
+                    transform=ax.transData,
+                    size=size,
+                    color=dt[sequence[i]],
+                    fontweight="bold",
+                )
+
+            eb = d_eisberg[sequence[i]][0]  # eisenberg value for this AA
+            hm.append(
+                [eb * new[0], eb * new[1]]
+            )  # save eisenberg hydrophobicity vector value to later calculate HM
+
+            old = (np.cos(r), np.sin(r))  # save as previous coordinates
+
+    # draw hydrophobic moment arrow if moment option
+    if moment:
+        v_hm = np.sum(np.array(hm), 0)
+        x = 0.0333 * v_hm[0]
+        y = 0.0333 * v_hm[1]
+        ax.arrow(
+            0.0,
+            0.0,
+            x,
+            y,
+            head_width=0.04,
+            head_length=0.03,
+            transform=ax.transData,
+            color="k",
+            linewidth=6.0,
+        )
+        desc = PeptideDescriptor(sequence)  # calculate hydrophobic moment
+        desc.calculate_moment()
+        if (
+            abs(x) < 0.2 and y > 0.0
+        ):  # right positioning of HM text so arrow does not cover it
+            z = -0.2
+        else:
+            z = 0.2
+        plt.text(
+            0.0,
+            z,
+            str(round(desc.descriptor[0][0], 3)),
+            fontdict={"fontsize": 20, "fontweight": "bold", "ha": "center"},
+        )
+
+    # plot shape
+    if len(sequence) < 19:
+        ax.set_xlim(-1.2, 1.2)
+        ax.set_ylim(-1.2, 1.2)
+    else:
+        ax.set_xlim(-1.4, 1.4)
+        ax.set_ylim(-1.4, 1.4)
+    ax.spines["right"].set_visible(False)
+    ax.spines["top"].set_visible(False)
+    ax.spines["left"].set_visible(False)
+    ax.spines["bottom"].set_visible(False)
+    cur_axes = plt.gca()
+    cur_axes.axes.get_xaxis().set_visible(False)
+    cur_axes.axes.get_yaxis().set_visible(False)
+    plt.tight_layout()
+
+    if seq:
+        plt.title(sequence, fontweight="bold", fontsize=20)
+
+    # show or save plot
+    if filename:
+        plt.savefig(filename, dpi=dpi)
+    else:
+        plt.show()