# HG changeset patch # User bebatut # Date 1461600923 14400 # Node ID 54d811ad2b52197338ebc21cf1dc84bca1a3d0f3 planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/cdhit/ commit ea0424ae045ac797c080aeabab9a8536f7eb2f84-dirty diff -r 000000000000 -r 54d811ad2b52 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,12 @@ +CD_HIT memory usage +==================== + +By default, maximum of 4Gb is attributed to CD_HIT. + +To change the maximum memory usage, you can edit the CD_HIT_MEM_OPTIONS in the file: + +/cd-hit/4.6.4/bebatut/cdhit//env.sh + +For example to increase to 8Gb, you will write: + +CD_HIT_MEM_OPTIONS='-M 8000' \ No newline at end of file diff -r 000000000000 -r 54d811ad2b52 cd_hit_est.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cd_hit_est.xml Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,92 @@ + + Cluster a nucleotide dataset into representative sequences + + + cdhit_macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Cluster 0 + 0 2799aa, >PF04998.6|RPOC2_CHLRE/275-3073... * + >Cluster 1 + 0 2214aa, >PF06317.1|Q6Y625_9VIRU/1-2214... at 80% + 1 2215aa, >PF06317.1|O09705_9VIRU/1-2215... at 84% + 2 2217aa, >PF06317.1|Q6Y630_9VIRU/1-2217... * + 3 2216aa, >PF06317.1|Q6GWS6_9VIRU/1-2216... at 84% + 4 527aa, >PF06317.1|Q67E14_9VIRU/6-532... at 63% + >Cluster 2 + 0 2202aa, >PF06317.1|Q6UY61_9VIRU/8-2209... at 60% + 1 2208aa, >PF06317.1|Q6IVU4_JUNIN/1-2208... * + 2 2207aa, >PF06317.1|Q6IVU0_MACHU/1-2207... at 73% + 3 2208aa, >PF06317.1|RRPO_TACV/1-2208... at 69% + ]]> + + + 10.1093/bioinformatics/btl158 + 10.1093/bioinformatics/bts565 + + diff -r 000000000000 -r 54d811ad2b52 cd_hit_protein.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cd_hit_protein.xml Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,88 @@ + + Cluster a protein dataset into representative sequences + + cdhit_macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Cluster 0 + 0 2799aa, >PF04998.6|RPOC2_CHLRE/275-3073... * + >Cluster 1 + 0 2214aa, >PF06317.1|Q6Y625_9VIRU/1-2214... at 80% + 1 2215aa, >PF06317.1|O09705_9VIRU/1-2215... at 84% + 2 2217aa, >PF06317.1|Q6Y630_9VIRU/1-2217... * + 3 2216aa, >PF06317.1|Q6GWS6_9VIRU/1-2216... at 84% + 4 527aa, >PF06317.1|Q67E14_9VIRU/6-532... at 63% + >Cluster 2 + 0 2202aa, >PF06317.1|Q6UY61_9VIRU/8-2209... at 60% + 1 2208aa, >PF06317.1|Q6IVU4_JUNIN/1-2208... * + 2 2207aa, >PF06317.1|Q6IVU0_MACHU/1-2207... at 73% + 3 2208aa, >PF06317.1|RRPO_TACV/1-2208... at 69% + ]]> + + + 10.1093/bioinformatics/btl158 + 10.1093/bioinformatics/bts565 + + diff -r 000000000000 -r 54d811ad2b52 cdhit_macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cdhit_macros.xml Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,116 @@ + + + + cd-hit + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 54d811ad2b52 test-data/cd_hit_est_in.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd_hit_est_in.fa Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,74 @@ +>F12Fcsw_481739 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGACGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_133982 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAA +>F14Fcsw_149685 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_175165 +CGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_176364 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACGCGGTCCAA +>F14Fcsw_224425 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTAAGACACGGTCCAA +>F14Fcsw_27361 +CGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_2745 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_37069 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAG +>F14Fcsw_38031 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCAA +>F14Fcsw_49588 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACGCGGTCCAA +>F22Fcsw_400293 +CCGGCGCACGGGTGAGTAACACGTATCCAACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_133990 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCGACAACACTGGGATAGCCTTTCGAAAGAAAGATTAATACCGGATGGCATAGTTTTCCCGCATGGAAAAACTATTAAAGAATTTCGGTTATCGATGGGGATGCGTTCCATTAGGCAGTTGGCGGGGTAACGGCCCACCAAACCGACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_160873 +CGGGTGAGTAACGCGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTAGTAGGCGGGGTAACGGCCCACCTAGCCAACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_86009 +GGCGACCGGCGCACGGGTGAGTAACGCGTATCCAACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_96640 +TCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F31Fcsw_135439 +TTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTAGTAGGCGGGGTAACGGCCCACCTAGCCGGCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F32Fcsw_322472 +AACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAACTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAG +>F33Fcsw_137774 +CTGAGTGGCGGACGGGTGAGTAACACGTATCCAACCTGCCGATGACTCGGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAACTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGCCCCCCACATTGGAACTGAGACACGGTCCAA +>F34Fcsw_50866 +CGACCGGCGCACGGGTGAGTAACGCGTATCCAACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGTATATTAAAACCGCATGGTTTTACTATTAAAGAATTTCGGTTATCGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M12Fcsw_69587 +CGGGTGAGTAACACGTGGGCAACCTGCCTCATAGAGGGGAATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGCAGTTGGTGAGGTAACGGCTCACCAAACCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M13Fcsw_127764 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACGACGATGCGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGTACTGAGACACGGACCAA +>M13Fcsw_128004 +CGGACGGGTGAGTAACGCGTGAGTAACCTGCCGATAACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGGCGACGATGCGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGTACTGAGACACGGACCAA +>M13Fcsw_198303 +TAACACGTATCCAACCTGCCTCATACTCGGGGATAACCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCAACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_117325 +GTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_151062 +CGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_181677 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_186607 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGGTTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M24Fcsw_136217 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCACCGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACATAGGAACTGAGACACGGTCCTA +>M41Fcsw_259146 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_137216 +CAACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_138199 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_225418 +CACGTATCCAACCTGCCGTCTACTCTTGGACAGCCTTCTGAAAGGAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_263016 +ACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCCA +>M42Fcsw_334979 +GGGATAGCCGCCCGAAAGGACGGGTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M43Fcsw_250770 +GTATCCAACCTGCCGTCTACTCTTGGACAGCCTTCTGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M44Fcsw_200453 +CTAGTGGCGGACGGGTGAGTAACGCGTATCCAACCTGCCGATGACTCGGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAACTTCGGTCATCGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA diff -r 000000000000 -r 54d811ad2b52 test-data/cd_hit_protein_in.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd_hit_protein_in.fasta Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,50 @@ +>sp|P00325|ADH1B_HUMAN Alcohol dehydrogenase 1B OS=Homo sapiens GN=ADH1B PE=1 SV=2 +MSTAGKVIKCKAAVLWEVKKPFSIEDVEVAPPKAYEVRIKMVAVGICRTDDHVVSGNLVT +PLPVILGHEAAGIVESVGEGVTTVKPGDKVIPLFTPQCGKCRVCKNPESNYCLKNDLGNP +RGTLQDGTRRFTCRGKPIHHFLGTSTFSQYTVVDENAVAKIDAASPLEKVCLIGCGFSTG +YGSAVNVAKVTPGSTCAVFGLGGVGLSAVMGCKAAGAARIIAVDINKDKFAKAKELGATE +CINPQDYKKPIQEVLKEMTDGGVDFSFEVIGRLDTMMASLLCCHEACGTSVIVGVPPASQ +NLSINPMLLLTGRTWKGAVYGGFKSKEGIPKLVADFMAKKFSLDALITHVLPFEKINEGF +DLLHSGKSIRTVLTF +>tr|K7D361|K7D361_PANTR Alcohol dehydrogenase 1B (Class I), beta polypeptide OS=Pan troglodytes GN=ADH1B PE=2 SV=1 +MSTAGKVIKCKAAVLWEVKKPFSIEDVEVAPPKAYEVRIKMVAVGICRTDDHVVSGNLVT +PLPAILGHEAAGIVESVGEGVTTVKPGDKVIPLFTPQCGKCRVCKNPESNYCLKNDLGNP +RGTLQDGTRRFTCRGKPIHHFLGTSTFSQYTVVDENAVAKIDAASPLEKVCLIGCGFSTG +YGSAVNVAKVTPGSTCAVFGLGGVGLSAVMGCKAAGAARIIAVDINKDKFAKAKELGATE +CINPQDYKKPIQEVLKEMTDGGVDFSFEVIGRLDTMMASLLCCHEACGTSVIVGVPPASQ +NLSINPMLLLTGRTWKGAVYGGFKSKEGIPKLVADFMAKKFSLDALITHVLPFEKINEGF +DLLHSGKSIRTVLTF +>sp|P00329|ADH1_MOUSE Alcohol dehydrogenase 1 OS=Mus musculus GN=Adh1 PE=2 SV=2 +MSTAGKVIKCKAAVLWELHKPFTIEDIEVAPPKAHEVRIKMVATGVCRSDDHVVSGTLVT +PLPAVLGHEGAGIVESVGEGVTCVKPGDKVIPLFSPQCGECRICKHPESNFCSRSDLLMP +RGTLREGTSRFSCKGKQIHNFISTSTFSQYTVVDDIAVAKIDGASPLDKVCLIGCGFSTG +YGSAVKVAKVTPGSTCAVFGLGGVGLSVIIGCKAAGAARIIAVDINKDKFAKAKELGATE +CINPQDYSKPIQEVLQEMTDGGVDFSFEVIGRLDTMTSALLSCHAACGVSVVVGVPPNAQ +NLSMNPMLLLLGRTWKGAIFGGFKSKDSVPKLVADFMAKKFPLDPLITHVLPFEKINEAF +DLLRSGKSIRTVLTF +>sp|P00338-2|LDHA_HUMAN Isoform 2 of L-lactate dehydrogenase A chain OS=Homo sapiens GN=LDHA +MATLKDQLIYNLLKEEQTPQNKITVVGVGAVGMACAISILMKDLADELALVDVIEDKLKG +EMMDLQHGSLFLRTPKIVSGKDYNVTANSKLVIITAGARQQEGESRLNLVQRNVNIFKFI +IPNVVKYSPNCKLLIVSNPVDILTYVAWKISGFPKNRVIGSGCNLDSARFRYLMGERLGV +HPLSCHGWVLGEHGDSSVPVWSGMNVAGVSLKTLHPDLGTDKDKEQWKECRYTLGDPKGA +AILKSSDVISFHCLGYNRILGGGCACCPFYLICD +>sp|P00338-5|LDHA_HUMAN Isoform 5 of L-lactate dehydrogenase A chain OS=Homo sapiens GN=LDHA +MATLKDQLIYNLLKEEQTPQNKITVVGVGAVGMACAISILMKDLADELALVDVIEDKLKG +EMMDLQHGSLFLRTPKIVSGKDYNVTANSKLVIITAGARQQEGESRLNLVQRNVNIFKFI +IPNVVKYSPNCKLLIVSNPVDILTYVAWKISGFPKNRVIGSGCNLDSARFRYLMGERLGV +HPLSCHGWVLGEHGDSSVPVWSGMNVAGVSLKTLHPDLGTDKDKEQWKEVHKQVVERVFT +E +>sp|P00340|LDHA_CHICK L-lactate dehydrogenase A chain OS=Gallus gallus GN=LDHA PE=1 SV=3 +MSLKDHLIHNVHKEEHAHAHNKISVVGVGAVGMACAISILMKDLADELTLVDVVEDKLKG +EMLDLQHGSLFLKTPKIISGKDYSVTAHSKLVIVTAGARQQEGESRLNLVQRNVNIFKFI +IPNVVKYSPDCKLLIVSNPVDILTYVAWKISGFPKHRVIGSGCNLDSARFRHLMGERLGI +HPLSCHGWIVGEHGDSSVPVWSGVNVAGVSLKALHPDMGTDADKEHWKEVHKQVVDSAYE +VIKLKGYTSWAIGLSVADLAETIMKNLRRVHPISTAVKGMHGIKDDVFLSVPCVLGSSGI +TDVVKMILKPDEEEKIKKSADTLWGIQKELQF +>sp|P19858|LDHA_BOVIN L-lactate dehydrogenase A chain OS=Bos taurus GN=LDHA PE=2 SV=2 +MATLKDQLIQNLLKEEHVPQNKITIVGVGAVGMACAISILMKDLADEVALVDVMEDKLKG +EMMDLQHGSLFLRTPKIVSGKDYNVTANSRLVIITAGARQQEGESRLNLVQRNVNIFKFI +IPNIVKYSPNCKLLVVSNPVDILTYVAWKISGFPKNRVIGSGCNLDSARFRYLMGERLGV +HPLSCHGWILGEHGDSSVPVWSGVNVAGVSLKNLHPELGTDADKEQWKAVHKQVVDSAYE +VIKLKGYTSWAIGLSVADLAESIMKNLRRVHPISTMIKGLYGIKEDVFLSVPCILGQNGI +SDVVKVTLTHEEEACLKKSADTLWGIQKELQF diff -r 000000000000 -r 54d811ad2b52 test-data/est_clusters_output.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/est_clusters_output.txt Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,39 @@ +>Cluster 0 +0 239nt, >F12Fcsw_481739... at +/99.16% +1 243nt, >F14Fcsw_133982... * +2 242nt, >F14Fcsw_149685... at +/99.59% +3 230nt, >F14Fcsw_175165... at +/96.96% +4 239nt, >F14Fcsw_176364... at +/97.91% +5 239nt, >F14Fcsw_224425... at +/99.16% +6 240nt, >F14Fcsw_27361... at +/99.58% +7 239nt, >F14Fcsw_2745... at +/99.58% +8 238nt, >F14Fcsw_37069... at +/99.58% +9 238nt, >F14Fcsw_38031... at +/99.16% +10 239nt, >F14Fcsw_49588... at +/99.16% +11 230nt, >F23Fcsw_160873... at +/96.52% +12 183nt, >F31Fcsw_135439... at +/95.63% +13 241nt, >F34Fcsw_50866... at +/91.29% +14 230nt, >M12Fcsw_69587... at +/92.61% +15 240nt, >M13Fcsw_127764... at +/97.92% +16 222nt, >M13Fcsw_198303... at +/96.40% +17 227nt, >M14Fcsw_117325... at +/96.92% +18 241nt, >M14Fcsw_151062... at +/99.59% +19 239nt, >M14Fcsw_181677... at +/97.07% +20 240nt, >M14Fcsw_186607... at +/99.17% +21 239nt, >M24Fcsw_136217... at +/94.56% +22 239nt, >M41Fcsw_259146... at +/97.91% +23 210nt, >M42Fcsw_137216... at +/99.05% +24 239nt, >M42Fcsw_138199... at +/99.16% +25 208nt, >M42Fcsw_263016... at +/98.56% +>Cluster 1 +0 238nt, >F22Fcsw_400293... at +/91.18% +1 243nt, >F23Fcsw_133990... * +2 243nt, >F23Fcsw_86009... at +/90.95% +3 205nt, >F23Fcsw_96640... at +/91.71% +4 210nt, >F32Fcsw_322472... at +/90.95% +5 242nt, >F33Fcsw_137774... at +/90.91% +6 234nt, >M13Fcsw_128004... at +/90.17% +7 218nt, >M42Fcsw_225418... at +/90.83% +8 193nt, >M42Fcsw_334979... at +/90.16% +9 216nt, >M43Fcsw_250770... at +/90.28% +10 241nt, >M44Fcsw_200453... at +/90.04% diff -r 000000000000 -r 54d811ad2b52 test-data/est_fasta_output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/est_fasta_output.fasta Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,4 @@ +>F14Fcsw_133982 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAA +>F23Fcsw_133990 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCGACAACACTGGGATAGCCTTTCGAAAGAAAGATTAATACCGGATGGCATAGTTTTCCCGCATGGAAAAACTATTAAAGAATTTCGGTTATCGATGGGGATGCGTTCCATTAGGCAGTTGGCGGGGTAACGGCCCACCAAACCGACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA diff -r 000000000000 -r 54d811ad2b52 test-data/protein_clusters_output.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein_clusters_output.txt Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,12 @@ +>Cluster 0 +0 375aa, >sp|P00325|ADH1B_HUM... * +1 375aa, >tr|K7D361|K7D361_PA... at 99.73% +>Cluster 1 +0 375aa, >sp|P00329|ADH1_MOUS... * +>Cluster 2 +0 332aa, >sp|P00340|LDHA_CHIC... * +>Cluster 3 +0 241aa, >sp|P00338-5|LDHA_HU... at 91.29% +1 332aa, >sp|P19858|LDHA_BOVI... * +>Cluster 4 +0 274aa, >sp|P00338-2|LDHA_HU... * diff -r 000000000000 -r 54d811ad2b52 test-data/protein_fasta_output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein_fasta_output.fasta Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,36 @@ +>sp|P00325|ADH1B_HUMAN Alcohol dehydrogenase 1B OS=Homo sapiens GN=ADH1B PE=1 SV=2 +MSTAGKVIKCKAAVLWEVKKPFSIEDVEVAPPKAYEVRIKMVAVGICRTDDHVVSGNLVT +PLPVILGHEAAGIVESVGEGVTTVKPGDKVIPLFTPQCGKCRVCKNPESNYCLKNDLGNP +RGTLQDGTRRFTCRGKPIHHFLGTSTFSQYTVVDENAVAKIDAASPLEKVCLIGCGFSTG +YGSAVNVAKVTPGSTCAVFGLGGVGLSAVMGCKAAGAARIIAVDINKDKFAKAKELGATE +CINPQDYKKPIQEVLKEMTDGGVDFSFEVIGRLDTMMASLLCCHEACGTSVIVGVPPASQ +NLSINPMLLLTGRTWKGAVYGGFKSKEGIPKLVADFMAKKFSLDALITHVLPFEKINEGF +DLLHSGKSIRTVLTF +>sp|P00329|ADH1_MOUSE Alcohol dehydrogenase 1 OS=Mus musculus GN=Adh1 PE=2 SV=2 +MSTAGKVIKCKAAVLWELHKPFTIEDIEVAPPKAHEVRIKMVATGVCRSDDHVVSGTLVT +PLPAVLGHEGAGIVESVGEGVTCVKPGDKVIPLFSPQCGECRICKHPESNFCSRSDLLMP +RGTLREGTSRFSCKGKQIHNFISTSTFSQYTVVDDIAVAKIDGASPLDKVCLIGCGFSTG +YGSAVKVAKVTPGSTCAVFGLGGVGLSVIIGCKAAGAARIIAVDINKDKFAKAKELGATE +CINPQDYSKPIQEVLQEMTDGGVDFSFEVIGRLDTMTSALLSCHAACGVSVVVGVPPNAQ +NLSMNPMLLLLGRTWKGAIFGGFKSKDSVPKLVADFMAKKFPLDPLITHVLPFEKINEAF +DLLRSGKSIRTVLTF +>sp|P00338-2|LDHA_HUMAN Isoform 2 of L-lactate dehydrogenase A chain OS=Homo sapiens GN=LDHA +MATLKDQLIYNLLKEEQTPQNKITVVGVGAVGMACAISILMKDLADELALVDVIEDKLKG +EMMDLQHGSLFLRTPKIVSGKDYNVTANSKLVIITAGARQQEGESRLNLVQRNVNIFKFI +IPNVVKYSPNCKLLIVSNPVDILTYVAWKISGFPKNRVIGSGCNLDSARFRYLMGERLGV +HPLSCHGWVLGEHGDSSVPVWSGMNVAGVSLKTLHPDLGTDKDKEQWKECRYTLGDPKGA +AILKSSDVISFHCLGYNRILGGGCACCPFYLICD +>sp|P00340|LDHA_CHICK L-lactate dehydrogenase A chain OS=Gallus gallus GN=LDHA PE=1 SV=3 +MSLKDHLIHNVHKEEHAHAHNKISVVGVGAVGMACAISILMKDLADELTLVDVVEDKLKG +EMLDLQHGSLFLKTPKIISGKDYSVTAHSKLVIVTAGARQQEGESRLNLVQRNVNIFKFI +IPNVVKYSPDCKLLIVSNPVDILTYVAWKISGFPKHRVIGSGCNLDSARFRHLMGERLGI +HPLSCHGWIVGEHGDSSVPVWSGVNVAGVSLKALHPDMGTDADKEHWKEVHKQVVDSAYE +VIKLKGYTSWAIGLSVADLAETIMKNLRRVHPISTAVKGMHGIKDDVFLSVPCVLGSSGI +TDVVKMILKPDEEEKIKKSADTLWGIQKELQF +>sp|P19858|LDHA_BOVIN L-lactate dehydrogenase A chain OS=Bos taurus GN=LDHA PE=2 SV=2 +MATLKDQLIQNLLKEEHVPQNKITIVGVGAVGMACAISILMKDLADEVALVDVMEDKLKG +EMMDLQHGSLFLRTPKIVSGKDYNVTANSRLVIITAGARQQEGESRLNLVQRNVNIFKFI +IPNIVKYSPNCKLLVVSNPVDILTYVAWKISGFPKNRVIGSGCNLDSARFRYLMGERLGV +HPLSCHGWILGEHGDSSVPVWSGVNVAGVSLKNLHPELGTDADKEQWKAVHKQVVDSAYE +VIKLKGYTSWAIGLSVADLAESIMKNLRRVHPISTMIKGLYGIKEDVFLSVPCILGQNGI +SDVVKVTLTHEEEACLKKSADTLWGIQKELQF diff -r 000000000000 -r 54d811ad2b52 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Apr 25 12:15:23 2016 -0400 @@ -0,0 +1,35 @@ + + + + + + https://github.com/weizhongli/cdhit/releases/download/V4.6.4/cd-hit-v4.6.4-2015-0603.tar.gz + make + + cd-hit + $INSTALL_DIR + + + cd-hit-est + $INSTALL_DIR + + + $INSTALL_DIR + "-M 4000" + + + + +These links provide information for building the cdhit package: + +http://weizhong-lab.ucsd.edu/cd-hit/ + +https://code.google.com/p/cdhit/ + +https://code.google.com/p/cdhit/source/browse/README + +Change the CDHIT_SITE_OPTIONS variable in the installed env.sh file to adjust +the maximum memory Mb (-M). + + + \ No newline at end of file