# HG changeset patch # User mcharles # Date 1414144460 14400 # Node ID 8dfa09868059b4fb3a76c63ec5b2f214af090b3f # Parent c52e74b98773f07c273f62957e0d5c37c6d092b2 Uploaded diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys.ga --- a/genephys/Galaxy-Workflow-GenePhys.ga Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,674 +0,0 @@ -{ - "a_galaxy_workflow": "true", - "annotation": "", - "format-version": "0.1", - "name": "GenePhys", - "steps": { - "0": { - "annotation": "", - "id": 0, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "ASSEMBLY" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 201, - "top": 392 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"ASSEMBLY\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "1": { - "annotation": "", - "id": 1, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "GENE XML" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 200, - "top": 497 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"GENE XML\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "2": { - "annotation": "", - "id": 2, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "MARKERS" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 186, - "top": 618 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"MARKERS\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "3": { - "annotation": "", - "id": 3, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "GENETIC MAP" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 200, - "top": 737 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"GENETIC MAP\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "4": { - "annotation": "", - "id": 4, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "REFERENCE(NUC)" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 200, - "top": 857 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"REFERENCE(NUC)\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "5": { - "annotation": "", - "id": 5, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "REFERENCE(PROT)" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 206, - "top": 982 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"REFERENCE(PROT)\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "6": { - "annotation": "", - "id": 6, - "input_connections": { - "input_geneticmap": { - "id": 3, - "output_name": "output" - }, - "input_markers": { - "id": 2, - "output_name": "output" - } - }, - "inputs": [], - "name": "extractgenomicsegment", - "outputs": [ - { - "name": "output_file", - "type": "fasta" - } - ], - "position": { - "left": 442, - "top": 200 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "extractgenomicsegment", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"window\": \"\\\"200000\\\"\", \"input_geneticmap\": \"null\", \"offset\": \"\\\"100000\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input_markers\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "7": { - "annotation": "", - "id": 7, - "input_connections": { - "input_fasta": { - "id": 4, - "output_name": "output" - } - }, - "inputs": [], - "name": "fastaGroomerForMakeBlastdb", - "outputs": [ - { - "name": "output_fasta", - "type": "fasta" - } - ], - "position": { - "left": 385, - "top": 811 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "fastaGroomerForMakeBlastdb", - "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": 0}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "8": { - "annotation": "", - "id": 8, - "input_connections": { - "input_fasta": { - "id": 5, - "output_name": "output" - } - }, - "inputs": [], - "name": "fastaGroomerForMakeBlastdb", - "outputs": [ - { - "name": "output_fasta", - "type": "fasta" - } - ], - "position": { - "left": 489, - "top": 955 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "fastaGroomerForMakeBlastdb", - "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": 0}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "9": { - "annotation": "", - "id": 9, - "input_connections": { - "input_genexml": { - "id": 1, - "output_name": "output" - }, - "input_segment": { - "id": 6, - "output_name": "output_file" - } - }, - "inputs": [], - "name": "extractgenesfromsegment", - "outputs": [ - { - "name": "output_gene_nuc", - "type": "fasta" - }, - { - "name": "output_gene_prot", - "type": "fasta" - }, - { - "name": "output_gene_function", - "type": "txt" - } - ], - "position": { - "left": 782, - "top": 207 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "extractgenesfromsegment", - "tool_state": "{\"__page__\": 0, \"input_genexml\": \"null\", \"input_segment\": \"null\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "10": { - "annotation": "", - "id": 10, - "input_connections": { - "input_assembly": { - "id": 0, - "output_name": "output" - }, - "input_segment": { - "id": 6, - "output_name": "output_file" - } - }, - "inputs": [], - "name": "extractgenomicsequencefromsegment", - "outputs": [ - { - "name": "output_file", - "type": "fasta" - } - ], - "position": { - "left": 640, - "top": 497 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "extractgenomicsequencefromsegment", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_segment\": \"null\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input_assembly\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "11": { - "annotation": "", - "id": 11, - "input_connections": { - "input_file": { - "id": 7, - "output_name": "output_fasta" - } - }, - "inputs": [], - "name": "NCBI BLAST+ makeblastdb", - "outputs": [ - { - "name": "outfile", - "type": "data" - } - ], - "position": { - "left": 844, - "top": 840 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", - "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"nucl\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", - "tool_version": "0.1.00", - "type": "tool", - "user_outputs": [] - }, - "12": { - "annotation": "", - "id": 12, - "input_connections": { - "input_file": { - "id": 8, - "output_name": "output_fasta" - } - }, - "inputs": [], - "name": "NCBI BLAST+ makeblastdb", - "outputs": [ - { - "name": "outfile", - "type": "data" - } - ], - "position": { - "left": 692, - "top": 673 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", - "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"prot\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", - "tool_version": "0.1.00", - "type": "tool", - "user_outputs": [] - }, - "13": { - "annotation": "", - "id": 13, - "input_connections": { - "db_opts|histdb": { - "id": 11, - "output_name": "outfile" - }, - "query": { - "id": 9, - "output_name": "output_gene_nuc" - } - }, - "inputs": [], - "name": "NCBI BLAST+ blastn", - "outputs": [ - { - "name": "output1", - "type": "tabular" - } - ], - "position": { - "left": 1100, - "top": 334 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", - "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"adv_opts_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"query\": \"null\", \"blast_type\": \"\\\"blastn\\\"\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"pident\\\", \\\"length\\\", \\\"mismatch\\\", \\\"gapopen\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\", \\\"evalue\\\", \\\"bitscore\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"nident\\\", \\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", - "tool_version": "0.1.00", - "type": "tool", - "user_outputs": [] - }, - "14": { - "annotation": "", - "id": 14, - "input_connections": { - "db_opts|histdb": { - "id": 11, - "output_name": "outfile" - }, - "query": { - "id": 9, - "output_name": "output_gene_nuc" - } - }, - "inputs": [], - "name": "NCBI BLAST+ tblastx", - "outputs": [ - { - "name": "output1", - "type": "tabular" - } - ], - "position": { - "left": 1107, - "top": 685.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_tblastx_wrapper/0.1.00", - "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"adv_opts_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"query_gencode\": \"\\\"1\\\"\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"pident\\\", \\\"length\\\", \\\"mismatch\\\", \\\"gapopen\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\", \\\"evalue\\\", \\\"bitscore\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"nident\\\", \\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", - "tool_version": "0.1.00", - "type": "tool", - "user_outputs": [] - }, - "15": { - "annotation": "", - "id": 15, - "input_connections": { - "db_opts|histdb": { - "id": 12, - "output_name": "outfile" - }, - "query": { - "id": 10, - "output_name": "output_file" - } - }, - "inputs": [], - "name": "NCBI BLAST+ blastx", - "outputs": [ - { - "name": "output1", - "type": "tabular" - } - ], - "position": { - "left": 1104, - "top": 451 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastx_wrapper/0.1.00", - "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"adv_opts_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"query_gencode\": \"\\\"1\\\"\", \"query\": \"null\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"pident\\\", \\\"length\\\", \\\"mismatch\\\", \\\"gapopen\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\", \\\"evalue\\\", \\\"bitscore\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"nident\\\", \\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", - "tool_version": "0.1.00", - "type": "tool", - "user_outputs": [] - }, - "16": { - "annotation": "", - "id": 16, - "input_connections": { - "db_opts|histdb": { - "id": 12, - "output_name": "outfile" - }, - "query": { - "id": 9, - "output_name": "output_gene_prot" - } - }, - "inputs": [], - "name": "NCBI BLAST+ blastp", - "outputs": [ - { - "name": "output1", - "type": "tabular" - } - ], - "position": { - "left": 1106, - "top": 568.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastp_wrapper/0.1.00", - "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"adv_opts_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"blastp\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"pident\\\", \\\"length\\\", \\\"mismatch\\\", \\\"gapopen\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\", \\\"evalue\\\", \\\"bitscore\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"nident\\\", \\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", - "tool_version": "0.1.00", - "type": "tool", - "user_outputs": [] - }, - "17": { - "annotation": "", - "id": 17, - "input_connections": { - "input_blast": { - "id": 13, - "output_name": "output1" - } - }, - "inputs": [], - "name": "parseblasttab", - "outputs": [ - { - "name": "output_merge", - "type": "txt" - }, - { - "name": "output_best", - "type": "txt" - } - ], - "position": { - "left": 1365, - "top": 333.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "parseblasttab", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "18": { - "annotation": "", - "id": 18, - "input_connections": { - "input_blast": { - "id": 14, - "output_name": "output1" - } - }, - "inputs": [], - "name": "parseblasttab", - "outputs": [ - { - "name": "output_merge", - "type": "txt" - }, - { - "name": "output_best", - "type": "txt" - } - ], - "position": { - "left": 1368, - "top": 776.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "parseblasttab", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "19": { - "annotation": "", - "id": 19, - "input_connections": { - "input_blast": { - "id": 15, - "output_name": "output1" - } - }, - "inputs": [], - "name": "parseblasttab", - "outputs": [ - { - "name": "output_merge", - "type": "txt" - }, - { - "name": "output_best", - "type": "txt" - } - ], - "position": { - "left": 1366, - "top": 477.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "parseblasttab", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "20": { - "annotation": "", - "id": 20, - "input_connections": { - "input_blast": { - "id": 16, - "output_name": "output1" - } - }, - "inputs": [], - "name": "parseblasttab", - "outputs": [ - { - "name": "output_merge", - "type": "txt" - }, - { - "name": "output_best", - "type": "txt" - } - ], - "position": { - "left": 1372, - "top": 623.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "parseblasttab", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "21": { - "annotation": "", - "id": 21, - "input_connections": { - "input_blastn": { - "id": 17, - "output_name": "output_best" - }, - "input_blastp": { - "id": 20, - "output_name": "output_best" - }, - "input_blastx": { - "id": 19, - "output_name": "output_best" - }, - "input_tblastx": { - "id": 18, - "output_name": "output_best" - } - }, - "inputs": [], - "name": "mergeAllBestBlast", - "outputs": [ - { - "name": "output_results", - "type": "txt" - } - ], - "position": { - "left": 1739.5, - "top": 384.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "mergeAllBestBlast", - "tool_state": "{\"__page__\": 0, \"input_tblastx\": \"null\", \"input_blastp\": \"null\", \"input_blastx\": \"null\", \"__rerun_remap_job_id__\": null, \"input_blastn\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - } - } -} \ No newline at end of file diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastn.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_blastn.ga Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,204 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "GenePhys - blastn", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "NUCLEIC GENE SEQUENCE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 225 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"NUCLEIC GENE SEQUENCE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "NUCLEIC DB FILE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 382 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"NUCLEIC DB FILE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "input_fasta": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 550, + "top": 351 + }, + "post_job_actions": { + "HideDatasetActionoutput_fasta": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_fasta" + } + }, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input_file": { + "id": 2, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 925.5, + "top": 332 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"nucl\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "db_opts|histdb": { + "id": 3, + "output_name": "outfile" + }, + "query": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 1295, + "top": 212 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output1" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"10\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_blast": { + "id": 4, + "output_name": "output1" + } + }, + "inputs": [], + "name": "MergeBlastResults", + "outputs": [ + { + "name": "output_merge", + "type": "txt" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1604, + "top": 216 + }, + "post_job_actions": { + "RenameDatasetActionoutput_merge": { + "action_arguments": { + "newname": "BLASTN" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_merge" + } + }, + "tool_errors": null, + "tool_id": "MergeBlastResults", + "tool_state": "{\"__page__\": 0, \"max_overlap_fraction\": \"\\\"0.5\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"BLASTN\\\"\", \"input_blast\": \"null\", \"max_overlap_length_ignored\": \"\\\"3\\\"\"}", + "tool_version": "1.03", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastp.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_blastp.ga Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,204 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "GenePhys - blastp", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "PROTEIC GENE SEQUENCE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 254 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"PROTEIC GENE SEQUENCE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "PROTEIN DB FILE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 411 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"PROTEIN DB FILE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "input_fasta": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 550, + "top": 380 + }, + "post_job_actions": { + "HideDatasetActionoutput_fasta": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_fasta" + } + }, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input_file": { + "id": 2, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 923.5, + "top": 362 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"prot\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "db_opts|histdb": { + "id": 3, + "output_name": "outfile" + }, + "query": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastp", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 1222, + "top": 214 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output1" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastp_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"matrix\\\": \\\"BLOSUM62\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"max_hits\\\": \\\"10\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"blastp\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_blast": { + "id": 4, + "output_name": "output1" + } + }, + "inputs": [], + "name": "MergeBlastResults", + "outputs": [ + { + "name": "output_merge", + "type": "txt" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1544, + "top": 215 + }, + "post_job_actions": { + "RenameDatasetActionoutput_merge": { + "action_arguments": { + "newname": "BLASTP" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_merge" + } + }, + "tool_errors": null, + "tool_id": "MergeBlastResults", + "tool_state": "{\"__page__\": 0, \"max_overlap_fraction\": \"\\\"0.5\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"BLASTP\\\"\", \"input_blast\": \"null\", \"max_overlap_length_ignored\": \"\\\"3\\\"\"}", + "tool_version": "1.03", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastx.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_blastx.ga Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,204 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "GenePhys - blastx", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "NUCLEIC GENE SEQUENCE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 313 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"NUCLEIC GENE SEQUENCE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "PROTEIN DB FILE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 298, + "top": 476 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"PROTEIN DB FILE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "input_fasta": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 550, + "top": 439 + }, + "post_job_actions": { + "HideDatasetActionoutput_fasta": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_fasta" + } + }, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input_file": { + "id": 2, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 918.5, + "top": 414 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"prot\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "db_opts|histdb": { + "id": 3, + "output_name": "outfile" + }, + "query": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastx", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 1276.5, + "top": 291 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output1" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastx_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"matrix\\\": \\\"BLOSUM62\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"10\\\"}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"query_gencode\": \"\\\"1\\\"\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_blast": { + "id": 4, + "output_name": "output1" + } + }, + "inputs": [], + "name": "MergeBlastResults", + "outputs": [ + { + "name": "output_merge", + "type": "txt" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1599, + "top": 291 + }, + "post_job_actions": { + "RenameDatasetActionoutput_merge": { + "action_arguments": { + "newname": "BLASTX" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_merge" + } + }, + "tool_errors": null, + "tool_id": "MergeBlastResults", + "tool_state": "{\"__page__\": 0, \"max_overlap_fraction\": \"\\\"0.5\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"BLASTX\\\"\", \"input_blast\": \"null\", \"max_overlap_length_ignored\": \"\\\"3\\\"\"}", + "tool_version": "1.03", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,204 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "GenePhys - tblastx", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "NUCLEIC GENE SEQUENCE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 222 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"NUCLEIC GENE SEQUENCE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "NUCLEIC DB FILE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 379 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"NUCLEIC DB FILE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "input_fasta": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 557, + "top": 353 + }, + "post_job_actions": { + "HideDatasetActionoutput_fasta": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_fasta" + } + }, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input_file": { + "id": 2, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 925.5, + "top": 329 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"nucl\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "db_opts|histdb": { + "id": 3, + "output_name": "outfile" + }, + "query": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "NCBI BLAST+ tblastx", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 1239, + "top": 189 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output1" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_tblastx_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"matrix\\\": \\\"BLOSUM62\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"db_gencode\\\": \\\"1\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"10\\\"}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"query_gencode\": \"\\\"1\\\"\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_blast": { + "id": 4, + "output_name": "output1" + } + }, + "inputs": [], + "name": "MergeBlastResults", + "outputs": [ + { + "name": "output_merge", + "type": "txt" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1529, + "top": 192 + }, + "post_job_actions": { + "RenameDatasetActionoutput_merge": { + "action_arguments": { + "newname": "TBLASTX" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_merge" + } + }, + "tool_errors": null, + "tool_id": "MergeBlastResults", + "tool_state": "{\"__page__\": 0, \"max_overlap_fraction\": \"\\\"0.5\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"TBLASTX\\\"\", \"input_blast\": \"null\", \"max_overlap_length_ignored\": \"\\\"3\\\"\"}", + "tool_version": "1.03", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file diff -r c52e74b98773 -r 8dfa09868059 genephys/GenePhys.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/GenePhys.pl Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,335 @@ +#!/usr/bin/perl +#V1.1.0 integrated gene extraction +#V1.0.2 integrated segment fasta extraction +#V1.0.1 added log and option +#V1.0.0 +use strict; +use warnings; +use Getopt::Long; + +my $input_blast_files; +my $input_genes_position_file; +my $input_assembly_file; +my $input_markers_position_file; +my $input_markers_file; +my $log_file; +my $output_fasta_file; +my $output_segment_file; +my $output_genes_list_file; +my $EXTRACT_SEQ = "NO"; +my $WINDOW = 200000; +my $OFFSET = 100000; +my $MAX_BLAST_LINES = 1; + +GetOptions ( +"input_assembly_file=s" => \$input_assembly_file, +"input_markers_position_file=s" => \$input_markers_position_file, +"input_markers_file=s" => \$input_markers_file, +"log_file=s" => \$log_file, +"output_fasta_file=s" => \$output_fasta_file, +"output_segment_file=s" => \$output_segment_file, +"extractseq=s" => \$EXTRACT_SEQ, +"window=i" => \$WINDOW, +"offset=i" =>\$OFFSET, +"input_blast_files=s" => \$input_blast_files, +"input_genes_position_file=s"=> \$input_genes_position_file, +"output_genes_list_file=s"=>\$output_genes_list_file, +"max_blast_lines=i" => \$MAX_BLAST_LINES +) or die("Error in command line arguments\n"); + +open(LF, ">$log_file") or die("Can't open $log_file\n"); +#print LF $EXTRACT_SEQ."\n"; + +my $current_annotation=""; +my @list_marquer; +my %chr; +my %position; + +open(MP, $input_markers_position_file) or die("Can't open $input_markers_position_file\n"); + +my $compt=0; +while (my $line=){ + $compt++; + my @cols = split(/\t/,$line); + if ($#cols != 3){ + print STDERR "Error in marker position file format\n$compt : $line\n"; + exit(0); + } + my %current; + # Number#Map#Name#Chr#Position#GeneAT#FunctionAT + my $Name = $cols[0]; + my $Locus = $cols[1]; + my $Chr = $cols[2]; + my $Position = $cols[3]; + + + $chr{$Name} = $Chr; + $position{$Name} = $Position; + + ### Modification 0.9.9 + if ($Locus ne $Name){ + $chr{$Locus} = $Chr; + $position{$Locus} = $Position; + } + ### + +} +close (MP); + +open(MA, $input_markers_file) or die("Can't open $input_markers_file\n"); +while (my $line=){ + my @cols = split (/\s+/,$line); + for (my $i=0;$i<=$#cols;$i++){ + my $current = $cols[$i]; + chomp($current); + if ($current !~ /^\s+$/){ + push(@list_marquer,$current); + } + } +} +close (MA); + +my %coord_by_chr; +for (my $i=0;$i<=$#list_marquer;$i++){ + my $current_name = $list_marquer[$i]; + my $current_chr = $chr{$current_name}; + my $current_position = $position{$current_name}; + + if ($current_position =~ /^\d+$/){ + my @tbl_coord_for_current_chr; + if ($coord_by_chr{$current_chr}){ + @tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}}; + } + push(@tbl_coord_for_current_chr,$current_position); + $coord_by_chr{$current_chr}=\@tbl_coord_for_current_chr; + } + elsif (($current_position =~/\s*-\s*/)||($current_position =~/none/i)){ + + } + else { + chomp($current_position); + print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n"; + print $list_marquer[$i],"\n"; + } +} + +open(OS, ">$output_segment_file") or die ("Can't open $output_segment_file\n"); + +my @segment_chr; +my @segment_start; +my @segment_end; + +foreach my $key (sort keys %coord_by_chr){ + my @tbl_coord = @{$coord_by_chr{$key}}; + @tbl_coord = sort { $a <=> $b } @tbl_coord; + my $current_start; + my $current_stop; + my $current_start_with_offset; + my $current_stop_with_offset; + + for (my $i=0;$i<=$#tbl_coord;$i++){ + if (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]} + + # print "$i : $current_start / $current_stop\n"; + if ($tbl_coord[$i]>$current_stop+$WINDOW){ + #OFFSET + if ($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;} + $current_stop_with_offset = $current_stop + $OFFSET; + ####### + print OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\n"; + push(@segment_chr,$key); + push(@segment_start,$current_start_with_offset); + push(@segment_end,$current_stop_with_offset); + + $current_start = $tbl_coord[$i]; + $current_stop = $tbl_coord[$i]; + + if ($i==$#tbl_coord){ + #OFFSET + if ($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;} + $current_stop_with_offset = $current_stop + $OFFSET; + ####### + print OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\n"; + push(@segment_chr,$key); + push(@segment_start,$current_start_with_offset); + push(@segment_end,$current_stop_with_offset); + } + } + else { + $current_stop=$tbl_coord[$i]; + if ($i==$#tbl_coord){ + #OFFSET + if ($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;} + $current_stop_with_offset = $current_stop + $OFFSET; + ####### + print OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\n"; + push(@segment_chr,$key); + push(@segment_start,$current_start_with_offset); + push(@segment_end,$current_stop_with_offset); + } + } + } +} +close(OS); + +### Sequence extraction +if ($EXTRACT_SEQ eq "YES"){ + + my %genome; + my $current_header; + my $current_seq=""; + open(AF, $input_assembly_file) or die ("Can't open $input_assembly_file\n"); + + while (my $ligne = ){ + if ($ligne =~ /^\>(.*?)\s*$/){ + if ($current_header){ + $genome{$current_header} = $current_seq; + } + $current_header=$1; + $current_seq = ""; + } + else { + if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ + $current_seq .= $1; + } + else { + print STDERR "Erreur Parsing n°1\n$ligne\n"; + } + } + } + + #TRAITEMENT DU DERNIER + if ($current_header){ + $genome{$current_header} = $current_seq; + undef($current_seq); + } + close (AF); + + open(OF, ">$output_fasta_file") or die ("Can't open $output_fasta_file\n"); + for (my $i=0;$i<=$#segment_chr;$i++){ + my $compt=0; + my $current_seq=""; + print OF ">",$segment_chr[$i],":",$segment_start[$i],"..",$segment_end[$i]."\n"; + ### Modification 0.9.9 + if ($segment_end[$i]>length($genome{$segment_chr[$i]})){ + $segment_end[$i] = length($genome{$segment_chr[$i]}); + } + ### + + my @SEQ = split(//,$genome{$segment_chr[$i]}); + for (my $coord = $segment_start[$i]-1; $coord<=$segment_end[$i]-1;$coord++){ + $compt++; + if ($compt > 60 ){ + $current_seq .= "\n"; + $compt=1; + } + $current_seq .= $SEQ[$coord]; + + } + print OF "$current_seq\n"; + } + close (OF); +} + +### GENE and BLAST Extraction +my @blast_by_base; +my @header; + + +my @blastfiles = split(/\,/,$input_blast_files); +for (my $i=0;$i<=$#blastfiles;$i++){ + my $current_blast_file = $blastfiles[$i]; + my $current_blast_header = "DEFAULT"; + my %current_blast; + open (B,"$current_blast_file") or die ("Can't open $current_blast_file\n"); + while (my $line =){ + if ($line =~ /^\#\#(.*?)$/){ + $current_blast_header = $1; + print LF $current_blast_header."\n"; + } + elsif ($line =~ /^\#/){ + # blast file column legend + } + else { + my @fields = split(/\s+/,$line); + my $gene_id = $fields[0]; + my @blast_for_this_gene; + if ($current_blast{$gene_id}){ + @blast_for_this_gene = @{$current_blast{$gene_id}}; + } + + if ($#blast_for_this_gene<$MAX_BLAST_LINES-1){ + push(@blast_for_this_gene,$line); + print LF $gene_id,"\n"; + } + $current_blast{$gene_id}=\@blast_for_this_gene; + } + } + close(B); + push (@blast_by_base,\%current_blast); + push (@header,$current_blast_header); +} + + +open (OGL,">$output_genes_list_file") or die ("Can't open $output_genes_list_file\n"); + +for (my $i=0;$i<=$#segment_chr;$i++){ + my $segment_chr = $segment_chr[$i]; + my $segment_start = $segment_start[$i]; + my $segment_end = $segment_end[$i]; + + print OGL "#",$segment_chr[$i],":",$segment_start[$i],"..",$segment_end[$i],"\n"; + + open(IG, $input_genes_position_file) or die("Can't open $input_genes_position_file\n"); + while (my $gene_desc=){ + my @gene_desc = split(/\s+/,$gene_desc); + if ($#gene_desc != 4){ + print STDERR "Error in gene position file format\n$gene_desc\n"; + exit(0); + } + my $gene_id = $gene_desc[0]; + my $cds_id = $gene_desc[1]; + my $gene_chr = $gene_desc[2]; + my $gene_start = $gene_desc[3]; + my $gene_end = $gene_desc[4]; + if ($segment_chr eq $gene_chr){ + if ((($gene_start>=$segment_start)&&($gene_start<=$segment_end))||(($gene_end>=$segment_start)&&($gene_end<=$segment_end))){ + print OGL $gene_id," / ",$cds_id,"\n"; + + for (my $i=0;$i<=$#blast_by_base;$i++){ + #print LF $header[$i]."\n"; + my %current_blast = %{$blast_by_base[$i]}; + if ($current_blast{$cds_id}){ + my @blast_by_gene = @{$current_blast{$cds_id}}; + #print LF $#blast_by_gene."\n"; + for (my $j=0;$j<=$#blast_by_gene;$j++){ + my @fields = split(/\t/,$blast_by_gene[$j]); + print OGL $header[$i],"\t"; + print OGL $fields[1],"\t"; + print OGL $fields[3],"\t"; + print OGL $fields[4],"\t"; + print OGL $fields[5],"\t"; + print OGL $fields[10],"\t"; + print OGL $fields[6],"..",$fields[7],"(",$fields[11],")","\t"; + print OGL $fields[8],"..",$fields[9],"(",$fields[12],")","\t"; + print OGL $fields[13]; + } + print OGL "\n"; + } + else { + print OGL $header[$i],"\t","No BLAST results\n"; + print LF $gene_id," / ",$cds_id,"\n"; + } + } + } + } + + } + close(IG); +} + + +close (OGL); + +close (LF); + diff -r c52e74b98773 -r 8dfa09868059 genephys/GenePhys.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/GenePhys.xml Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,48 @@ + +Extract the genes underlying a genetic segment defined by genetic markers + + + #if $extractseq.do_extractseq=="YES" + GenePhys.pl -extractseq $extractseq.do_extractseq -input_assembly_file $input_assembly_file -input_markers_position_file $input_markers_position_file -input_markers_file $input_markers_file -window $window -offset $offset -output_segment_file $output_segment_file -output_fasta_file $output_fasta_file -log_file $log_file -input_blast_files $input_blast_files -max_blast_lines $max_blast_lines -input_genes_position_file $input_genes_position_file -output_genes_list_file $output_genes_list_file + #else + GenePhys.pl -input_markers_position_file $input_markers_position_file -input_markers_file $input_markers_file -window $window -offset $offset -output_segment_file $output_segment_file -output_fasta_file $output_fasta_file -log_file $log_file -input_blast_files $input_blast_files -max_blast_lines $max_blast_lines -input_genes_position_file $input_genes_position_file -output_genes_list_file $output_genes_list_file + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r c52e74b98773 -r 8dfa09868059 genephys/MergeBlastResults.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/MergeBlastResults.pl Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,545 @@ +#!/usr/bin/perl +#V1.0.3 header added +#V1.0.2 suppressed the final sort (very heavyload) and replaced it by another level of hash +#V1.0.1 added log, option parameters +use strict; +use warnings; +use Getopt::Long; + +my $inputblast; +my $outputjoin; +my $log_file; +my $MAX_OVERLAP_FRACTION = 0.5; +my $MAX_OVERLAP_LENGTH_IGNORED = 3; +my $VERBOSE = "OFF"; +my $ALLOWED_GAP_FRACTION_FOR_MERGING = 0.3; +my $HEADER =""; + +GetOptions ( +"input_blasttab_file=s" => \$inputblast, +"output_joinmatch_file=s" => \$outputjoin, +"log_file=s" => \$log_file, +"header=s" => \$HEADER, +"max_overlap_fraction=f" => \$MAX_OVERLAP_FRACTION, +"max_overlap_length_ignored=i" =>\$MAX_OVERLAP_LENGTH_IGNORED +) or die("Error in command line arguments\n"); + +open(IB, $inputblast) or die ("Can't open $inputblast \n"); +open (LF,">$log_file") or die("Can't open $log_file\n"); + + +my %match_by_query; + +my @query_keys; +my %querys; + + +my $stats_nb_match=0; +my $stats_included=0; +my $stats_large_overlapping=0; +my %stats_query_coverage; +$stats_query_coverage{"0-10%"}=0; +$stats_query_coverage{"10-20%"}=0; +$stats_query_coverage{"20-30%"}=0; +$stats_query_coverage{"30-40%"}=0; +$stats_query_coverage{"40-50%"}=0; +$stats_query_coverage{"50-60%"}=0; +$stats_query_coverage{"60-70%"}=0; +$stats_query_coverage{"70-80%"}=0; +$stats_query_coverage{"80-90%"}=0; +$stats_query_coverage{"90-100%"}=0; + +my $current_query=""; +while (my $ligne = ){ + my @fields = split (/\t/,$ligne); + if ($#fields != 9){ + print STDERR "Invalid blasttab format, must have 10 columns\n"; + exit(0); + } + $stats_nb_match++; + my %match; + $match{"Query"}=$fields[0]; + if (!$querys{$match{"Query"}}){ + push(@query_keys,$match{"Query"}); + $querys{$match{"Query"}} = 1; + } + $match{"Subject_id"}=$fields[1]; + $match{"Orientation"}="+"; + $match{"Query_start"}=$fields[2]; + $match{"Query_end"}=$fields[3]; + $match{"Subject_start"}=$fields[4]; + $match{"Subject_end"}=$fields[5]; + + + if ($fields[2]>$fields[3]){ + $match{"Query_start"}=$fields[3]; + $match{"Query_end"}=$fields[2]; + $match{"Orientation"}="-"; + #print "- $ligne"; + } + + if ($fields[4]>$fields[5]){ + $match{"Subject_start"}=$fields[5]; + $match{"Subject_end"}=$fields[4]; + $match{"Orientation"}="-"; + #print "- $ligne"; + } + $match{"Similarity"}=$fields[6]; + $match{"Query_length"}=$fields[7]; + $match{"Subject_length"}=$fields[8]; + chomp($fields[9]); + $match{"Subject"}=$fields[9]; + + $match{"Ligne"}=$ligne; + + my $querykey = $match{"Query"}; + my $key = $match{"Query"}."##".$match{"Subject"}."##".$match{"Orientation"}; + if ($match{"Subject_length"}==0){ + print LF "Match 0",$ligne,"\n",$match{"Subject_length"},"\n"; + } + my %match_by_query_and_subject; + my @match_table; + + if ($match_by_query{$querykey}){ + %match_by_query_and_subject = %{$match_by_query{$querykey}}; + } + if ($match_by_query_and_subject{$key}){ + @match_table=@{$match_by_query_and_subject{$key}}; + } + + push (@match_table,\%match); + $match_by_query_and_subject{$key} = \@match_table; + $match_by_query{$querykey}= \%match_by_query_and_subject; +} + +close (IB); + +#print LF "NB query : $#query_keys\n"; +#foreach my $querykey (sort @query_keys){ +# my %current_match_by_query_and_subject = %{$match_by_query{$querykey}}; +# foreach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){ +# my @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}}; +# for (my $i=0;$i<=$#current_match_table;$i++){ +# my %current_match = %{$current_match_table[$i]}; +# print LF $current_match{"Ligne"}."\n"; +# } +# } +#} +#exit(0); + +open (OJ, ">$outputjoin") or die ("Can't open $outputjoin \n"); +print OJ "##",$HEADER,"\n"; +print OJ "#Query\tSubject_Id\torientation\tQuery_coverage\tSubject_coverage\tIdentity\tmin_query\tmax_query\tmin_subject\tmax_subject\tNBmatch\tq_length\tsub_length\tsubject\n"; + +foreach my $querykey (sort @query_keys){ + my %current_match_by_query_and_subject = %{$match_by_query{$querykey}}; + my @match_joined; #la table qui contient les matchs (hash) projeté de chaque subject pour une query + foreach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){ + my @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}}; + my @duplicate; + my @overlap; + for (my $i=0;$i<=$#current_match_table;$i++){ + push (@duplicate,0); + } + if ($VERBOSE eq "ON"){ + print LF "\nTable Match ($#current_match_table)\t$key\n"; + for (my $i=0;$i<=$#current_match_table;$i++){ + my %match=%{$current_match_table[$i]}; + print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; + print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; + } + } + + #Scan d'inclusion strict + for (my $i=0;$i<=$#current_match_table;$i++){ + my %match1=%{$current_match_table[$i]}; + for (my $j=0;$j<=$#current_match_table;$j++){ + if (($j != $i)&&($duplicate[$j]==0)){ # On scan dans les deux sens, pas seulement $j = $i+1 a cause du last; + my %match2=%{$current_match_table[$j]}; + # Inclus Subject + if (($match1{"Subject_start"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"})) + { + $duplicate[$i]=1; + last; + } + # Inclus Query + elsif (($match1{"Query_start"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"})) + { + $duplicate[$i]=2; + last; + } + + } + } + } + + my @current_match_table_filtered; + for (my $i=0;$i<=$#current_match_table;$i++){ + if ($duplicate[$i] == 0){ + push (@current_match_table_filtered,$current_match_table[$i]); + } + else{ + $stats_included++; + } + } + + if ($#current_match_table > $#current_match_table_filtered){ + @current_match_table = @current_match_table_filtered; + if ($VERBOSE eq "ON"){ + print LF "Table Match filtered 1 ($#current_match_table)\n"; + for (my $i=0;$i<=$#current_match_table;$i++){ + my %match=%{$current_match_table[$i]}; + print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; + print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; + } + } + } + + undef @duplicate; + for (my $i=0;$i<=$#current_match_table;$i++){ + push (@duplicate,0); + } + ######## + + ###Scan d'overlap trop important + # D'abord subject + for (my $i=0;$i<=$#current_match_table-1;$i++){ + my %match1=%{$current_match_table[$i]}; + for (my $j=$i+1;$j<=$#current_match_table;$j++){ + my %match2=%{$current_match_table[$j]}; + if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"})) + { + my $overlap_length = $match1{"Subject_end"} - $match2{"Subject_start"}+1; + my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1; + my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1; + + if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ + $duplicate[$j]=1; + } + elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ + $duplicate[$i]=1; + } + else { + } + } + elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"})) #Recherche d'inclusion dans les deux sens car els match sont classé par query coord, par par subject coord + { + my $overlap_length = $match2{"Subject_end"} - $match1{"Subject_start"}+1; + my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1; + my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1; + + if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ + $duplicate[$j]=1; + } + elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ + $duplicate[$i]=1; + } + else { + } + } + } + } + + undef @current_match_table_filtered; + for (my $i=0;$i<=$#current_match_table;$i++){ + if ($duplicate[$i] == 0){ + push (@current_match_table_filtered,$current_match_table[$i]); + } + else { + $stats_large_overlapping++; + } + } + + if ($#current_match_table > $#current_match_table_filtered){ + @current_match_table = @current_match_table_filtered; + if ($VERBOSE eq "ON"){ + print LF "Table Match filtered 2 ($#current_match_table)\n"; + for (my $i=0;$i<=$#current_match_table;$i++){ + my %match=%{$current_match_table[$i]}; + print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; + print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; + } + } + + } + + undef @duplicate; + for (my $i=0;$i<=$#current_match_table;$i++){ + push (@duplicate,0); + } + + # Ensuite Query (Subject puis Query pour evitez des deletions complementaires query update qui enleverait toutes les entrées) + + for (my $i=0;$i<=$#current_match_table-1;$i++){ + my %match1=%{$current_match_table[$i]}; + for (my $j=$i+1;$j<=$#current_match_table;$j++){ + my %match2=%{$current_match_table[$j]}; + if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"})) + { + my $overlap_length = $match1{"Query_end"} - $match2{"Query_start"}+1; + my $length1 = $match1{"Query_end"} - $match1{"Query_start"} +1; + my $length2 = $match2{"Query_end"} - $match2{"Query_start"} +1; + + if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ + $duplicate[$j]=1; + } + elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ + $duplicate[$i]=1; + } + else { + } + } + #un seul sans les query sont classés + } + } + + undef @current_match_table_filtered; + for (my $i=0;$i<=$#current_match_table;$i++){ + if ($duplicate[$i] == 0){ + push (@current_match_table_filtered,$current_match_table[$i]); + } + else { + $stats_large_overlapping++; + } + } + + if ($#current_match_table > $#current_match_table_filtered){ + @current_match_table = @current_match_table_filtered; + if ($VERBOSE eq "ON"){ + print LF "Table Match filtered 3 ($#current_match_table)\n"; + for (my $i=0;$i<=$#current_match_table;$i++){ + my %match=%{$current_match_table[$i]}; + print LF $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; + print LF $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; + } + } + } + + + + #Fusion des Hsp et Calcul des nouvelles metriques + + my $overlap_length = 0; + my $Subject_coverage; + my $Query_coverage; + my $Identity; + my $nb_match = 0; + + + my $nb_covered_subject=0; + my $nb_covered_query=0; + my %match=%{$current_match_table[0]}; + my $q_length = $match{"Query_length"}; + my $sub_length = $match{"Subject_length"}; + my $subject = $match{"Subject"}; + my $min_query = $match{"Query_start"}; + my $max_query = $match{"Query_end"}; + my $min_subject = $match{"Subject_start"}; + my $max_subject = $match{"Subject_end"}; + my $orientation = $match{"Orientation"}; + my $Query = $match{"Query"}; + my $Subject_Id = $match{"Subject_id"}; + my $Subject = $match{"Subject"}; + + + for (my $i=0;$i<=$#current_match_table;$i++){ + my %match1=%{$current_match_table[$i]}; + $nb_covered_subject += $match1{"Subject_end"} - $match1{"Subject_start"} +1; + $nb_covered_query += $match1{"Query_end"} - $match1{"Query_start"} +1; + + if ($match1{"Query_start"}<$min_query){ + $min_query = $match1{"Query_start"}; + } + if ($match1{"Query_end"}>$max_query){ + $max_query = $match1{"Query_end"}; + } + if ($match1{"Subject_start"}<$min_subject){ + $min_subject = $match1{"Subject_start"}; + } + if ($match1{"Subject_end"}>$max_subject){ + $max_subject = $match1{"Subject_end"}; + } + + $nb_match += $match1{"Similarity"}; + + for (my $j=$i+1;$j<=$#current_match_table;$j++){ + my $overlap_query=0; + my $overlap_subject=0; + my %match2=%{$current_match_table[$j]}; + + if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"})) + { + $overlap_subject = $match1{"Subject_end"} - $match2{"Subject_start"}+1; + } + elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"})) + { + $overlap_subject = $match2{"Subject_end"} - $match1{"Subject_start"}+1; + } + + if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"})) + { + $overlap_query = $match1{"Query_end"} - $match2{"Query_start"} +1; + } + elsif (($match2{"Query_start"}<=$match1{"Query_start"})&&($match2{"Query_end"}>=$match1{"Query_start"})&&($match2{"Query_end"}<=$match1{"Query_end"})) + { + $overlap_query = $match2{"Query_end"} - $match1{"Query_start"}+1; + } + + if ($overlap_query > $overlap_subject){ + $overlap_length += $overlap_query; + } + else { + $overlap_length += $overlap_subject; + } + + $nb_covered_subject -= $overlap_subject; + $nb_covered_query-= $overlap_query; + } + + } + ### Cas des tblastx ou le nb match est en aa, et les nb_covered en base + if ($nb_match/$nb_covered_subject<0.34){ + $nb_match = $nb_match *3; + } + #### + + $Identity = sprintf("%.2f",($nb_match-$overlap_length)*100/$nb_covered_subject); + + $Subject_coverage = sprintf("%.2f",$nb_covered_subject*100/$sub_length); + $Query_coverage = sprintf("%.2f",$nb_covered_query*100/$q_length); + + if ($Subject_coverage<0.1){$stats_query_coverage{"0-10%"}++;} + elsif($Subject_coverage<0.2){$stats_query_coverage{"10-20%"}++;} + elsif($Subject_coverage<0.3){$stats_query_coverage{"20-30%"}++;} + elsif($Subject_coverage<0.4){$stats_query_coverage{"30-40%"}++;} + elsif($Subject_coverage<0.5){$stats_query_coverage{"40-50%"}++;} + elsif($Subject_coverage<0.6){$stats_query_coverage{"50-60%"}++;} + elsif($Subject_coverage<0.7){$stats_query_coverage{"60-70%"}++;} + elsif($Subject_coverage<0.8){$stats_query_coverage{"70-80%"}++;} + elsif($Subject_coverage<0.9){$stats_query_coverage{"80-90%"}++;} + else{$stats_query_coverage{"90-100%"}++;} + + if ($VERBOSE eq "ON"){ + print LF "Final\n"; + print LF $Query,"\t",$Subject_Id,"\t",$orientation,"\t",$min_query,"\t",$max_query,"\t",$min_subject,"\t",$max_subject,"\t",$sub_length,"\t"; + print LF "NB:",$nb_match,"\t","O:",$overlap_length,"\t","CQ:",$nb_covered_query,"\t","CS:",$nb_covered_subject,"\t",$Query_coverage,"\t",$Subject_coverage,"\t",$Identity,"\n"; + + } + + if ($subject=~/^(.*?)\s*$/){ + $subject = $1; + } + my %current_match_joined; + $current_match_joined{"Query"}=$Query; + $current_match_joined{"Query_start"}=$min_query; + $current_match_joined{"Query_end"}=$max_query; + $current_match_joined{"Query_length"}=$q_length; + $current_match_joined{"QCoverage"} = $Query_coverage; + $current_match_joined{"Subject_id"}=$Subject_Id; + $current_match_joined{"Subject"}=$subject; + $current_match_joined{"Subject_start"}=$min_subject; + $current_match_joined{"Subject_end"}=$max_subject; + $current_match_joined{"Subject_length"}=$sub_length; + $current_match_joined{"SCoverage"} = $Subject_coverage; + $current_match_joined{"Similarity"}=$Identity; + my $NBmatch = $nb_match-$overlap_length; + $current_match_joined{"Nbmatch"}=$NBmatch; + $current_match_joined{"Display"}="$Query\t$Subject_Id\t$orientation\t$Query_coverage%\t$Subject_coverage%\t$Identity%\t$min_query\t$max_query\t$min_subject\t$max_subject\t$NBmatch\t$q_length\t$sub_length\t$subject"; + + push(@match_joined,\%current_match_joined); + #print OJ $match_joined{"Display"},"\n"; + } + my @match_joined_sorted = sort sortbyrelevanceandsubject @match_joined; + for (my $i=0;$i<=$#match_joined_sorted;$i++){ + my %match = %{$match_joined_sorted[$i]}; + print OJ $match{"Display"},"\n"; + } +} + + +#my %all_match_joined_best; + +#foreach my $key (sort sortkey keys %all_match_joined){ +# my %match = %{$all_match_joined{$key}}; +# print OJ $match{"Display"},"\n"; +#} + +#close (OB); +close (OJ); + + +print LF "Nb query : $#query_keys\n"; +print LF "Nb match : $stats_nb_match\n"; +print LF "Nb match filtered included / too large overlap : $stats_included / $stats_large_overlapping \n"; +print LF "Query coverage\n"; +print LF "percent:\t"; +foreach my $key (sort {$a cmp $b} keys %stats_query_coverage) { + print LF $key,"\t"; +} +print LF "\n number :\t"; +foreach my $key (sort {$a cmp $b} keys %stats_query_coverage) { + print LF $stats_query_coverage{$key},"\t"; +} +print LF "\n"; + + +close (LF); + + +# for (my $i=0;$i<=$#all_match_joined;$i++){ + # my $match_joined = %{$all_match_joined[$i]}; + # print $match_joined{"Query"},"\t",$match_joined{"Subject"},"\t",$match_joined{"Subject_id"},"\t",$match_joined{"Similarity"},"\t",$match_joined{"Query_length"},"\t",$match_joined{"Subject_length"},"\n"; +# } + + +sub mysort{ + my %matcha=%{$a}; + my %matchb=%{$b}; + + #print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\n"; + + $matcha{"Query_start"} <=> $matchb{"Query_start"} + || + $matcha{"Query_end"} <=> $matchb{"Query_end"} + +} + +sub sortbyquerycoord{ + my %matcha=%{$a}; + my %matchb=%{$b}; + + #print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\n"; + + $matcha{"Query_start"} <=> $matchb{"Query_start"} + || + $matcha{"Query_end"} <=> $matchb{"Query_end"} + +} + +sub sortbyrelevanceandsubject{ + my %matcha=%{$a}; + my %matchb=%{$b}; + + $matchb{"Nbmatch"} <=> $matcha{"Nbmatch"} + || + $matchb{"QCoverage"} <=> $matcha{"QCoverage"} + || + $matcha{"Subject"} cmp $matchb{"Subject"} +} + + +sub sortkey { + my @fieldsa = split (/\#/,$a); + my @fieldsb = split (/\#/,$b); + + #print "$a\n$b\n"; + #print $fieldsa[0]," cmp ",$fieldsb[0],"\n"; + #exit(0); + + $fieldsa[0] cmp $fieldsb[0] + || + $fieldsa[1] cmp $fieldsb[1] + || + $fieldsa[2] <=> $fieldsb[2] + +} diff -r c52e74b98773 -r 8dfa09868059 genephys/MergeBlastResults.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/MergeBlastResults.xml Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,21 @@ + +Parse Blast result (Tabular) to merge feature + + MergeBlastResults.pl -input_blasttab_file $input_blast -output_joinmatch_file $output_merge -log_file $log_file -max_overlap_fraction $max_overlap_fraction -max_overlap_length_ignored $max_overlap_length_ignored -header $header + + + + + + + + + + + + + + + + + diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenesfromsegment.pl --- a/genephys/extractgenesfromsegment.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,223 +0,0 @@ -#!/usr/bin/perl -use strict; - -my $input_gene_file = $ARGV[0]; -my $input_segment_file = $ARGV[1]; -my $output_seq_nuc = $ARGV[2]; -my $output_seq_prot = $ARGV[3]; - -open(IG, $input_gene_file) or die("Can't open $input_gene_file\n"); -open(IS, $input_segment_file) or die("Can't open $input_segment_file\n"); -open (ON,">$output_seq_nuc") or die ("Can't open $output_seq_nuc\n"); -open (OP,">$output_seq_prot") or die ("Can't open $output_seq_prot\n"); - -my $current_annotation=""; -my @gene_annotation; -my @list_gene; -my @current_gene; -my %current_gene_annotation; - -# while ((my $line=)&&($#list_gene<5)){ -while (my $line=){ - if ($line =~/\/){ - if (@current_gene){ - my %current_gene_annotation = %{&extract_annotation(\@current_gene)}; - - - push(@list_gene,\%current_gene_annotation); - undef @current_gene; - } - push (@current_gene,$line); - - } - else { - push (@current_gene,$line); - } -} -close(IG); - -# for (my $i=0;$i<=$#list_gene;$i++){ - # my %current_gene_annotation = %{$list_gene[$i]}; - # foreach my $key (keys %current_gene_annotation){ - # print "TEST ",$key,"\t",$current_gene_annotation{$key},"\n"; - # } -# } - -# my @segment_chr; -# my @segment_start; -# my @segment_stop; - -while (my $line=){ - print "\n$line"; - if ($line =~/(.*?)\:(\d+)\.\.(\d+)/){ - my $chr = $1; - my $start = $2; - my $stop = $3; - - my @list_gene_selected = @{&extract_gene_from_position($chr,$start,$stop,\@list_gene)}; - - if ($#list_gene_selected>=0){ - for (my $i=0;$i<=$#list_gene_selected;$i++){ - my %current = %{$list_gene_selected[$i]},"\n"; - print $current{"00 BN_Id"},"\t",$current{"01 BN_Position"},"\t",$current{"02 ATH_Function"},"\t",$current{"03 ATH_Id"},"\n"; - - my $seq = $current{"04 Sequence"}; - my $formated_seq; - my @SEQ = split(//,$seq); - my $compt_seq=0; - for (my $i=0;$i<=$#SEQ;$i++){ - if ($SEQ[$i] =~ /[ATGNCXatgcnx]/){ - if ($compt_seq == 60){ - $formated_seq .="\n"; - $compt_seq=0; - } - $formated_seq.= $SEQ[$i]; - $compt_seq ++; - } - } - print ON ">",$current{"01 BN_Position"}," (",$current{"00 BN_Id"},")","\n",$formated_seq,"\n"; - - my $prot = $current{"05 Protein"}; - my $formated_prot; - my @PROT = split(//,$prot); - my $compt_prot=0; - for (my $i=0;$i<=$#PROT;$i++){ - if ($PROT[$i] =~ /[A-Za-z\*\+]/){ - if ($compt_prot == 60){ - $formated_prot .="\n"; - $compt_prot=0; - } - $formated_prot.= $PROT[$i]; - $compt_prot ++; - } - } - print OP ">",$current{"01 BN_Position"}," (",$current{"00 BN_Id"},")","\n",$formated_prot,"\n"; - - # foreach my $key (sort keys %current){ - # print " ",$key,"\t",$current{$key},"\n"; - # } - # print "\n"; - } - } - else { - print " NO GENE FOUND\n"; - } - } - else { - print "Error Parsing n°2 : $line\n"; - } -} - -close (IS); - -close (ON); -close (OP); - - -# my @list_gene_selected = @{&extract_gene_from_position("chrA01",1437,3000,\@list_gene)}; - - -# for (my $i=0;$i<=$#list_gene_selected;$i++){ - # my %current = %{$list_gene_selected[$i]},"\n"; - # foreach my $key (keys %current){ - # print $key,"\t",$current{$key},"\n"; - # } -# } - - - - - - - -sub extract_annotation{ - my $ref = shift; - my @gene = @$ref; - my %gene_annotation; - for (my $i=0;$i<=$#gene;$i++){ - #print "TEST : $gene[$i]\n"; - if ($gene[$i]=~/\(.*?)\<\/Id\>/){ - $gene_annotation{"00 BN_Id"} = $1; - } - elsif ($gene[$i]=~/\(.*?)\<\/Position\>/){ - $gene_annotation{"01 BN_Position"} = $1; - } - elsif ($gene[$i]=~/\(.*?)\<\/ATH_Function\>/){ - $gene_annotation{"02 ATH_Function"} = $1; - } - elsif ($gene[$i]=~/\(.*?)\<\/SId\>/){ - $gene_annotation{"03 ATH_Id"} = $1; - } - elsif ($gene[$i]=~/\(.*?)\<\/CDS_Sequence\>/){ #modif 1.11 - $gene_annotation{"04 Sequence"} = $1; - } - elsif ($gene[$i]=~/\(.*?)\<\/Protein\>/){ - $gene_annotation{"05 Protein"} = $1; - # print "TEST : $1\n"; - # exit (0); - } - } - if ((!$gene_annotation{"00 BN_Id"})||(!$gene_annotation{"01 BN_Position"})||(!$gene_annotation{"04 Sequence"})||(!$gene_annotation{"05 Protein"})){ - - print "Erreur Parsing n°3\n"; - print "Id :",$gene_annotation{"00 BN_Id"},"\n"; - print "Position : ",$gene_annotation{"01 BN_Position"},"\n"; - print "ATH Function : ",$gene_annotation{"02 ATH_Function"},"\n"; - print "ATH Id : ",$gene_annotation{"03 ATH_Id"},"\n"; - print "CDS seq : ",$gene_annotation{"04 Sequence"},"\n"; - print "CDS prot : ",$gene_annotation{"05 Protein"},"\n"; - for (my $i=0;$i<=$#gene;$i++){ - print $gene[$i],"\n"; - } - - exit(0); - - } - - return \%gene_annotation; -} - - -sub extract_gene_from_position{ - my $chr = shift; - my $start = shift; - my $end = shift; - - my $ref = shift; - my @list_gene = @$ref; - my @list_gene_selected; - - for (my $i=0;$i<=$#list_gene;$i++){ - my %current_gene_annotation = %{$list_gene[$i]}; - my $current_position = $current_gene_annotation{"01 BN_Position"}; - my $current_chr; - my $current_start; - my $current_end; - - #Extraction de la position - if ($current_position =~ /^(.*?)\:(\d+)[\.]+(\d+)/){ # modif 1.11 - $current_chr = $1; - $current_start = $2; - $current_end = $3; - if ($current_start > $current_end){ - ($current_start,$current_end) = ($current_end,$current_start); - } - } - else { - print "Erreur Parsing n°1\npos : $current_position\n"; - exit(0); - } - #Test de selection - if ($chr eq $current_chr){ - if ( - ($current_end>=$start)&&($current_end<=$end) || - ($current_start>=$start)&&($current_start<=$end) - ) - { - push(@list_gene_selected,$list_gene[$i]); - } - } - - } - return \@list_gene_selected; -} \ No newline at end of file diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenesfromsegment.xml --- a/genephys/extractgenesfromsegment.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - -Extract gene sequence (nucleic, proteic) and function - - extractgenesfromsegment.pl $input_genexml $input_segment $output_gene_nuc $output_gene_prot > $output_gene_function - - - - - - - - - - - - - - - diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsegment.pl --- a/genephys/extractgenomicsegment.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,176 +0,0 @@ -#!/usr/bin/perl -#V1.10 -use strict; - - -my $inputfile1 = $ARGV[0]; -my $inputfile2 = $ARGV[1]; -my $WINDOW = $ARGV[2]; -my $OFFSET = $ARGV[3]; - -if (!$WINDOW){$WINDOW = 200000;} -if (!$OFFSET){$OFFSET = 100000;} - -open(IF1, $inputfile1) or die("Can't open $inputfile1\n"); -open(IF2, $inputfile2) or die("Can't open $inputfile2\n"); -my $current_annotation=""; -my @list_marquer; -my %chr; -my %position; - -# print "$inputfile2\n"; - -while (my $line=){ - my @cols = split(/\t/,$line); - my %current; - # Number#Map#Name#Chr#Position#GeneAT#FunctionAT - - my $Number = $cols[0]; - my $Map = $cols[2]; - my $Name = $cols[7]; - my $Locus = $cols[8]; - my $Chr = $cols[19]; - my $Position = $cols[20]; - $Position =~ s/\s+//g; - my $GeneAT=$cols[32]; - my $FunctionAT=$cols[37]; - $chr{$Name} = $Chr; - $position{$Name} = $Position; - - ### Modification 1.10 - if ($Locus ne $Name){ - $chr{$Locus} = $Chr; - $position{$Locus} = $Position; - } - ### - - #print "$Number#$Map#$Name#$Chr#$Position#$GeneAT#$FunctionAT\n"; -} -close (IF1); - -# my @key = keys(%chr); -# for (my $i=0;$i<=$#key;$i++){ - # print $key[$i],"\n"; -# } - -while (my $line=){ - my @cols = split (/\s+/,$line); - for (my $i=0;$i<=$#cols;$i++){ - my $current = $cols[$i]; - chomp($current); - if ($current !~ /^\s+$/){ - push(@list_marquer,$current); - } - } -} -close (IF2); - -my %coord_by_chr; -for (my $i=0;$i<=$#list_marquer;$i++){ - my $current_name = $list_marquer[$i]; - my $current_chr = $chr{$current_name}; - my $current_position = $position{$current_name}; - - if ($current_position =~ /^\d+$/){ - my @tbl_coord_for_current_chr; - if ($coord_by_chr{$current_chr}){ - @tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}}; - } - push(@tbl_coord_for_current_chr,$current_position); - $coord_by_chr{$current_chr}=\@tbl_coord_for_current_chr; - } - elsif (($current_position eq "-")||($current_position =~/none/i)){ - - } - else { - chomp($current_position); - #$current_position =~ s/\s+//g; - print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n"; - print $list_marquer[$i],"\n"; - #exit(0); - } -} - -# foreach my $key (keys %coord_by_chr){ - # my @tbl_coord = @{$coord_by_chr{$key}}; - # print "\n$key\n"; - # @tbl_coord = sort { $a <=> $b } @tbl_coord; - # for (my $i=0;$i<=$#tbl_coord;$i++){ - # print $tbl_coord[$i],"\n"; - # } -# } - -foreach my $key (sort keys %coord_by_chr){ - my @tbl_coord = @{$coord_by_chr{$key}}; - # print "TEST : $key\n"; - @tbl_coord = sort { $a <=> $b } @tbl_coord; - my $current_start; - my $current_stop; - my $current_start_offset; - my $current_stop_offset; - - - for (my $i=0;$i<=$#tbl_coord;$i++){ - if (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]} - - # print "$i : $current_start / $current_stop\n"; - if ($tbl_coord[$i]>$current_stop+$WINDOW){ - #OFFSET - if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;} - $current_stop_offset = $current_stop + $OFFSET; - ####### - print $key,":",$current_start_offset,"..",$current_stop_offset,"\n"; - - $current_start = $tbl_coord[$i]; - $current_stop = $tbl_coord[$i]; - - if ($i==$#tbl_coord){ - #OFFSET - if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;} - $current_stop_offset = $current_stop + $OFFSET; - ####### - print $key,":",$current_start_offset,"..",$current_stop_offset,"\n"; - } - } - else { - $current_stop=$tbl_coord[$i]; - if ($i==$#tbl_coord){ - #OFFSET - if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;} - $current_stop_offset = $current_stop + $OFFSET; - ####### - print $key,":",$current_start_offset,"..",$current_stop_offset,"\n"; - } - } - } -} -#Traitement du dernier - -# if ($#tbl_coord == 0){ - # print $key,":",$tbl_coord[$i],"\n"; -# } -# else { - # if ($i==0){ - # push (@current_table,$tbl_coord[$i]); - # } - # else { - # if ($tbl_coord[$i]>$current_table[$#current_table]+$WINDOW){ - # print $key,":",$current_table[0],":",$current_table[$#current_table],"\n"; - # undef @current_table; - # push (@current_table,$tbl_coord[$i]); - # } - # else { - # push (@current_table,$tbl_coord[$i]); - # } - # } -# } - - -# print "\n"; -# foreach my $key (keys %coord_by_chr){ - # print "\n$key\n"; - # @tbl_coord = sort { $a <=> $b } @tbl_coord; - # for (my $i=0;$i<=$#tbl_coord;$i++){ - # print $tbl_coord[$i],"\n"; - # } -# } diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsegment.xml --- a/genephys/extractgenomicsegment.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ - -Extract the coordinate of genomic segment containing the genetic markers - - extractgenomicsegment.pl $input_geneticmap $input_markers $window $offset > $output_file - - - - - - - - - - - - - - - - - diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsequencefromsegment.pl --- a/genephys/extractgenomicsequencefromsegment.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -#!/usr/bin/perl -#V1.10 -my $inputsegment = $ARGV[0]; -my $inputfasta = $ARGV[1]; - -open(IS, $inputsegment) or die ("Can't open $inputsegment\n"); -open(IF, $inputfasta) or die ("Can't open $inputfasta\n"); - - -my @header; -my @start; -my @end; -my @segment_header; - -while (my $ligne = ){ - if ($ligne=~/(.*?):(\d+)\.+(\d+)/){ - push (@header,$1); - push (@start,$2); - push (@end,$3); - push (@segment_header,$1.":".$2."..".$3); - } -} - -close (IS); - -#print "TEST : $#header\n"; - -my %genome; - -my $current_header; -my $current_seq=""; -while (my $ligne = ){ - if ($ligne =~ /^\>(.*?)\s*$/){ - if ($current_header){ - $genome{$current_header} = $current_seq; - } - - # my $length = length($current_seq); - # print "TEST : $current_header\t$length\n"; - # print "TEST : $current_header\n"; - $current_header=$1; - $current_seq = ""; - $current_position=0; - } - else { - if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ - $current_seq .= $1; - } - else { - print STDERR "Erreur Parsing n°1\n$ligne\n"; - } - } -} - -#TRAITEMENT DU DERNIER -if ($current_header){ - $genome{$current_header} = $current_seq; - undef($current_seq); -} - -# foreach my $key (keys %genome){ - # print $key,"\t",length($genome{$key}),"\n"; -# } - -for (my $i=0;$i<=$#header;$i++){ - my $compt=0; - my $current_seq=""; - print ">",$header[$i],":",$start[$i],"..",$end[$i],"\n"; - ### Modification 1.10 - if ($end[$i]>length($genome{$header[$i]})){ - $end[$i] = length($genome{$header[$i]}); - } - ### - - my @SEQ = split(//,$genome{$header[$i]}); - for (my $coord = $start[$i]-1; $coord<=$end[$i]-1;$coord++){ - $compt++; - # print "TEST : $compt\n"; - if ($compt > 60 ){ - $current_seq .= "\n"; - $compt=1; - } - $current_seq .= $SEQ[$coord]; - - } - print "$current_seq\n"; -} - -close (IF); - diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsequencefromsegment.xml --- a/genephys/extractgenomicsequencefromsegment.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - -Extract the genomic sequence corresponding to a genomic segment (format : chr:start..stop) - - extractgenomicsequencefromsegment.pl $input_segment $input_assembly > $output_file - - - - - - - - - - - - - - - diff -r c52e74b98773 -r 8dfa09868059 genephys/fastaGroomerForMakeBlastdb.pl --- a/genephys/fastaGroomerForMakeBlastdb.pl Wed Aug 20 12:42:40 2014 -0400 +++ b/genephys/fastaGroomerForMakeBlastdb.pl Fri Oct 24 05:54:20 2014 -0400 @@ -1,4 +1,5 @@ #!/usr/bin/perl +#V1.0.0 my $inputfasta = $ARGV[0]; open(IB, $inputfasta) or die ("Can't open $inputfasta \n"); diff -r c52e74b98773 -r 8dfa09868059 genephys/fastaGroomerForMakeBlastdb.xml --- a/genephys/fastaGroomerForMakeBlastdb.xml Wed Aug 20 12:42:40 2014 -0400 +++ b/genephys/fastaGroomerForMakeBlastdb.xml Fri Oct 24 05:54:20 2014 -0400 @@ -1,4 +1,4 @@ - + fasta Groomer For MakeBlastdb fastaGroomerForMakeBlastdb.pl $input_fasta > $output_fasta diff -r c52e74b98773 -r 8dfa09868059 genephys/mergeAllBestBlast.pl --- a/genephys/mergeAllBestBlast.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -#!/usr/bin/perl -my $inputblastn = $ARGV[0]; -my $inputtblastx = $ARGV[1]; -my $inputblastx = $ARGV[2]; -my $inputblastp = $ARGV[3]; - -open(IN, $inputblastn) or die ("Can't open $inputblastn \n"); -open(ITX, $inputtblastx) or die ("Can't open $inputtblastx \n"); -open(IX, $inputblastx) or die ("Can't open $inputblastx \n"); -open(IP, $inputblastp) or die ("Can't open $inputblastp \n"); - -my %blastx; -my %tblastx; -my %blastp; - -while (my $ligne = ){ - my @fields = split (/\t/,$ligne); - chomp($ligne); - $tblastx{$fields[0]} = $ligne; -} -close (ITX); - -while (my $ligne = ){ - my @fields = split (/\t/,$ligne); - chomp($ligne); - $blastx{$fields[0]} = $ligne; -} -close (IX); - -while (my $ligne = ){ - my @fields = split (/\t/,$ligne); - chomp($ligne); - $blastp{$fields[0]} = $ligne; -} -close (IP); - - -while (my $ligne = ){ - my @fields = split (/\t/,$ligne); - my $query = $fields[0]; - print "BLASTN\t$ligne"; - print "TBLASTX\t",$tblastx{$query},"\n"; - print "BLASTX\t",$blastx{$query},"\n"; - print "BLASTP\t",$blastp{$query},"\n\n"; - -} -close (IN); diff -r c52e74b98773 -r 8dfa09868059 genephys/mergeAllBestBlast.xml --- a/genephys/mergeAllBestBlast.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - -Merge best results from Blast - - mergeAllBestBlast.pl $input_blastn $input_tblastx $input_blastx $input_blastp > $output_results - - - - - - - - - - - - - - - diff -r c52e74b98773 -r 8dfa09868059 genephys/parseblasttab.pl --- a/genephys/parseblasttab.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,511 +0,0 @@ -#!/usr/bin/perl -my $inputblast = $ARGV[0]; -my $outputjoin = $ARGV[1]; -my $outputbest = $ARGV[2]; -open(IB, $inputblast) or die ("Can't open $inputblast \n"); -open (OJ, ">$outputjoin") or die ("Can't open $outputjoin \n"); -open (OB, ">$outputbest") or die ("Can't open $outputbest \n"); - -my %all_match; -my @all_match_joined; - - -my $MAX_OVERLAP_FRACTION = 0.5; -my $MAX_OVERLAP_LENGTH_IGNORED = 3; - - -while (my $ligne = ){ - my @fields = split (/\t/,$ligne); - my %match; - $match{"Query"}=$fields[0]; - $match{"Subject_id"}=$fields[1]; - $match{"Subject_start"}=$fields[8]; - $match{"Subject_end"}=$fields[9]; - $match{"Similarity"}=$fields[13]; - $match{"Query_length"}=$fields[14]; - $match{"Subject_length"}=$fields[15]; - $match{"Subject"}=$fields[16]; - - if ($fields[6]<=$fields[7]){ - $match{"Query_start"}=$fields[6]; - $match{"Query_end"}=$fields[7]; - $match{"Orientation"}="+"; - #print "+ $ligne"; - } - else { - $match{"Query_start"}=$fields[7]; - $match{"Query_end"}=$fields[6]; - $match{"Orientation"}="-"; - #print "- $ligne"; - } - - if ($fields[9]<=$fields[8]){ - $match{"Subject_start"}=$fields[9]; - $match{"Subject_end"}=$fields[8]; - $match{"Orientation"}="+"; - #print "+ $ligne"; - } - else { - $match{"Subject_start"}=$fields[8]; - $match{"Subject_end"}=$fields[9]; - $match{"Orientation"}="-"; - #print "- $ligne"; - } - - $match{"Ligne"}=$ligne; - my $key = $match{"Query"}."##".$match{"Subject"}."##".$match{"Orientation"}; - if ($match{"Subject_length"}==0){ - print $ligne,"\n",$match{"Subject_length"},"\n"; - } - my @match_table; - - if ($all_match{$key}){ - @match_table = @{$all_match{$key}}; - } - push (@match_table,\%match); - $all_match{$key} = \@match_table; -} - -foreach my $key (keys %all_match){ - my @match_table = @{$all_match{$key}}; - #### Sort - @match_table = sort mysort @match_table; - - - my @duplicate; - my @overlap; - for (my $i=0;$i<=$#match_table;$i++){ - push (@duplicate,0); - } - print "\nTable Match ($#match_table)\n"; - for (my $i=0;$i<=$#match_table;$i++){ - my %match=%{$match_table[$i]}; - print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; - print $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; - } - - #Scan d'inclusion strict - for (my $i=0;$i<=$#match_table;$i++){ - my %match1=%{$match_table[$i]}; - for (my $j=0;$j<=$#match_table;$j++){ - if (($j != $i)&&($duplicate[$j]==0)){ # On scan dans les deux sens, pas seuelment $j = $i+1 a cause du last; - my %match2=%{$match_table[$j]}; - # Inclus Subject - if (($match1{"Subject_start"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"})) - { - $duplicate[$i]=1; - # print $i," : 1 : ",$match1{"Query"},"\t",$match1{"Subject_id"},"\t",$match1{"Query_start"},"\t",$match1{"Query_end"},"\t",$match1{"Subject_start"},"\t",$match1{"Subject_end"},"\n"; - # print $j," : 1 : ",$match2{"Query"},"\t",$match2{"Subject_id"},"\t",$match2{"Query_start"},"\t",$match2{"Query_end"},"\t",$match2{"Subject_start"},"\t",$match2{"Subject_end"},"\n"; - - last; - } - # Inclus Query - elsif (($match1{"Query_start"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"})) - { - $duplicate[$i]=2; - # print $i," : 2 : ",$match1{"Query"},"\t",$match1{"Subject_id"},"\t",$match1{"Query_start"},"\t",$match1{"Query_end"},"\t",$match1{"Subject_start"},"\t",$match1{"Subject_end"},"\n"; - # print $j," : 2 : ",$match2{"Query"},"\t",$match2{"Subject_id"},"\t",$match2{"Query_start"},"\t",$match2{"Query_end"},"\t",$match2{"Subject_start"},"\t",$match2{"Subject_end"},"\n"; - last; - } - - } - } - } - - my @match_table_filtered; - for (my $i=0;$i<=$#match_table;$i++){ - if ($duplicate[$i] == 0){ - push (@match_table_filtered,$match_table[$i]); - } - } - - if ($#match_table > $#match_table_filtered){ - @match_table = @match_table_filtered; - print "Table Match filtered 1 ($#match_table)\n"; - for (my $i=0;$i<=$#match_table;$i++){ - my %match=%{$match_table[$i]}; - print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; - print $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; - #print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t",$match{"Subject_start"},"\t",$match{"Subject_end"},"\n"; - } - } - - undef @duplicate; - for (my $i=0;$i<=$#match_table;$i++){ - push (@duplicate,0); - } - ######## - - ###Scan d'overlap trop important - # D'abord subject - for (my $i=0;$i<=$#match_table;$i++){ - my %match1=%{$match_table[$i]}; - for (my $j=$i+1;$j<=$#match_table;$j++){ - my %match2=%{$match_table[$j]}; - if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"})) - { - my $overlap_length = $match1{"Subject_end"} - $match2{"Subject_start"}+1; - my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1; - my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1; - - if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$j]=1; - # print "DUPLICATE : $j\n"; - } - elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$i]=1; - # print "DUPLICATE : $i\n"; - } - else { - } - - # print "$i : 3 : $overlap_length : $length1\n"; - # print "$j : 3 : $overlap_length : $length2\n"; - } - elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"})) - { - my $overlap_length = $match2{"Subject_end"} - $match1{"Subject_start"}+1; - my $length1 = $match1{"Subject_end"} - $match1{"Subject_start"} +1; - my $length2 = $match2{"Subject_end"} - $match2{"Subject_start"} +1; - - if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$j]=1; - # print "DUPLICATE : $j\n"; - # print "($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)\n"; - } - elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$i]=1; - # print "DUPLICATE : $i\n"; - # print "($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)\n"; - } - else { - } - - # print "((",$match2{"Subject_start"},"<=",$match1{"Subject_start"},")&&(",$match2{"Subject_end"},">=",$match1{"Subject_start"},")&&(",$match2{"Subject_end"},"<=",$match1{"Subject_end"},"))\n"; - # print "$i : 4 : $overlap_length : $length1\n"; - # print "$j : 4 : $overlap_length : $length2\n"; - } - } - } - - undef @match_table_filtered; - for (my $i=0;$i<=$#match_table;$i++){ - if ($duplicate[$i] == 0){ - push (@match_table_filtered,$match_table[$i]); - } - } - - if ($#match_table > $#match_table_filtered){ - @match_table = @match_table_filtered; - print "Table Match filtered 2 ($#match_table)\n"; - for (my $i=0;$i<=$#match_table;$i++){ - my %match=%{$match_table[$i]}; - print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; - print $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; - #print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t",$match{"Subject_start"},"\t",$match{"Subject_end"},"\n"; - } - } - - undef @duplicate; - for (my $i=0;$i<=$#match_table;$i++){ - push (@duplicate,0); - } - - # Ensuite Query (Subject puis Query pour evitez des deletions complementaires query update qui enleverait toutes les entrées) - - for (my $i=0;$i<=$#match_table;$i++){ - my %match1=%{$match_table[$i]}; - for (my $j=$i+1;$j<=$#match_table;$j++){ - my %match2=%{$match_table[$j]}; - if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"})) - { - my $overlap_length = $match1{"Query_end"} - $match2{"Query_start"}+1; - my $length1 = $match1{"Query_end"} - $match1{"Query_start"} +1; - my $length2 = $match2{"Query_end"} - $match2{"Query_start"} +1; - - if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$j]=1; - # print "DUPLICATE : $j\n"; - } - elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$i]=1; - # print "DUPLICATE : $i\n"; - } - else { - } - - # print "$i : 5 : $overlap_length : $length1\n"; - # print "$j : 5 : $overlap_length : $length2\n"; - } - elsif (($match2{"Query_start"}<=$match1{"Query_start"})&&($match2{"Query_end"}>=$match1{"Query_start"})&&($match2{"Query_end"}<=$match1{"Query_end"})) - { - my $overlap_length = $match2{"Query_end"} - $match1{"Query_start"}+1; - my $length1 = $match1{"Query_end"} - $match1{"Query_start"} +1; - my $length2 = $match2{"Query_end"} - $match2{"Query_start"} +1; - - if (($length1 >= $length2)&&($overlap_length > $length2 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$j]=1; - # print "DUPLICATE : $j\n"; - } - elsif (($length2 >= $length1)&&($overlap_length > $length1 * $MAX_OVERLAP_FRACTION)&&($overlap_length > $MAX_OVERLAP_LENGTH_IGNORED)){ - $duplicate[$i]=1; - # print "DUPLICATE : $i\n"; - } - else { - } - - # print "$i : 6 : $overlap_length : $length1\n"; - # print "$j : 6 : $overlap_length : $length2\n"; - } - } - } - - undef @match_table_filtered; - for (my $i=0;$i<=$#match_table;$i++){ - if ($duplicate[$i] == 0){ - push (@match_table_filtered,$match_table[$i]); - } - } - - if ($#match_table > $#match_table_filtered){ - @match_table = @match_table_filtered; - print "Table Match filtered 3 ($#match_table)\n"; - for (my $i=0;$i<=$#match_table;$i++){ - my %match=%{$match_table[$i]}; - print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t"; - print $match{"Subject_start"},"\t",$match{"Subject_end"},"\t",$match{"Subject_length"},"\t",$match{"Similarity"},"\n"; - # print $match{"Query"},"\t",$match{"Subject_id"},"\t",$match{"Orientation"},"\t",$match{"Query_start"},"\t",$match{"Query_end"},"\t",$match{"Subject_start"},"\t",$match{"Subject_end"},"\n"; - } - } - - - #Calcul des nouvelles metriques - - my $overlap_length = 0; - my $min_query = 0; - my $max_query = 0; - my $min_subject = 0; - my $max_subject = 0; - my $Subject_coverage; - my $Identity; - my $nb_match = 0; - - - my $nb_covered_subject=0; - my $nb_covered_query=0; - my %match=%{$match_table[0]}; - my $q_length = $match{"Query_length"}; - my $sub_length = $match{"Subject_length"}; - my $subject = $match{"Subject"}; - my $min_query = $match{"Query_start"}; - my $max_query = $match{"Query_end"}; - my $min_subject = $match{"Subject_start"}; - my $max_subject = $match{"Subject_end"}; - my $orientation = $match{"Orientation"}; - my $Query = $match{"Query"}; - my $Subject_Id = $match{"Subject_id"}; - my $Subject = $match{"Subject"}; - - - for (my $i=0;$i<=$#match_table;$i++){ - my %match1=%{$match_table[$i]}; - $nb_covered_subject += $match1{"Subject_end"} - $match1{"Subject_start"} +1; - $nb_covered_query += $match1{"Query_end"} - $match1{"Query_start"} +1; - - if ($match1{"Query_start"}<$min_query){ - $min_query = $match1{"Query_start"}; - } - if ($match1{"Query_end"}>$max_query){ - $max_query = $match1{"Query_end"}; - } - if ($match1{"Subject_start"}<$min_subject){ - $min_subject = $match1{"Subject_start"}; - } - if ($match1{"Subject_end"}>$max_subject){ - $max_subject = $match1{"Subject_end"}; - } - - $nb_match += $match1{"Similarity"}; - # print "TEST : ",$match1{"Similarity"},"\t",$nb_match,"\n"; - - for (my $j=$i+1;$j<=$#match_table;$j++){ - my $overlap_query=0; - my $overlap_subject=0; - my %match2=%{$match_table[$j]}; - - if (($match1{"Subject_start"}<=$match2{"Subject_start"})&&($match1{"Subject_end"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"})) - { - $overlap_subject = $match1{"Subject_end"} - $match2{"Subject_start"}+1; - # print "$i : $j : 3 : $overlap_subject\n"; - } - elsif (($match2{"Subject_start"}<=$match1{"Subject_start"})&&($match2{"Subject_end"}>=$match1{"Subject_start"})&&($match2{"Subject_end"}<=$match1{"Subject_end"})) - { - $overlap_subject = $match2{"Subject_end"} - $match1{"Subject_start"}+1; - # print "$i : $j : 4 : $overlap_subject\n"; - } - - if (($match1{"Query_start"}<=$match2{"Query_start"})&&($match1{"Query_end"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"})) - { - $overlap_query = $match1{"Query_end"} - $match2{"Query_start"} +1; - # print "$i : $j : 5 : $overlap_query\n"; - } - elsif (($match2{"Query_start"}<=$match1{"Query_start"})&&($match2{"Query_end"}>=$match1{"Query_start"})&&($match2{"Query_end"}<=$match1{"Query_end"})) - { - $overlap_query = $match2{"Query_end"} - $match1{"Query_start"}+1; - # print "$i : $j : 6 : $overlap_query\n"; - } - - if ($overlap_query > $overlap_subject){ - $overlap_length += $overlap_query; - } - else { - $overlap_length += $overlap_subject; - } - - $nb_covered_subject -= $overlap_subject; - $nb_covered_query-= $overlap_query; - - } - - } - ### Cas des tblastx ou le nb match est en aa, et les nb_covered en base - if ($nb_match/$nb_covered_subject<0.34){ - $nb_match = $nb_match *3; - } - #### - - $Identity = sprintf("%.2f",($nb_match-$overlap_length)*100/$nb_covered_subject); - - $Subject_coverage = sprintf("%.2f",$nb_covered_subject*100/$sub_length); - $Query_coverage = sprintf("%.2f",$nb_covered_query*100/$q_length); - - print "Final\n"; - print $Query,"\t",$Subject_Id,"\t",$orientation,"\t",$min_query,"\t",$max_query,"\t",$min_subject,"\t",$max_subject,"\t",$sub_length,"\t"; - print "NB:",$nb_match,"\t","O:",$overlap_length,"\t","CQ:",$nb_covered_query,"\t","CS:",$nb_covered_subject,"\t",$Query_coverage,"\t",$Subject_coverage,"\t",$Identity,"\n"; - - if ($subject=~/^(.*?)\s*$/){ - $subject = $1; - } - - my %match_joined; - $match_joined{"Query"}=$Query; - $match_joined{"Query_start"}=$min_query; - $match_joined{"Query_end"}=$max_query; - $match_joined{"Query_length"}=$q_length; - $match_joined{"QCoverage"} = $Query_coverage; - $match_joined{"Subject_id"}=$Subject_Id; - $match_joined{"Subject"}=$subject; - $match_joined{"Subject_start"}=$min_subject; - $match_joined{"Subject_end"}=$max_subject; - $match_joined{"Subject_length"}=$sub_length; - $match_joined{"SCoverage"} = $Subject_coverage; - $match_joined{"Similarity"}=$Identity; - $match_joined{"Nbmatch"}=$nb_match-$overlap_length; - $match_joined{"Display"}="$Query\t$Subject_Id\t$orientation\t$Query_coverage%\t$Subject_coverage%\t$Identity%\t$min_query\t$max_query\t$min_subject\t$max_subject\t$q_length\t$sub_length\t$subject"; - - my $chr; - my $start; - my $end; - - if ($match_joined{"Query"}=~/(.*?)\:(\d+)[\.]+(\d+)/){ - $chr =$1; - $start = $2; - $end = $3; - - } - else { - print "Error Parsing Query : ",$match_joined{"Query"},"\n"; - exit(0); - } - - my $subid = $match_joined{"Subject_id"}; - my $nb = $nb_match-$overlap_length; - - my $key = "$chr#$start#$end#$nb#$subid"; - $all_match_joined{$key} = \%match_joined; - - - # my %match_joined; - # my $nb_covered=0; - # my $length=0; - # for (my $i=0;$i<=$#match_table;$i++){ - # my %match=%{$match_table[$i]}; - # $nb_covered+=$match{"Similarity"}; - # $length = $match{"Subject_length"} - # } - # # if ($match{"Subject_length"} == 0){ - # # print $key,"\n",$match{"Ligne"},"\n",$match{"Subject"},"\n"; - # # exit(0); - # # } - # my $similarity = sprintf("%.2f",$nb_covered / $length); - - # print "TEST : ",$key,"\t",$similarity,"\t",$nb_covered,"\t",$length,"\n"; - - - # if ($similarity > 1){ - # for (my $i=0;$i<=$#match_table;$i++){ - # my %match=%{$match_table[$i]}; - # print "----- : ",$match{"Ligne"},"\n"; - # exit(0); - # } - # } - # $match_joined{"Query"}=$match{"Query"}; - # $match_joined{"Subject"}=$match{"Subject"}; - # $match_joined{"Subject_id"}=$match{"Subject_id"}; - # $match_joined{"Similarity"}=$similarity; - # $match_joined{"Query_length"}=$match{"Query_length"}; - # $match_joined{"Subject_length"}=$match{"Subject_length"}; - # push(@all_match_joined,\%match_joined); - -} - -close (IB); - -my %all_match_joined_best; -foreach my $key (sort sortkey keys %all_match_joined){ - my %match = %{$all_match_joined{$key}}; - print OJ $match{"Display"},"\n"; - my $shortkey = $match{"Query"}; - if ($all_match_joined_best{$shortkey}){ - } - else { - $all_match_joined_best{$shortkey} = \%match; - print OB $match{"Display"},"\n"; - } - -} - -# for (my $i=0;$i<=$#all_match_joined;$i++){ - # my $match_joined = %{$all_match_joined[$i]}; - # print $match_joined{"Query"},"\t",$match_joined{"Subject"},"\t",$match_joined{"Subject_id"},"\t",$match_joined{"Similarity"},"\t",$match_joined{"Query_length"},"\t",$match_joined{"Subject_length"},"\n"; -# } - - -sub mysort{ - my %matcha=%{$a}; - my %matchb=%{$b}; - - #print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\n"; - - $matcha{"Query_start"} <=> $matchb{"Query_start"} - || - $matcha{"Query_end"} <=> $matchb{"Query_end"} - -} - -sub sortkey { - my @fieldsa = split (/\#/,$a); - my @fieldsb = split (/\#/,$b); - - #print "$a\n$b\n"; - #print $fieldsa[0]," cmp ",$fieldsb[0],"\n"; - #exit(0); - - $fieldsa[0] cmp $fieldsb[0] - || - $fieldsa[1] <=> $fieldsb[1] - || - $fieldsb[2] <=> $fieldsa[2] - || - $fieldsb[3] <=> $fieldsa[3] -} diff -r c52e74b98773 -r 8dfa09868059 genephys/parseblasttab.xml --- a/genephys/parseblasttab.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ - -Parse Blast result (Tabular) to merge feature - - parseblasttab.pl $input_blast $output_merge $output_best - - - - - - - - - - - - - diff -r c52e74b98773 -r 8dfa09868059 genephys/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/repository_dependencies.xml Fri Oct 24 05:54:20 2014 -0400 @@ -0,0 +1,4 @@ + + + +