# HG changeset patch # User Dave Bouvier # Date 1386784472 18000 # Node ID e7d863c5c5d6032ae15875a47b48a4c45518f4ba # Parent 95612c1596813ca21f5d459af8c85e2eb5cf91a6 Update sam_pileup to use the fasta_indexes data table. diff -r 95612c159681 -r e7d863c5c5d6 sam_pileup.py --- a/sam_pileup.py Mon Aug 26 14:21:12 2013 -0400 +++ b/sam_pileup.py Wed Dec 11 12:54:32 2013 -0500 @@ -8,9 +8,8 @@ -o, --output1=o: Output pileup -R, --ref=R: Reference file type -n, --ownFile=n: User-supplied fasta reference file - -d, --dbkey=d: dbkey of user-supplied file - -x, --indexDir=x: Index directory -b, --bamIndex=b: BAM index file + -g, --index=g: Path of the indexed reference genome -s, --lastCol=s: Print the mapping quality as the last column -i, --indels=i: Only output lines containing indels -M, --mapCap=M: Cap mapping quality @@ -31,24 +30,9 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): - seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR - seqPath = '' - for line in open( seqFile ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seqPath = fields[2].strip() - break - return seqPath - def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) - seqPath = check_seq_file( options.dbkey, options.indexDir ) # output version # of tool try: tmp = tempfile.NamedTemporaryFile().name @@ -77,7 +61,6 @@ tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir ) tmpf1_name = tmpf1.name tmpf1.close() - tmpf1fai_name = '%s.fai' % tmpf1_name #link bam and bam index to working directory (can't move because need to leave original) os.symlink( options.input1, tmpf0bam_name ) os.symlink( options.bamIndex, tmpf0bambai_name ) @@ -100,9 +83,9 @@ try: #index reference if necessary and prepare pileup command if options.ref == 'indexed': - if not os.path.exists( "%s.fai" % seqPath ): 
- raise Exception, "No sequences are available for '%s', request them by reporting this error." % options.dbkey - cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 ) + if not os.path.exists( "%s.fai" % options.index ): + raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index + cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 ) elif options.ref == 'history': os.symlink( options.ownFile, tmpf1_name ) cmdIndex = 'samtools faidx %s' % ( tmpf1_name ) diff -r 95612c159681 -r e7d863c5c5d6 sam_pileup.xml --- a/sam_pileup.xml Mon Aug 26 14:21:12 2013 -0400 +++ b/sam_pileup.xml Wed Dec 11 12:54:32 2013 -0500 @@ -1,4 +1,4 @@ - + from BAM dataset samtools @@ -11,7 +11,7 @@ #if $refOrHistory.reference == "history": --ownFile=$refOrHistory.ownFile #else: - --ownFile="None" + --index=${refOrHistory.index.fields.path} #end if --dbkey=${input1.metadata.dbkey} --indexDir=${GALAXY_DATA_INDEX_DIR} @@ -41,7 +41,14 @@ - + + + + + + + + @@ -100,6 +107,7 @@ --> + diff -r 95612c159681 -r e7d863c5c5d6 tool-data/fasta_indexes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Wed Dec 11 12:54:32 2013 -0500 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a sam_fa_new_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. 
The sam_fa_new_indices.loc +#file has this format (white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the sam_fa_new_indices.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your sam_fa_new_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa diff -r 95612c159681 -r e7d863c5c5d6 tool-data/sam_fa_indices.loc.sample --- a/tool-data/sam_fa_indices.loc.sample Mon Aug 26 14:21:12 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. 
The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa diff -r 95612c159681 -r e7d863c5c5d6 tool-data/tool_data_table_conf.xml.sample --- a/tool-data/tool_data_table_conf.xml.sample Mon Aug 26 14:21:12 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ - - - - - line_type, value, path - -

- \ No newline at end of file diff -r 95612c159681 -r e7d863c5c5d6 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Dec 11 12:54:32 2013 -0500 @@ -0,0 +1,7 @@ + + + + value, dbkey, name, path + +