Mercurial > repos > devteam > cufflinks
comparison cufflinks_wrapper.py @ 2:da11bfc10e81
Update to the new data table specification.
author | Dave Bouvier <dave@bx.psu.edu> |
---|---|
date | Wed, 04 Dec 2013 13:24:49 -0500 |
parents | b50aacc8ae49 |
children | 5346d5eea8b1 |
comparison
equal
deleted
inserted
replaced
1:b01956f26c36 | 2:da11bfc10e81 |
---|---|
8 | 8 |
9 def stop_err( msg ): | 9 def stop_err( msg ): |
10 sys.stderr.write( "%s\n" % msg ) | 10 sys.stderr.write( "%s\n" % msg ) |
11 sys.exit() | 11 sys.exit() |
12 | 12 |
13 # Copied from sam_to_bam.py: | |
14 def check_seq_file( dbkey, cached_seqs_pointer_file ): | |
15 seq_path = '' | |
16 for line in open( cached_seqs_pointer_file ): | |
17 line = line.rstrip( '\r\n' ) | |
18 if line and not line.startswith( '#' ) and line.startswith( 'index' ): | |
19 fields = line.split( '\t' ) | |
20 if len( fields ) < 3: | |
21 continue | |
22 if fields[1] == dbkey: | |
23 seq_path = fields[2].strip() | |
24 break | |
25 return seq_path | |
26 | |
27 def __main__(): | 13 def __main__(): |
28 #Parse Command Line | 14 #Parse Command Line |
29 parser = optparse.OptionParser() | 15 parser = optparse.OptionParser() |
30 parser.add_option( '-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' ) | 16 parser.add_option( '-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' ) |
31 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' ) | 17 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' ) |
51 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' ) | 37 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' ) |
52 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' ) | 38 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' ) |
53 | 39 |
54 # Bias correction options. | 40 # Bias correction options. |
55 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') | 41 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') |
56 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) | 42 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) |
57 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) | |
58 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) | 43 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) |
59 | 44 |
60 # Global model. | 45 # Global model. |
61 parser.add_option( '', '--global_model', dest='global_model_file', help='Global model used for computing on local data' ) | 46 parser.add_option( '', '--global_model', dest='global_model_file', help='Global model used for computing on local data' ) |
62 | 47 |
81 except: | 66 except: |
82 sys.stdout.write( 'Could not determine Cufflinks version\n' ) | 67 sys.stdout.write( 'Could not determine Cufflinks version\n' ) |
83 | 68 |
84 # If doing bias correction, set/link to sequence file. | 69 # If doing bias correction, set/link to sequence file. |
85 if options.do_bias_correction: | 70 if options.do_bias_correction: |
86 if options.ref_file != 'None': | 71 if options.ref_file: |
87 # Sequence data from history. | 72 # Sequence data from history. |
88 # Create symbolic link to ref_file so that index will be created in working directory. | 73 # Create symbolic link to ref_file so that index will be created in working directory. |
89 seq_path = "ref.fa" | 74 seq_path = "ref.fa" |
90 os.symlink( options.ref_file, seq_path ) | 75 os.symlink( options.ref_file, seq_path ) |
91 else: | 76 else: |
92 # Sequence data from loc file. | 77 if not os.path.exists( options.index ): |
93 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) | 78 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) |
94 if not os.path.exists( cached_seqs_pointer_file ): | 79 seq_path = options.index |
95 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) | 80 |
96 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, | |
97 # and the equCab2.fa file will contain fasta sequences. | |
98 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) | |
99 if seq_path == '': | |
100 stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey ) | |
101 | |
102 # Build command. | 81 # Build command. |
103 | 82 |
104 # Base; always use quiet mode to avoid problems with storing log output. | 83 # Base; always use quiet mode to avoid problems with storing log output. |
105 cmd = "cufflinks -q --no-update-check" | 84 cmd = "cufflinks -q --no-update-check" |
106 | 85 |