comparison inspect.xml @ 3:cc0deb593fc8 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 3b41d687ff30583540d055f6995de00530cca81d"
author iuc
date Thu, 12 Dec 2019 09:27:38 -0500
parents 7d22964a8639
children 08192eebb47d
comparison
equal deleted inserted replaced
2:7d22964a8639 3:cc0deb593fc8
133 133
134 #else if $method.method == 'tl.rank_genes_groups' 134 #else if $method.method == 'tl.rank_genes_groups'
135 sc.tl.rank_genes_groups( 135 sc.tl.rank_genes_groups(
136 adata=adata, 136 adata=adata,
137 groupby='$method.groupby', 137 groupby='$method.groupby',
138 use_raw=$method.use_raw,
139 #if str($method.groups) != '' 138 #if str($method.groups) != ''
140 #set $group=[x.strip() for x in str($method.groups).split(',')] 139 #set $group=[x.strip() for x in str($method.groups).split(',')]
141 groups=$group, 140 groups=$group,
142 #end if 141 #end if
143 #if $method.ref.rest == 'rest' 142 #if $method.ref.rest == 'rest'
197 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', 196 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
198 #end if 197 #end if
199 tol=$method.tl_rank_genes_groups_method.tol, 198 tol=$method.tl_rank_genes_groups_method.tol,
200 C=$method.tl_rank_genes_groups_method.c, 199 C=$method.tl_rank_genes_groups_method.c,
201 #end if 200 #end if
202 only_positive=$method.only_positive) 201 use_raw=$method.use_raw)
203 202
204 #else if $method.method == "tl.marker_gene_overlap" 203 #else if $method.method == "tl.marker_gene_overlap"
205 reference_markers = {} 204 reference_markers = {}
206 #for $i, $s in enumerate($method.reference_markers) 205 #for $i, $s in enumerate($method.reference_markers)
207 #set $list=[x.strip() for x in str($s.values).split(',')] 206 #set $list=[x.strip() for x in str($s.values).split(',')]
254 </configfiles> 253 </configfiles>
255 <inputs> 254 <inputs>
256 <expand macro="inputs_anndata"/> 255 <expand macro="inputs_anndata"/>
257 <conditional name="method"> 256 <conditional name="method">
258 <param argument="method" type="select" label="Method used for inspecting"> 257 <param argument="method" type="select" label="Method used for inspecting">
259 <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using `pp.calculate_qc_metrics`</option> 258 <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using 'pp.calculate_qc_metrics'</option>
260 <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option> 259 <option value="pp.neighbors">Compute a neighborhood graph of observations, using 'pp.neighbors'</option>
261 <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option> 260 <option value="tl.score_genes">Score a set of genes, using 'tl.score_genes'</option>
262 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option> 261 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using 'tl.score_genes_cell_cycle'</option>
263 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option> 262 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using 'tl.rank_genes_groups'</option>
264 <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using `tl.marker_gene_overlap`</option>--> 263 <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using 'tl.marker_gene_overlap'</option>-->
265 <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> 264 <option value="pp.log1p">Logarithmize the data matrix, using 'pp.log1p'</option>
266 <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> 265 <option value="pp.scale">Scale data to unit variance and zero mean, using 'pp.scale'</option>
267 <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> 266 <option value="pp.sqrt">Square root the data matrix, using 'pp.sqrt'</option>
268 </param> 267 </param>
269 <when value="pp.calculate_qc_metrics"> 268 <when value="pp.calculate_qc_metrics">
270 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"/> 269 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"/>
271 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"/> 270 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"/>
272 <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of `.var` which identify variables you could want to control for" 271 <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for"
273 help="Keys separated by a comma"/> 272 help="Keys separated by a comma"/>
274 <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover" 273 <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover"
275 help=" Values (integers) are considered 1-indexed, `50` finds cumulative proportion to the 50th most expressed genes. Values separated by a comma. 274 help=" Values (integers) are considered 1-indexed, '50' finds cumulative proportion to the 50th most expressed genes. Values separated by a comma.
276 If empty don't calculate"/> 275 If empty don't calculate"/>
277 </when> 276 </when>
278 <when value="pp.neighbors"> 277 <when value="pp.neighbors">
279 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/> 278 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If 'knn' is 'True', number of nearest neighbors to be searched. If 'knn' is 'False', a Gaussian kernel width is set to the distance of the 'n_neighbors' neighbor."/>
280 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> 279 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
281 <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter"/> 280 <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter"/>
282 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/> 281 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the 'n_neighbors' nearest neighbor."/>
283 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/> 282 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/>
284 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help=""> 283 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help="">
285 <option value="umap">umap (McInnes et al, 2018)</option> 284 <option value="umap">umap (McInnes et al, 2018)</option>
286 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option> 285 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option>
287 </param> 286 </param>
290 </param> 289 </param>
291 </when> 290 </when>
292 <when value="tl.score_genes"> 291 <when value="tl.score_genes">
293 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/> 292 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/>
294 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" 293 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
295 help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/> 294 help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/>
296 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" 295 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
297 help="Default is all genes. Genes separated by a comma"/> 296 help="Default is all genes. Genes separated by a comma"/>
298 <expand macro="score_genes_params"/> 297 <expand macro="score_genes_params"/>
299 <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/> 298 <param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'" help=""/>
300 </when> 299 </when>
301 <when value="tl.score_genes_cell_cycle"> 300 <when value="tl.score_genes_cell_cycle">
302 <conditional name='s_genes'> 301 <conditional name='s_genes'>
303 <param name="format" type="select" label="Format for the list of genes associated with S phase"> 302 <param name="format" type="select" label="Format for the list of genes associated with S phase">
304 <option value="file">File</option> 303 <option value="file">File</option>
424 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/> 423 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
425 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength" 424 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
426 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/> 425 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
427 </when> 426 </when>
428 </conditional> 427 </conditional>
429 <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true"
430 label="Only consider positive differences?" help=""/>
431 </when> 428 </when>
432 <!--<when value="tl.marker_gene_overlap"> 429 <!--<when value="tl.marker_gene_overlap">
433 <repeat name="reference_markers" title="Marker genes"> 430 <repeat name="reference_markers" title="Marker genes">
434 <param name="key" type="text" value="" label="Cell identity name" help=""/> 431 <param name="key" type="text" value="" label="Cell identity name" help=""/>
435 <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from `var`"/> 432 <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from 'var'"/>
436 </repeat> 433 </repeat>
437 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/> 434 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/>
438 <conditional name="overlap"> 435 <conditional name="overlap">
439 <param argument="method" type="select" label="Method to calculate marker gene overlap"> 436 <param argument="method" type="select" label="Method to calculate marker gene overlap">
440 <option value="overlap_count">overlap_count: Intersection of the gene set</option> 437 <option value="overlap_count">overlap_count: Intersection of the gene set</option>
596 <param name="n_genes" value="100"/> 593 <param name="n_genes" value="100"/>
597 <conditional name="tl_rank_genes_groups_method"> 594 <conditional name="tl_rank_genes_groups_method">
598 <param name="method" value="t-test_overestim_var"/> 595 <param name="method" value="t-test_overestim_var"/>
599 <param name="corr_method" value="benjamini-hochberg"/> 596 <param name="corr_method" value="benjamini-hochberg"/>
600 </conditional> 597 </conditional>
601 <param name="only_positive" value="true"/>
602 </conditional> 598 </conditional>
603 <assert_stdout> 599 <assert_stdout>
604 <has_text_matching expression="sc.tl.rank_genes_groups"/> 600 <has_text_matching expression="sc.tl.rank_genes_groups"/>
605 <has_text_matching expression="groupby='cell_type'"/> 601 <has_text_matching expression="groupby='cell_type'"/>
606 <has_text_matching expression="use_raw=True"/> 602 <has_text_matching expression="use_raw=True"/>
607 <has_text_matching expression="reference='rest'"/> 603 <has_text_matching expression="reference='rest'"/>
608 <has_text_matching expression="n_genes=100"/> 604 <has_text_matching expression="n_genes=100"/>
609 <has_text_matching expression="method='t-test_overestim_var'"/> 605 <has_text_matching expression="method='t-test_overestim_var'"/>
610 <has_text_matching expression="corr_method='benjamini-hochberg'"/> 606 <has_text_matching expression="corr_method='benjamini-hochberg'"/>
611 <has_text_matching expression="only_positive=True"/>
612 </assert_stdout> 607 </assert_stdout>
613 <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> 608 <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
614 </test> 609 </test>
615 <test> 610 <test>
616 <!-- test 7 --> 611 <!-- test 7 -->
632 <param name="multi_class" value="auto"/> 627 <param name="multi_class" value="auto"/>
633 </conditional> 628 </conditional>
634 <param name="tol" value="1e-4"/> 629 <param name="tol" value="1e-4"/>
635 <param name="c" value="1.0"/> 630 <param name="c" value="1.0"/>
636 </conditional> 631 </conditional>
637 <param name="only_positive" value="true"/>
638 </conditional> 632 </conditional>
639 <assert_stdout> 633 <assert_stdout>
640 <has_text_matching expression="sc.tl.rank_genes_groups"/> 634 <has_text_matching expression="sc.tl.rank_genes_groups"/>
641 <has_text_matching expression="groupby='louvain'"/> 635 <has_text_matching expression="groupby='louvain'"/>
642 <has_text_matching expression="use_raw=True"/> 636 <has_text_matching expression="use_raw=True"/>
648 <has_text_matching expression="fit_intercept=True"/> 642 <has_text_matching expression="fit_intercept=True"/>
649 <has_text_matching expression="max_iter=100"/> 643 <has_text_matching expression="max_iter=100"/>
650 <has_text_matching expression="multi_class='auto'"/> 644 <has_text_matching expression="multi_class='auto'"/>
651 <has_text_matching expression="tol=0.0001"/> 645 <has_text_matching expression="tol=0.0001"/>
652 <has_text_matching expression="C=1.0"/> 646 <has_text_matching expression="C=1.0"/>
653 <has_text_matching expression="only_positive=True"/>
654 </assert_stdout> 647 </assert_stdout>
655 <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size"> 648 <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size">
656 <assert_contents> 649 <assert_contents>
657 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" /> 650 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
658 </assert_contents> 651 </assert_contents>
684 </conditional> 677 </conditional>
685 </conditional> 678 </conditional>
686 <param name="tol" value="1e-4"/> 679 <param name="tol" value="1e-4"/>
687 <param name="c" value="1.0"/> 680 <param name="c" value="1.0"/>
688 </conditional> 681 </conditional>
689 <param name="only_positive" value="true"/>
690 </conditional> 682 </conditional>
691 <assert_stdout> 683 <assert_stdout>
692 <has_text_matching expression="sc.tl.rank_genes_groups"/> 684 <has_text_matching expression="sc.tl.rank_genes_groups"/>
693 <has_text_matching expression="groupby='louvain'"/> 685 <has_text_matching expression="groupby='louvain'"/>
694 <has_text_matching expression="use_raw=True"/> 686 <has_text_matching expression="use_raw=True"/>
700 <has_text_matching expression="dual=False"/> 692 <has_text_matching expression="dual=False"/>
701 <has_text_matching expression="fit_intercept=True"/> 693 <has_text_matching expression="fit_intercept=True"/>
702 <has_text_matching expression="intercept_scaling=1.0"/> 694 <has_text_matching expression="intercept_scaling=1.0"/>
703 <has_text_matching expression="tol=0.0001"/> 695 <has_text_matching expression="tol=0.0001"/>
704 <has_text_matching expression="C=1.0"/> 696 <has_text_matching expression="C=1.0"/>
705 <has_text_matching expression="only_positive=True"/>
706 </assert_stdout> 697 </assert_stdout>
707 <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"> 698 <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size">
708 <assert_contents> 699 <assert_contents>
709 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" /> 700 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
710 </assert_contents> 701 </assert_contents>
795 =================================================================== 786 ===================================================================
796 787
797 Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. 788 Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater.
798 Currently is most efficient on a sparse CSR or dense matrix. 789 Currently is most efficient on a sparse CSR or dense matrix.
799 790
800 It updates the observation level metrics: 791 It updates the observation level metrics with
801 792
802 - total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell) 793 - total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell)
803 - total_{expr_type} (e.g. "total_counts", total number of counts for a cell) 794 - total_{expr_type} (e.g. "total_counts", total number of counts for a cell)
804 - pct_{expr_type}_in_top_{n}_{var_type} (e.g. "pct_counts_in_top_50_genes", cumulative percentage of counts for 50 most expressed genes in a cell) 795 - pct_{expr_type}_in_top_{n}_{var_type} - for n in percent_top (e.g. "pct_counts_in_top_50_genes", cumulative percentage of counts for 50 most expressed genes in a cell)
805 - total_{expr_type}_{qc_var} (e.g. "total_counts_mito", total number of counts for variables in qc_vars ) 796 - total_{expr_type}_{qc_var} - for qc_var in qc_vars (e.g. "total_counts_mito", total number of counts for variables in qc_vars)
806 - pct_{expr_type}_{qc_var} (e.g. "pct_counts_mito", proportion of total counts for a cell which are mitochondrial) 797 - pct_{expr_type}_{qc_var} - for qc_var in qc_vars (e.g. "pct_counts_mito", proportion of total counts for a cell which are mitochondrial)
807 798
808 And also the variable level metrics: 799 And also the variable level metrics:
809 800
810 - total_{expr_type} (e.g. "total_counts", sum of counts for a gene) 801 - total_{expr_type} (e.g. "total_counts", sum of counts for a gene)
811 - mean_{expr_type} (e.g. "mean_counts", mean expression over all cells. 802 - mean_{expr_type} (e.g. "mean_counts", mean expression over all cells)
812 - n_cells_by_{expr_type} (e.g. "n_cells_by_counts", number of cells this expression is measured in) 803 - n_cells_by_{expr_type} (e.g. "n_cells_by_counts", number of cells this expression is measured in)
813 - pct_dropout_by_{expr_type} (e.g. "pct_dropout_by_counts", percentage of cells this feature does not appear in) 804 - pct_dropout_by_{expr_type} (e.g. "pct_dropout_by_counts", percentage of cells this feature does not appear in)
814 805
815 More details on the `scanpy documentation 806 More details on the `scanpy documentation
816 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.calculate_qc_metrics.html>`__ 807 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.calculate_qc_metrics.html>`__