Mercurial > repos > nml > csvtk_separate
changeset 0:1be50033b476 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue May 19 17:24:41 2020 -0400
@@ -0,0 +1,326 @@
+<macros>
+    <!-- csvtk release wrapped by this tool suite; also pins the conda package version below -->
+    <token name="@VERSION@">0.20.0</token>
+    <token name="@GALAXY_VERSION@">galaxy0</token>
+    <!-- Tool dependency: the csvtk package pinned to @VERSION@ -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">csvtk</requirement>
+        </requirements>
+    </xml>
+    <!-- Command used to report the installed csvtk version -->
+    <xml name="version_cmd">
+        <version_command>csvtk version</version_command>
+    </xml>
+    <!-- Sanitizer for free-text params: any printable character is kept except the single quote -->
+    <xml name="text_sanitizer">
+        <sanitizer>
+            <valid initial="string.printable">
+                <remove value="'"/>
+            </valid>
+        </sanitizer>
+    </xml>
+    <!-- One or more CSV/TSV input datasets, named in_1 -->
+    <xml name="multi_input">
+        <param type="data" name="in_1" format="csv,tabular"
+            multiple="true" optional="false"
+            label="Specify TSV or CSV file inputs"
+            help="Input tsv or csv files to analyze. HOWEVER, they must all be the **same file type** or the tool will fail/not give correct results!"
+        />
+    </xml>
+    <!-- Exactly one CSV/TSV input dataset, named in_1 -->
+    <xml name="singular_input">
+        <param type="data" name="in_1" format="csv,tabular"
+            multiple="false" optional="false"
+            label="Specify an input TSV or CSV file"
+            help="Input a TSV or CSV file to work on"
+        />
+    </xml>
+    <!-- Boolean that emits the -i flag when checked -->
+    <xml name="ignore_case">
+        <param type="boolean" name="ignore_case"
+            falsevalue="" truevalue="-i"
+            checked="false"
+            argument="-i"
+            label="Ignore cell case?"
+            help="ABC == abc"
+        />
+    </xml>
+    <!-- Collapsed section of csvtk flags shared by the tools (-H, -I, -E, -l) -->
+    <xml name="global_parameters">
+        <section name="global_param" title="csvtk Global Parameters" expanded="false">
+            <!-- Inverted on purpose: -H is emitted only when the "has a header line" box is unchecked -->
+            <param type="boolean" name="header"
+                falsevalue="-H" truevalue=""
+                checked="true"
+                argument="-H"
+                label="Input file has a header line"
+            />
+            <param type="boolean" name="illegal_rows"
+                falsevalue="" truevalue="-I"
+                checked="false"
+                argument="-I"
+                label="Ignore illegal rows"
+                help="Use if file has illegal rows as defined in the help section"
+            />
+            <param type="boolean" name="empty_rows"
+                falsevalue="" truevalue="-E"
+                checked="false"
+                argument="-E"
+                label="Ignore empty rows"
+                help="Ignore rows with no data (only needed if input has empty rows)"
+            />
+            <param type="boolean" name="lazy_quotes"
+                falsevalue="" truevalue="-l"
+                checked="false"
+                argument="-l"
+                label="File has Lazy quotes"
+                help="(TSV files only) If Yes, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field"
+            />
+        </section>
+    </xml>
+    <!-- Selector for one or more columns, by name or by number; the value is column_text.in_text -->
+    <xml name="fields_input">
+        <conditional name="column_text" >
+            <param type="select" name="select" label="Select input column(s) based on" argument="-F -f">
+                <option value="string">Column Name(s)</option>
+                <option value="column">Column Number(s)</option>
+            </param>
+            <when value="column">
+                <param type="data_column" name="in_text"
+                    data_ref="in_1"
+                    multiple="true"
+                    label="Input column number(s)"
+                    help="Select column(s) to use for analysis"
+                />
+            </when>
+            <when value="string">
+                <param type="text" name="in_text"
+                    optional="false"
+                    label="Input column name(s)"
+                    help="Multiple columns can be given if separated by a ' , '.
+                    Column numbers can be given too - ex. '1,2' will target columns 1 and 2.
+                    Please see the help section below for more detailed info">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Selector for a single column, by name or by number; the value is column_text.in_text -->
+    <xml name="singular_fields_input">
+        <conditional name="column_text" >
+            <param type="select" name="select" label="Select input column based on" argument="-f">
+                <option value="string">Column Name</option>
+                <option value="column">Column Number</option>
+            </param>
+            <when value="column">
+                <param type="data_column" name="in_text"
+                    data_ref="in_1"
+                    multiple="false"
+                    label="Input column number"
+                    help="Select column to use for analysis"
+                />
+            </when>
+            <when value="string">
+                <param type="text" name="in_text"
+                    optional="false"
+                    label="Input column name"
+                    help="Input column name or number ex. 'Length' or '1'.
+                    Please see the help section below for more detailed info">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Selector for a single data column (presumably used by the plotting tools; confirm against callers) -->
+    <xml name="plot_field">
+        <conditional name="column_text" >
+            <param type="select" name="select" label="Select input data column based on" argument="-f">
+                <option value="string">Column Name</option>
+                <option value="column">Column Number</option>
+            </param>
+            <when value="column">
+                <param type="data_column" name="in_text"
+                    data_ref="in_1"
+                    multiple="false"
+                    label="Input data column number"
+                />
+            </when>
+            <when value="string">
+                <param type="text" name="in_text"
+                    optional="false"
+                    label="Input data column name"
+                    help="Can use column name or column number">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Optional single grouping column; the "none" branch adds no parameter -->
+    <xml name="groups_input">
+        <conditional name="group_field" >
+            <param type="select" name="select_group" label="Select column to group data" argument="-g" optional="false" help="Specify a single column that is used to create data groups. An example is shown in the help section">
+                <option value="none">None</option>
+                <option value="string">Column Name</option>
+                <option value="column">Column Number</option>
+            </param>
+            <when value="none" />
+            <when value="column">
+                <param type="data_column" name="in_text"
+                    data_ref="in_1"
+                    multiple="false"
+                    label="Group by column number"
+                />
+            </when>
+            <when value="string">
+                <param type="text" name="in_text"
+                    optional="false"
+                    label="Group by column name"
+                    help="Can use column name or number">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Collapsed section of optional figure settings; every param is optional -->
+    <xml name="global_plot_parameters">
+        <section name="plot_parameters" title="Advanced Optional Plot Parameters" expanded="false">
+            <param type="float" name="figure_height"
+                argument="--height"
+                optional="true"
+                label="Figure Height (Default 4.5)"
+            />
+            <param type="float" name="figure_width"
+                argument="--width"
+                optional="true"
+                label="Figure Width (Default 6)"
+            />
+            <param type="float" name="tick_width"
+                argument="--tick-width"
+                optional="true"
+                label="Axis Tick Width (Default 1.5)"
+            />
+            <param type="text" name="title"
+                argument="--title"
+                optional="true"
+                label="Specify Figure Title">
+                <expand macro="text_sanitizer" />
+            </param>
+            <param type="text" name="x_label"
+                argument="--xlab"
+                optional="true"
+                label="Specify X-axis label">
+                <expand macro="text_sanitizer" />
+            </param>
+            <param type="float" name="min_x"
+                argument="--x-min"
+                optional="true"
+                label="Minimum value of X-axis (float)"
+            />
+            <param type="float" name="max_x"
+                argument="--x-max"
+                optional="true"
+                label="Maximum value of X-axis (float)"
+            />
+            <param type="text" name="y_label"
+                argument="--ylab"
+                optional="true"
+                label="Specify Y-axis label">
+                <expand macro="text_sanitizer" />
+            </param>
+            <param type="float" name="min_y"
+                argument="--y-min"
+                optional="true"
+                label="Minimum value of Y-axis (float)"
+            />
+            <param type="float" name="max_y"
+                argument="--y-max"
+                optional="true"
+                label="Maximum value of Y-axis (float)"
+            />
+        </section>
+    </xml>
+    <!-- Help text (reStructuredText) describing how columns may be specified by name or number -->
+    <token name="@HELP_COLUMNS@">
+Column Name Input Help
+######################
+
+- Multiple names can be given if separated by a ' , '.
+
+    - ex. 'ID,Organism' would target the columns named ID and Organism for the function
+
+- Column names are case SeNsitive
+
+- Column numbers can also be given:
+
+    - ex. '1,2,3' or '1-3' for inputting columns 1-3.
+
+- You can also specify all but unwanted column(s) with a ' - '.
+
+    - ex. '-ID' would target all columns but the ID column
+
+----
+    </token>
+    <!-- Help text (reStructuredText) listing input limitations; quoted names match the global parameter labels -->
+    <token name="@HELP_INPUT_DATA@">
+Input Data
+##########
+
+::
+
+    **Limitations of Input Data**
+
+    1. The CSV parser requires all the lines have same number of fields/columns.
+       If your file has illegal rows, set the "Ignore illegal rows" parameter to "Yes" to pass your data through
+       Even lines with spaces will cause error.
+       Example bad table below.
+
+    2. By default, csvtk thinks files have header rows. If your file does not, set global parameter
+       "Input file has a header line" to "No"
+
+    3. Column names should be unique and are case sensitive!
+
+    4. Lines starting with "#" or "$" will be ignored, if in the header row
+
+    5. If " exists in tab-delimited files, set the "File has Lazy quotes" global parameter to "Yes"
+
+Example bad table:
+
++--------+--------+--------+--------+
+| Head 1 | Head 2 | Head 3 | Head 3 |
++========+========+========+========+
+| 1      | 2      | 3      |        |
++--------+--------+--------+--------+
+| this   | will   |        | break  |
++--------+--------+--------+--------+
+
+Bad tables may work if both the "Ignore illegal rows" and "Ignore empty rows" global parameters are set to "Yes",
+but there is no guarantee of that!
+
+----
+    </token>
+    <!-- Help text (reStructuredText) closing each tool help page with a link to the upstream csvtk site -->
+    <token name="@HELP_END_STATEMENT@">
+More Information
+################
+For information from the creators of csvtk, please visit their site at: https://bioinf.shenwei.me/csvtk/
+
+Although be aware that some features may not be available and some small changes were made to work with Galaxy.
+
+**Notable changes from their documentation:**
+
+- Cannot specify multiple file header names (IE cannot use "name;username" as a valid column match)
+
+- No single quotes / apostrophes allowed in text inputs
+    </token>
+    <!-- Citation for csvtk; the BibTeX entry must keep its braces balanced -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">@ARTICLE{a1,
+                title = {csvtk - CSV/TSV Toolkit},
+                author = {Wei Shen},
+                url = {https://github.com/shenwei356/csvtk}
+            }</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/separate.xml Tue May 19 17:24:41 2020 -0400
@@ -0,0 +1,234 @@
+<tool id="csvtk_separate" name="csvtk-separate" version="@VERSION@+@GALAXY_VERSION@">
+    <description> column into multiple columns</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_cmd" />
+    <!-- Cheetah template for the csvtk separate call; every user-supplied value is single-quoted, which is why the sanitizer strips single quotes -->
+    <command detect_errors="exit_code"><![CDATA[
+
+## One new column is requested per comma-separated name; the count is passed to -N below ##
+#set column_number = $column_names.count(',') + 1
+
+###################
+## Start Command ##
+###################
+csvtk separate --num-cpus "\${GALAXY_SLOTS:-1}"
+
+    ## Add additional flags as specified ##
+    #######################################
+    $ignore_case
+    $global_param.illegal_rows
+    $global_param.empty_rows
+    $global_param.header
+    $global_param.lazy_quotes
+
+    ## Set Tabular input/output flag if first input is tabular ##
+    #############################################################
+    #if $in_1.is_of_type("tabular"):
+        -t -T
+    #end if
+
+    ## Set input files ##
+    #####################
+    '$in_1'
+
+    ## Specify field to use ##
+    ##########################
+    -f '$column_text.in_text'
+
+    ## Specific inputs ##
+    #####################
+    -n '$column_names'
+    -N '$column_number'
+
+    #if $how_separate.how == 'sep'
+        -s '$how_separate.separator'
+    #else
+        ## Regex mode: the user pattern is wrapped in one pair of brackets, as described in the help ##
+        -r '($how_separate.separator)'
+    #end if
+
+    --na '$fill_na'
+    $extra_data
+    $remove
+
+    ## To output ##
+    ###############
+    > separated
+
+    ]]></command>
+    <inputs>
+        <expand macro="singular_input"/>
+        <expand macro="singular_fields_input"/>
+        <!-- Split mode: both branches expose a "separator" param, sent with -s (string) or -r (regex) -->
+        <conditional name="how_separate">
+            <param name="how" type="select" label="Separate input column by:">
+                <option value="sep">Separator String</option>
+                <option value="regex">Regexp</option>
+            </param>
+            <when value="sep">
+                <param name="separator" type="text" value="; " argument="-s"
+                    label="Separator string"
+                    help="Specify what string separates the data in the column">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+            <when value="regex">
+                <param name="separator" type="text" value=".+" argument="-r"
+                    label="Set regex search pattern"
+                    help="Use regex to match input column information. Example: ^(.+)$ will match all characters.
+                    Regex help can be found below. The ' character is invalid">
+                    <expand macro="text_sanitizer" />
+                </param>
+            </when>
+        </conditional>
+        <param name="column_names" type="text" value="new1,new2" argument="-n"
+            label="Specify new column name(s)"
+            help="More than one column can be made by separating the names by a comma (,). Ex. 'Genus,Species' would create two columns.">
+            <expand macro="text_sanitizer" />
+        </param>
+        <param name="extra_data" type="select" label="Handle extra data by:"
+            help="Extra data is data that does not fit into the new columns made. An example can be found below">
+            <option value="--drop">Dropping it</option>
+            <option value="--merge">Merging it</option>
+        </param>
+        <param name="fill_na" type="text" value="NA" argument="--na"
+            label="Character string to fill empty columns">
+            <expand macro="text_sanitizer" />
+        </param>
+        <param name="remove" type="boolean" checked="true" argument="-R"
+            truevalue="-R"
+            falsevalue=""
+            label="Remove input column"
+        />
+        <expand macro="ignore_case" />
+        <expand macro="global_parameters" />
+    </inputs>
+    <outputs>
+        <data format_source="in_1" from_work_dir="separated" name="separated" label="${in_1.name} column ${column_text.in_text} separated by ${how_separate.separator}" />
+    </outputs>
+    <tests>
+        <!-- String separator, extra values dropped, input column removed -->
+        <test>
+            <param name="in_1" value="collapsed.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="2" />
+            </conditional>
+            <conditional name="how_separate">
+                <param name="how" value="sep" />
+                <param name="separator" value="; " />
+            </conditional>
+            <param name="column_names" value="1,2" />
+            <param name="fill_na" value="NA" />
+            <param name="extra_data" value="--drop" />
+            <param name="remove" value="true" />
+            <output name="separated" file="separated_1.tsv" ftype="tabular" />
+        </test>
+        <!-- String separator, extra values merged into the last column, input column kept, custom NA string -->
+        <test>
+            <param name="in_1" value="collapsed.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="2" />
+            </conditional>
+            <conditional name="how_separate">
+                <param name="how" value="sep" />
+                <param name="separator" value="; " />
+            </conditional>
+            <param name="column_names" value="1,2" />
+            <param name="fill_na" value="N/A" />
+            <param name="extra_data" value="--merge" />
+            <param name="remove" value="false" />
+            <output name="separated" file="separated_2.tsv" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Csvtk - Separate Help
+---------------------
+
+Info
+####
+
+Csvtk-separate separates columns into new columns based on either an input string or a regular expression.
+
+The regex input for this tool is structured such that your regular expression **does not** need to start with quotes or brackets. You can
+start your expression with a `^` or just go straight into it
+
+For example:
+
+::
+
+    Using `.+` as an input would be used in the code as '(.+)'
+
+    Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)'
+
+.. class:: warningmark
+
+    Single quotes are not allowed in text inputs!
+
+----
+
+
+@HELP_INPUT_DATA@
+
+
+Usage
+#####
+
+**Ex. Separate with Dropping Data**
+
+Suppose we had the following table and wanted to separate the scientific name column to create two new columns called genus and species:
+
++------------+------------------------------+
+| Name       | Scientific Name              |
++============+==============================+
+| Red Fox    | Vulpes vulpes                |
++------------+------------------------------+
+| Salmonella | Salmonella enterica enterica |
++------------+------------------------------+
+
+First, we set our separator to string and use just a space (' ') as the separator as the names are separated by spaces.
+
+Then, we have to set the new column names which can be done by setting the column names to 'Genus,Species' (make sure that the names are
+separated by a comma).
+
+Finally, we have to decide if we want to drop the input column and if we want to merge the additional data or drop it.
+
+First table will show dropping of additional data (the second enterica is additional as it cannot fit in its own column)
+
++------------+------------+----------+
+| Name       | Genus      | Species  |
++============+============+==========+
+| Red Fox    | Vulpes     | vulpes   |
++------------+------------+----------+
+| Salmonella | Salmonella | enterica |
++------------+------------+----------+
+
+|
+
+Here is what it would look like if we merged data instead:
+
++------------+------------+-------------------+
+| Name       | Genus      | Species           |
++============+============+===================+
+| Red Fox    | Vulpes     | vulpes            |
++------------+------------+-------------------+
+| Salmonella | Salmonella | enterica enterica |
++------------+------------+-------------------+
+
+----
+
+
+@HELP_COLUMNS@
+
+
+@HELP_END_STATEMENT@
+
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Animals_More.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Spots,Dog,1 +Fred,Dog,5 +Mittens,Cat,16 +Slippers,Cat,11 +Gravy,Cat,6 +Stripes,Zebra,7 +Muffin,Cat,7 +Earl,Dog,2 +Sir-Wags-A-Lot,Dog,44
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,12 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,12 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,13 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +, +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.tabular Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,13 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 + +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/another.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Name,Food,Age +Joe,Beets,33 +Steven,Eggplant,36 +Jacob,Kale,59 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_input.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution +ECo-1,2523,1000 +LPn-1,100,1000000 +LPn-2,4,1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_output.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution,CFU/ml +ECo-1,2523,1000,2523000 +LPn-1,100,1000000,100000000 +LPn-2,4,1000,4000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blood_type.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Jacob AB Blue 160cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Patrick O Green 1cm +Chester O Blue 22cm +Kim B Teal 11cm +Sarah A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapsed.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name +AB- Fred +AB Darian; Jacob +O- Tim +O Adrian; Matthew; Patrick; Chester +B Kim +A Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_1.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,-,- +Adrian,-,- +Steven,-,- +Joe,-,Beets +Steven,-,Eggplant +Jacob,-,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_2.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,, +Adrian,, +Steven,, +Joe,,Beets +Steven,,Eggplant +Jacob,,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_1.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,1 @@ +X,Y,0.9960
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_2.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,1 @@ +X Y 0.9997
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-bob.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Bob,Protein,All of them,250cm,O-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-darian.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-jack.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O +Bob,Protein,All of them,250cm,O- +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_1.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,33 @@ +Length,GC Content +100,50.00 +100,50.05 +100,49.95 +110,50.60 +105,50.50 +101,49.05 +99,49.95 +95,50.95 +100,50.00 +100,50.00 +90,66.00 +100,66.60 +100,65.05 +101,65.95 +101,65.55 +99,66.00 +95,66.05 +100,66.55 +105,65.55 +100,65.55 +110,66.55 +110,70.00 +100,70.00 +90,45.65 +99,45.60 +99,45.50 +95,45.20 +95,45.55 +100,45.55 +100,45.20 +100,45.55 +100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_2.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,33 @@ +Group,GC Content +A,50.00 +A,50.05 +A,49.95 +A,50.60 +A,50.50 +A,49.05 +A,49.95 +A,50.95 +A,50.00 +A,50.00 +B,66.00 +B,66.60 +B,65.05 +B,65.95 +B,65.55 +B,66.00 +B,66.05 +B,66.55 +B,65.55 +B,65.55 +B,66.55 +C,70.00 +C,70.00 +D,45.65 +D,45.60 +D,45.50 +D,45.20 +D,45.55 +D,45.55 +D,45.20 +D,45.55 +D,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Person,Height,Sport,Job +Fred,140cm,Diving,Accountant +Darian,175cm,Running,Student +Jake,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Blood_Type Favourite_Colour frequency +AB Blue 2 +O Green 2 +O Blue 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/frequency.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,8 @@ +Blood_Type Favourite_Colour frequency +AB- Orange 1 +AB Blue 2 +O- Green 1 +O Green 2 +O Blue 2 +B Teal 1 +A Blue 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gathered.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Favourite Food,Favourite Colour,Height,BloodType,1,2 +Pineapple,Off White,165cm,O,Person,Jack +Protein,All of them,250cm,O-,Person,Bob +Potatos,Blue,175cm,O,Person,Darian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,6 @@ +Test,A,B,C +D,,S,C +F,F,F,F +F,F,F, +TT,TT,TT,TT +Agh,Ol,As,TT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal_collapse.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,5 @@ +C,Test +C,D +F,F +,F +TT,TT; Agh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Darian,Potatos,Blue,175cm,O,175cm,Running,Student
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined_filled.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,6 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Jack,Pineapple,Off White,165cm,O,a,a,a +Bob,Protein,All of them,250cm,O-,a,a,a +Darian,Potatos,Blue,175cm,O,175cm,Running,Student +Fred,a,a,a,a,140cm,Diving,Accountant +Jake,a,a,a,a,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kv.txt Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,3 @@ +Key Value +Dog Big +Cat Small \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutate_removed.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,11 @@ +Blood_Type Favourite_Colour Height new_column +AB Blue 175cm Darian +AB- Orange 185cm +AB Blue 160cm +O Blue 2000cm +O- Green 20cm +O Green 140cm +O Green 1cm +O Blue 22cm +B Teal 11cm +A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutated.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height new_column +Darian AB Blue 175cm Darian +Fred AB- Orange 185cm Fred +Jacob AB Blue 160cm Jacob +Adrian O Blue 2000cm Adrian +Tim O- Green 20cm Tim +Matthew O Green 140cm Matthew +Patrick O Green 1cm Patrick +Chester O Blue 22cm Chester +Kim B Teal 11cm Kim +Sarah A Blue 123cm Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/other.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Name,Sport,Job +Fred,Volleyball,Molecular Biologist +Adrian,Basketball,Computational Biologist +Steven,Football,Microbiologist \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/plot.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,33 @@ +Group,Length,GC Content +A,100,50.00 +A,100,50.05 +A,100,49.95 +A,110,50.60 +A,105,50.50 +A,101,49.05 +A,99,49.95 +A,95,50.95 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,100,65.05 +B,101,65.95 +B,101,65.55 +B,99,66.00 +B,95,66.05 +B,100,66.55 +B,105,65.55 +B,100,65.55 +B,110,66.55 +C,110,70.00 +C,100,70.00 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.55 +D,100,45.20 +D,100,45.55 +D,100,45.50 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_1.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,1-Dog +Mittens,2-Cat +Fuzzy,3-Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_2.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Big +Mittens,Small +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_input.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Dog +Mittens,Cat +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_1.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,16 @@ +Group,Length,GC Content +A,100,50.00 +A,101,49.05 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,101,65.55 +B,99,66.00 +B,105,65.55 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.20
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_2.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,25 @@ +n,Group,Length,GC Content +1,A,100,50.00 +4,A,110,50.60 +5,A,105,50.50 +6,A,101,49.05 +9,A,100,50.00 +10,A,100,50.00 +11,B,90,66.00 +12,B,100,66.60 +13,B,100,65.05 +14,B,101,65.95 +15,B,101,65.55 +16,B,99,66.00 +19,B,105,65.55 +20,B,100,65.55 +22,C,110,70.00 +24,D,90,45.65 +25,D,99,45.60 +26,D,99,45.50 +27,D,95,45.20 +28,D,95,45.55 +29,D,100,45.55 +30,D,100,45.20 +31,D,100,45.55 +32,D,100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_1.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type 1 2 +AB- Fred NA +AB Darian Jacob +O- Tim NA +O Adrian Matthew +B Kim NA +A Sarah NA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_2.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name 1 2 +AB- Fred Fred N/A +AB Darian; Jacob Darian Jacob +O- Tim Tim N/A +O Adrian; Matthew; Patrick; Chester Adrian Matthew; Patrick; Chester +B Kim Kim N/A +A Sarah Sarah N/A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_order.txt Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,3 @@ +Zebra +Cat +Dog
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_1.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Stripes,Zebra,7 +Mittens,Cat,16 +Slippers,Cat,11 +Muffin,Cat,7 +Gravy,Cat,6 +Sir-Wags-A-Lot,Dog,44 +Fred,Dog,5 +Earl,Dog,2 +Spots,Dog,1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,5 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/uniq.tsv Tue May 19 17:24:41 2020 -0400 @@ -0,0 +1,8 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Kim B Teal 11cm +Sarah A Blue 123cm