% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dkuReadDataset.R
\name{dkuReadDataset}
\alias{dkuReadDataset}
\title{Reads a dataset from Dataiku's Data Science Studio}
\usage{
dkuReadDataset(
  name,
  partitions = NULL,
  samplingMethod = c("full", "fixed", "head", "ratio"),
  columns = NULL,
  nbRows = NULL,
  ratio = NULL,
  convertEmptyStrings = TRUE,
  colClasses = NA,
  inferColClassesFromData = TRUE,
  na.strings = "NA"
)
}
\arguments{
\item{name}{name of dataset}

\item{partitions}{character vector of partitions to load}

\item{samplingMethod}{the sampling method to use, if necessary}

\item{columns}{a character vector of columns to read from dataset}

\item{nbRows}{An integer. The number of rows used for sampling}

\item{ratio}{A numeric. The probability used for sampling each row. 0 < ratio < 1.}

\item{convertEmptyStrings}{Whether to convert empty strings to NAs}

\item{colClasses}{Manually-specified column classes. When left as \code{NA} (the default), column classes are inferred automatically; see \code{inferColClassesFromData}.}

\item{inferColClassesFromData}{If colClasses is not specified, infer column classes from data instead of dataset schema.}

\item{na.strings}{Optional list of strings to convert to NAs. Default is "NA".}
}
\value{
A data.frame with the requested data
}
\description{
Reads a dataset from Dataiku's Data Science Studio
}
\details{
Users can specify which partitions and columns to load, as well as a sampling scheme
if the dataset is too large to fit into memory. Possible sampling schemes are fixed sampling, where a set number of rows
(\code{nbRows}) is randomly chosen from the dataset; head sampling, where the first \code{nbRows} rows are read from the dataset;
and ratio sampling, where each row is included randomly with probability \code{ratio}.
}
\examples{
\dontrun{
d = dkuReadDataset("iris")

# read in two columns
d = dkuReadDataset("iris", columns=c("Sepal.Length", "Sepal.Width"))

# explicitly set colClasses
d = dkuReadDataset("iris", colClasses=c("numeric", "numeric", "numeric", "numeric", "character"))

# fixed sampling -- read 100 random rows from the iris dataset
d = dkuReadDataset("iris", samplingMethod="fixed", nbRows=100)

# head sampling -- read the first 100 rows from the iris dataset
d = dkuReadDataset("iris", samplingMethod="head", nbRows=100)

# ratio sampling -- read 30\% of the rows (chosen randomly) from the iris dataset
d = dkuReadDataset("iris", samplingMethod="ratio", ratio=0.3)
}
}
