KSLCore/ksl.utilities.io/DataFrameUtil

DataFrameUtil

object DataFrameUtil(source)

Functions

boxPlotSummary

fun boxPlotSummary(dc: DataColumn<Double>, name: String? = dc.name()): BoxPlotSummary

buildMarkDown

fun <T> buildMarkDown(df: DataFrame<T>, appendable: Appendable)

Writes the data frame as a Markdown table by converting its rows to strings.

frequenciesD

fun frequenciesD(dc: DataColumn<Double>, name: String? = dc.name()): IntegerFrequency

frequenciesI

fun frequenciesI(dc: DataColumn<Int>, name: String? = dc.name()): IntegerFrequency

histogram

fun histogram(dc: DataColumn<Double>, breakPoints: DoubleArray = Histogram.recommendBreakPoints(dc.toDoubleArray()), name: String? = dc.name()): Histogram

permute

inline fun <T> permute(dc: DataColumn<T>, streamNum: Int): DataColumn<T>

inline fun <T> permute(dc: DataColumn<T>, stream: RNStreamIfc = KSLRandom.defaultRNStream()): DataColumn<T>

The data column dc is not changed. The returned data column holds a permutation of dc.

fun <T> permute(df: DataFrame<T>, streamNum: Int): DataFrame<T>

fun <T> permute(df: DataFrame<T>, stream: RNStreamIfc = KSLRandom.defaultRNStream()): DataFrame<T>

The data frame df is not changed. The returned data frame holds a permutation of df.

randomlySelect

fun <T> randomlySelect(dc: DataColumn<T>, streamNum: Int): T

fun <T> randomlySelect(dc: DataColumn<T>, stream: RNStreamIfc = KSLRandom.defaultRNStream()): T

Randomly selects an element from the data column.

fun <T> randomlySelect(df: DataFrame<T>, streamNum: Int): DataRow<T>

fun <T> randomlySelect(df: DataFrame<T>, stream: RNStreamIfc = KSLRandom.defaultRNStream()): DataRow<T>

Randomly selects a row from the data frame.

fun <T> randomlySelect(dc: DataColumn<T>, cdf: DoubleArray, streamNum: Int): T

fun <T> randomlySelect(dc: DataColumn<T>, cdf: DoubleArray, stream: RNStreamIfc = KSLRandom.defaultRNStream()): T

Randomly selects an element from the data column using the supplied cdf

fun <T> randomlySelect(df: DataFrame<T>, cdf: DoubleArray, streamNum: Int): DataRow<T>

Randomly selects a row from the data frame using the supplied cdf

fun <T> randomlySelect(df: DataFrame<T>, cdf: DoubleArray, stream: RNStreamIfc = KSLRandom.defaultRNStream()): DataRow<T>

Randomly selects a row from the data frame using the supplied cdf.

sampleWithoutReplacement

inline fun <T> sampleWithoutReplacement(dc: DataColumn<T>, sampleSize: Int, stream: RNStreamIfc = KSLRandom.defaultRNStream()): DataColumn<T>

A new DataColumn is created such that its first sampleSize elements contain the sampled values. That is, for the new column x, the elements x.get(0), x.get(1), ..., x.get(sampleSize-1) are the random sample drawn without replacement.

fun <T> sampleWithoutReplacement(df: DataFrame<T>, sampleSize: Int, streamNum: Int): DataFrame<T>

fun <T> sampleWithoutReplacement(df: DataFrame<T>, sampleSize: Int, stream: RNStreamIfc = KSLRandom.defaultRNStream()): DataFrame<T>

The data frame df is not changed. The returned data frame holds a sample of the rows from df.

statistics

fun statistics(dc: DataColumn<Double>, name: String? = dc.name()): Statistic

toCSV

fun toCSV(df: AnyFrame, appendable: Appendable, header: Boolean = true, separator: String = ",")

Converts the data frame to rows of separated-value (CSV) output using the specified separator, which defaults to a comma. Elements that are strings are enclosed in double quotes so that the separator may appear within an element. If header is true, the column names of the data frame are included as the first row, before any data rows are appended.

toTabularFile

fun toTabularFile(df: AnyFrame, pathToFile: Path): TabularOutputFile

Converts the data frame to a TabularOutputFile at the supplied pathToFile.

fun toTabularFile(df: AnyFrame, fileName: String): TabularOutputFile

Converts the data frame to a TabularOutputFile with the supplied file name within KSL.outDir.