Spark 3.5.5 ScalaDoc - org.apache.spark.sql.Dataset
class Dataset[T] extends Serializable
Instance Constructors
- new Dataset(sqlContext: SQLContext, logicalPlan: LogicalPlan, encoder: Encoder[T])
- new Dataset(sparkSession: SparkSession, logicalPlan: LogicalPlan, encoder: Encoder[T])
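A Dataset is normally obtained from a SparkSession (for example through `read`, `createDataset`, or the `toDS` implicit) rather than by invoking these constructors directly. A minimal sketch, assuming a local SparkSession and a hypothetical `Person` case class:

```scala
import org.apache.spark.sql.{Dataset, SparkSession}

// Hypothetical example type; any case class with an implicit Encoder works.
case class Person(name: String, age: Int)

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("dataset-example")
  .getOrCreate()

import spark.implicits._  // provides Encoders and the .toDS() syntax

// Build a typed Dataset from local data rather than via the constructors above.
val people: Dataset[Person] = Seq(Person("Alice", 29), Person("Bob", 35)).toDS()
```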
Value Members
- final def !=(arg0: Any): Boolean
- final def ##(): Int
- final def ==(arg0: Any): Boolean
- def agg(expr: Column, exprs: Column*): DataFrame
- def agg(exprs: Map[String, String]): DataFrame
- def agg(exprs: java.util.Map[String, String]): DataFrame
- def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame
- def alias(alias: Symbol): Dataset[T]
- def alias(alias: String): Dataset[T]
- def apply(colName: String): Column
- def as(alias: Symbol): Dataset[T]
- def as(alias: String): Dataset[T]
- def as[U](implicit arg0: Encoder[U]): Dataset[U]
- final def asInstanceOf[T0]: T0
- def cache(): Dataset.this.type
- def checkpoint(eager: Boolean): Dataset[T]
- def checkpoint(): Dataset[T]
- def clone(): AnyRef
- def coalesce(numPartitions: Int): Dataset[T]
- def col(colName: String): Column
- def colRegex(colName: String): Column
- def collect(): Array[T]
- def collectAsList(): List[T]
- def columns: Array[String]
- def count(): Long
- def createGlobalTempView(viewName: String): Unit
- def createOrReplaceGlobalTempView(viewName: String): Unit
- def createOrReplaceTempView(viewName: String): Unit
- def createTempView(viewName: String): Unit
- def crossJoin(right: Dataset[_]): DataFrame
- def cube(col1: String, cols: String*): RelationalGroupedDataset
- def cube(cols: Column*): RelationalGroupedDataset
- def describe(cols: String*): DataFrame
- def distinct(): Dataset[T]
- def drop(col: Column, cols: Column*): DataFrame
- def drop(col: Column): DataFrame
- def drop(colNames: String*): DataFrame
- def drop(colName: String): DataFrame
- def dropDuplicates(col1: String, cols: String*): Dataset[T]
- def dropDuplicates(colNames: Array[String]): Dataset[T]
- def dropDuplicates(colNames: Seq[String]): Dataset[T]
- def dropDuplicates(): Dataset[T]
- def dropDuplicatesWithinWatermark(col1: String, cols: String*): Dataset[T]
- def dropDuplicatesWithinWatermark(colNames: Array[String]): Dataset[T]
- def dropDuplicatesWithinWatermark(colNames: Seq[String]): Dataset[T]
- def dropDuplicatesWithinWatermark(): Dataset[T]
- def dtypes: Array[(String, String)]
- val encoder: Encoder[T]
- final def eq(arg0: AnyRef): Boolean
- def equals(arg0: Any): Boolean
- def except(other: Dataset[T]): Dataset[T]
- def exceptAll(other: Dataset[T]): Dataset[T]
- def explain(): Unit
- def explain(extended: Boolean): Unit
- def explain(mode: String): Unit
- def filter(func: FilterFunction[T]): Dataset[T]
- def filter(func: (T) ⇒ Boolean): Dataset[T]
- def filter(conditionExpr: String): Dataset[T]
- def filter(condition: Column): Dataset[T]
- def finalize(): Unit
- def first(): T
- def flatMap[U](f: FlatMapFunction[T, U], encoder: Encoder[U]): Dataset[U]
- def flatMap[U](func: (T) ⇒ TraversableOnce[U])(implicit arg0: Encoder[U]): Dataset[U]
- def foreach(func: ForeachFunction[T]): Unit
- def foreach(f: (T) ⇒ Unit): Unit
- def foreachPartition(func: ForeachPartitionFunction[T]): Unit
- def foreachPartition(f: (Iterator[T]) ⇒ Unit): Unit
- final def getClass(): Class[_]
- def groupBy(col1: String, cols: String*): RelationalGroupedDataset
- def groupBy(cols: Column*): RelationalGroupedDataset
- def groupByKey[K](func: MapFunction[T, K], encoder: Encoder[K]): KeyValueGroupedDataset[K, T]
- def groupByKey[K](func: (T) ⇒ K)(implicit arg0: Encoder[K]): KeyValueGroupedDataset[K, T]
- def hashCode(): Int
- def head(): T
- def head(n: Int): Array[T]
- def hint(name: String, parameters: Any*): Dataset[T]
- def inputFiles: Array[String]
- def intersect(other: Dataset[T]): Dataset[T]
- def intersectAll(other: Dataset[T]): Dataset[T]
- def isEmpty: Boolean
- final def isInstanceOf[T0]: Boolean
- def isLocal: Boolean
- def isStreaming: Boolean
- def javaRDD: JavaRDD[T]
- def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame
- def join(right: Dataset[_], joinExprs: Column): DataFrame
- def join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame
- def join(right: Dataset[_], usingColumns: Array[String], joinType: String): DataFrame
- def join(right: Dataset[_], usingColumn: String, joinType: String): DataFrame
- def join(right: Dataset[_], usingColumns: Seq[String]): DataFrame
- def join(right: Dataset[_], usingColumns: Array[String]): DataFrame
- def join(right: Dataset[_], usingColumn: String): DataFrame
- def join(right: Dataset[_]): DataFrame
- def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)]
- def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)]
- def limit(n: Int): Dataset[T]
- def localCheckpoint(eager: Boolean): Dataset[T]
- def localCheckpoint(): Dataset[T]
- def map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U]
- def map[U](func: (T) ⇒ U)(implicit arg0: Encoder[U]): Dataset[U]
- def mapPartitions[U](f: MapPartitionsFunction[T, U], encoder: Encoder[U]): Dataset[U]
- def mapPartitions[U](func: (Iterator[T]) ⇒ Iterator[U])(implicit arg0: Encoder[U]): Dataset[U]
- def melt(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- def melt(ids: Array[Column], values: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- def metadataColumn(colName: String): Column
- def na: DataFrameNaFunctions
- final def ne(arg0: AnyRef): Boolean
- final def notify(): Unit
- final def notifyAll(): Unit
- def observe(observation: Observation, expr: Column, exprs: Column*): Dataset[T]
- def observe(name: String, expr: Column, exprs: Column*): Dataset[T]
- def offset(n: Int): Dataset[T]
- def orderBy(sortExprs: Column*): Dataset[T]
- def orderBy(sortCol: String, sortCols: String*): Dataset[T]
- def persist(newLevel: StorageLevel): Dataset.this.type
- def persist(): Dataset.this.type
- def printSchema(level: Int): Unit
- def printSchema(): Unit
- val queryExecution: QueryExecution
- def randomSplit(weights: Array[Double]): Array[Dataset[T]]
- def randomSplit(weights: Array[Double], seed: Long): Array[Dataset[T]]
- def randomSplitAsList(weights: Array[Double], seed: Long): List[Dataset[T]]
- lazy val rdd: RDD[T]
- def reduce(func: ReduceFunction[T]): T
- def reduce(func: (T, T) ⇒ T): T
- def repartition(partitionExprs: Column*): Dataset[T]
- def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T]
- def repartition(numPartitions: Int): Dataset[T]
- def repartitionByRange(partitionExprs: Column*): Dataset[T]
- def repartitionByRange(numPartitions: Int, partitionExprs: Column*): Dataset[T]
- def rollup(col1: String, cols: String*): RelationalGroupedDataset
- def rollup(cols: Column*): RelationalGroupedDataset
- def sameSemantics(other: Dataset[T]): Boolean
- def sample(withReplacement: Boolean, fraction: Double): Dataset[T]
- def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T]
- def sample(fraction: Double): Dataset[T]
- def sample(fraction: Double, seed: Long): Dataset[T]
- def schema: StructType
- def select[U1, U2, U3, U4, U5](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4], c5: TypedColumn[T, U5]): Dataset[(U1, U2, U3, U4, U5)]
- def select[U1, U2, U3, U4](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4]): Dataset[(U1, U2, U3, U4)]
- def select[U1, U2, U3](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3]): Dataset[(U1, U2, U3)]
- def select[U1, U2](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2]): Dataset[(U1, U2)]
- def select[U1](c1: TypedColumn[T, U1]): Dataset[U1]
- def select(col: String, cols: String*): DataFrame
- def select(cols: Column*): DataFrame
- def selectExpr(exprs: String*): DataFrame
- def selectUntyped(columns: TypedColumn[_, _]*): Dataset[_]
- def semanticHash(): Int
- def show(numRows: Int, truncate: Int, vertical: Boolean): Unit
- def show(numRows: Int, truncate: Int): Unit
- def show(numRows: Int, truncate: Boolean): Unit
- def show(truncate: Boolean): Unit
- def show(): Unit
- def show(numRows: Int): Unit
- def sort(sortExprs: Column*): Dataset[T]
- def sort(sortCol: String, sortCols: String*): Dataset[T]
- def sortWithinPartitions(sortExprs: Column*): Dataset[T]
- def sortWithinPartitions(sortCol: String, sortCols: String*): Dataset[T]
- lazy val sparkSession: SparkSession
- lazy val sqlContext: SQLContext
- def stat: DataFrameStatFunctions
- def storageLevel: StorageLevel
- def summary(statistics: String*): DataFrame
- final def synchronized[T0](arg0: ⇒ T0): T0
- def tail(n: Int): Array[T]
- def take(n: Int): Array[T]
- def takeAsList(n: Int): List[T]
- def to(schema: StructType): DataFrame
- def toDF(colNames: String*): DataFrame
- def toDF(): DataFrame
- def toJSON: Dataset[String]
- def toJavaRDD: JavaRDD[T]
- def toLocalIterator(): Iterator[T]
- def toString(): String
- def transform[U](t: (Dataset[T]) ⇒ Dataset[U]): Dataset[U]
- def union(other: Dataset[T]): Dataset[T]
- def unionAll(other: Dataset[T]): Dataset[T]
- def unionByName(other: Dataset[T], allowMissingColumns: Boolean): Dataset[T]
- def unionByName(other: Dataset[T]): Dataset[T]
- def unpersist(): Dataset.this.type
- def unpersist(blocking: Boolean): Dataset.this.type
- def unpivot(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- def unpivot(ids: Array[Column], values: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- final def wait(): Unit
- final def wait(arg0: Long, arg1: Int): Unit
- final def wait(arg0: Long): Unit
- def where(conditionExpr: String): Dataset[T]
- def where(condition: Column): Dataset[T]
- def withColumn(colName: String, col: Column): DataFrame
- def withColumnRenamed(existingName: String, newName: String): DataFrame
- def withColumns(colsMap: Map[String, Column]): DataFrame
- def withColumns(colsMap: java.util.Map[String, Column]): DataFrame
- def withColumnsRenamed(colsMap: Map[String, String]): DataFrame
- def withColumnsRenamed(colsMap: java.util.Map[String, String]): DataFrame
- def withMetadata(columnName: String, metadata: Metadata): DataFrame
- def withWatermark(eventTime: String, delayThreshold: String): Dataset[T]
- def write: DataFrameWriter[T]
- def writeStream: DataStreamWriter[T]
- def writeTo(table: String): DataFrameWriterV2[T]
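Most of the members above fall into two groups: typed transformations that preserve `Dataset[T]` (filter, map, joinWith, ...) and untyped ones that return a DataFrame (select, groupBy/agg, withColumn, ...). A short, hedged sketch of a few of them, continuing the hypothetical `people` Dataset from the constructor example (with `spark.implicits._` in scope):

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._

// Typed filter keeps the element type Person.
val adults: Dataset[Person] = people.filter(_.age >= 18)

// groupBy returns a RelationalGroupedDataset; agg turns it into a DataFrame.
val countsByAge = people.groupBy($"age").agg(count($"name").as("n"))

// withColumn and select are untyped and return DataFrames.
val withDecade = people.withColumn("decade", ($"age" / 10).cast("int") * 10)

// A second, independent DataFrame to join against.
val cities = Seq(("Alice", "Paris"), ("Bob", "Berlin")).toDF("name", "city")

// joinWith keeps both sides typed, yielding a Dataset of pairs
// (the right side is a DataFrame, so its element type is Row).
val joined: Dataset[(Person, Row)] =
  people.joinWith(cities, people("name") === cities("name"), "inner")

joined.show(truncate = false)
```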
Deprecated Value Members
- def explode[A, B](inputColumn: String, outputColumn: String)(f: (A) ⇒ TraversableOnce[B])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[B]): DataFrame
- def explode[A <: Product](input: Column*)(f: (Row) ⇒ TraversableOnce[A])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): DataFrame
- def registerTempTable(tableName: String): Unit
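Both deprecated members have direct replacements: `registerTempTable` is superseded by `createOrReplaceTempView`, and the `explode` methods by `functions.explode` combined with `select` or `withColumn`. A hedged sketch of the replacements, reusing the hypothetical `spark` session and `people` Dataset from the earlier examples:

```scala
import org.apache.spark.sql.functions.{explode, split}

// Replacement for registerTempTable: register a view and query it with SQL.
people.createOrReplaceTempView("people")
val adultsSql = spark.sql("SELECT name, age FROM people WHERE age >= 18")

// Replacement for Dataset.explode: use functions.explode on a column.
// Hypothetical DataFrame with a comma-separated tags column.
val tagged = Seq(("Alice", "scala,spark"), ("Bob", "sql")).toDF("name", "tags")
val exploded = tagged.withColumn("tag", explode(split($"tags", ",")))
```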