Spark 3.5.5 ScalaDoc - org.apache.spark.sql.Dataset
class Dataset[T] extends Serializable
Instance Constructors
- new Dataset(sqlContext: SQLContext, logicalPlan: LogicalPlan, encoder: Encoder[T])
- new Dataset(sparkSession: SparkSession, logicalPlan: LogicalPlan, encoder: Encoder[T])
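A Dataset is normally obtained from a SparkSession (for example through `read`, `createDataset`, or the `toDS` implicit) rather than by invoking these constructors directly. A minimal sketch, assuming a local SparkSession and a hypothetical `Person` case class:

```scala
import org.apache.spark.sql.{Dataset, SparkSession}

// Hypothetical example type; any case class with an implicit Encoder works.
case class Person(name: String, age: Int)

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("dataset-example")
  .getOrCreate()

import spark.implicits._  // provides Encoders and the .toDS() syntax

// Build a typed Dataset from local data rather than via the constructors above.
val people: Dataset[Person] = Seq(Person("Alice", 29), Person("Bob", 35)).toDS()
```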
Value Members
- final def !=(arg0: Any): Boolean
- final def ##(): Int
- final def ==(arg0: Any): Boolean
- def agg(expr: Column, exprs: Column*): DataFrame
- def agg(exprs: Map[String, String]): DataFrame
- def agg(exprs: java.util.Map[String, String]): DataFrame
- def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame
- def alias(alias: Symbol): Dataset[T]
- def alias(alias: String): Dataset[T]
- def apply(colName: String): Column
- def as(alias: Symbol): Dataset[T]
- def as(alias: String): Dataset[T]
- def as[U](implicit arg0: Encoder[U]): Dataset[U]
- final def asInstanceOf[T0]: T0
- def cache(): Dataset.this.type
- def checkpoint(eager: Boolean): Dataset[T]
- def checkpoint(): Dataset[T]
- def clone(): AnyRef
- def coalesce(numPartitions: Int): Dataset[T]
- def col(colName: String): Column
- def colRegex(colName: String): Column
- def collect(): Array[T]
- def collectAsList(): List[T]
- def columns: Array[String]
- def count(): Long
- def createGlobalTempView(viewName: String): Unit
- def createOrReplaceGlobalTempView(viewName: String): Unit
- def createOrReplaceTempView(viewName: String): Unit
- def createTempView(viewName: String): Unit
- def crossJoin(right: Dataset[_]): DataFrame
- def cube(col1: String, cols: String*): RelationalGroupedDataset
- def cube(cols: Column*): RelationalGroupedDataset
- def describe(cols: String*): DataFrame
- def distinct(): Dataset[T]
- def drop(col: Column, cols: Column*): DataFrame
- def drop(col: Column): DataFrame
- def drop(colNames: String*): DataFrame
- def drop(colName: String): DataFrame
- def dropDuplicates(col1: String, cols: String*): Dataset[T]
- def dropDuplicates(colNames: Array[String]): Dataset[T]
- def dropDuplicates(colNames: Seq[String]): Dataset[T]
- def dropDuplicates(): Dataset[T]
- def dropDuplicatesWithinWatermark(col1: String, cols: String*): Dataset[T]
- def dropDuplicatesWithinWatermark(colNames: Array[String]): Dataset[T]
- def dropDuplicatesWithinWatermark(colNames: Seq[String]): Dataset[T]
- def dropDuplicatesWithinWatermark(): Dataset[T]
- def dtypes: Array[(String, String)]
- val encoder: Encoder[T]
- final def eq(arg0: AnyRef): Boolean
- def equals(arg0: Any): Boolean
- def except(other: Dataset[T]): Dataset[T]
- def exceptAll(other: Dataset[T]): Dataset[T]
- def explain(): Unit
- def explain(extended: Boolean): Unit
- def explain(mode: String): Unit
- def filter(func: FilterFunction[T]): Dataset[T]
- def filter(func: (T) ⇒ Boolean): Dataset[T]
- def filter(conditionExpr: String): Dataset[T]
- def filter(condition: Column): Dataset[T]
- def finalize(): Unit
- def first(): T
- def flatMap[U](f: FlatMapFunction[T, U], encoder: Encoder[U]): Dataset[U]
- def flatMap[U](func: (T) ⇒ TraversableOnce[U])(implicit arg0: Encoder[U]): Dataset[U]
- def foreach(func: ForeachFunction[T]): Unit
- def foreach(f: (T) ⇒ Unit): Unit
- def foreachPartition(func: ForeachPartitionFunction[T]): Unit
- def foreachPartition(f: (Iterator[T]) ⇒ Unit): Unit
- final def getClass(): Class[_]
- def groupBy(col1: String, cols: String*): RelationalGroupedDataset
- def groupBy(cols: Column*): RelationalGroupedDataset
- def groupByKey[K](func: MapFunction[T, K], encoder: Encoder[K]): KeyValueGroupedDataset[K, T]
- def groupByKey[K](func: (T) ⇒ K)(implicit arg0: Encoder[K]): KeyValueGroupedDataset[K, T]
- def hashCode(): Int
- def head(): T
- def head(n: Int): Array[T]
- def hint(name: String, parameters: Any*): Dataset[T]
- def inputFiles: Array[String]
- def intersect(other: Dataset[T]): Dataset[T]
- def intersectAll(other: Dataset[T]): Dataset[T]
- def isEmpty: Boolean
- final def isInstanceOf[T0]: Boolean
- def isLocal: Boolean
- def isStreaming: Boolean
- def javaRDD: JavaRDD[T]
- def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame
- def join(right: Dataset[_], joinExprs: Column): DataFrame
- def join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame
- def join(right: Dataset[_], usingColumns: Array[String], joinType: String): DataFrame
- def join(right: Dataset[_], usingColumn: String, joinType: String): DataFrame
- def join(right: Dataset[_], usingColumns: Seq[String]): DataFrame
- def join(right: Dataset[_], usingColumns: Array[String]): DataFrame
- def join(right: Dataset[_], usingColumn: String): DataFrame
- def join(right: Dataset[_]): DataFrame
- def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)]
- def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)]
- def limit(n: Int): Dataset[T]
- def localCheckpoint(eager: Boolean): Dataset[T]
- def localCheckpoint(): Dataset[T]
- def map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U]
- def map[U](func: (T) ⇒ U)(implicit arg0: Encoder[U]): Dataset[U]
- def mapPartitions[U](f: MapPartitionsFunction[T, U], encoder: Encoder[U]): Dataset[U]
- def mapPartitions[U](func: (Iterator[T]) ⇒ Iterator[U])(implicit arg0: Encoder[U]): Dataset[U]
- def melt(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- def melt(ids: Array[Column], values: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- def metadataColumn(colName: String): Column
- def na: DataFrameNaFunctions
- final def ne(arg0: AnyRef): Boolean
- final def notify(): Unit
- final def notifyAll(): Unit
- def observe(observation: Observation, expr: Column, exprs: Column*): Dataset[T]
- def observe(name: String, expr: Column, exprs: Column*): Dataset[T]
- def offset(n: Int): Dataset[T]
- def orderBy(sortExprs: Column*): Dataset[T]
- def orderBy(sortCol: String, sortCols: String*): Dataset[T]
- def persist(newLevel: StorageLevel): Dataset.this.type
- def persist(): Dataset.this.type
- def printSchema(level: Int): Unit
- def printSchema(): Unit
- val queryExecution: QueryExecution
- def randomSplit(weights: Array[Double]): Array[Dataset[T]]
- def randomSplit(weights: Array[Double], seed: Long): Array[Dataset[T]]
- def randomSplitAsList(weights: Array[Double], seed: Long): List[Dataset[T]]
- lazy val rdd: RDD[T]
- def reduce(func: ReduceFunction[T]): T
- def reduce(func: (T, T) ⇒ T): T
- def repartition(partitionExprs: Column*): Dataset[T]
- def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T]
- def repartition(numPartitions: Int): Dataset[T]
- def repartitionByRange(partitionExprs: Column*): Dataset[T]
- def repartitionByRange(numPartitions: Int, partitionExprs: Column*): Dataset[T]
- def rollup(col1: String, cols: String*): RelationalGroupedDataset
- def rollup(cols: Column*): RelationalGroupedDataset
- def sameSemantics(other: Dataset[T]): Boolean
- def sample(withReplacement: Boolean, fraction: Double): Dataset[T]
- def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T]
- def sample(fraction: Double): Dataset[T]
- def sample(fraction: Double, seed: Long): Dataset[T]
- def schema: StructType
- def select[U1, U2, U3, U4, U5](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4], c5: TypedColumn[T, U5]): Dataset[(U1, U2, U3, U4, U5)]
- def select[U1, U2, U3, U4](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4]): Dataset[(U1, U2, U3, U4)]
- def select[U1, U2, U3](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3]): Dataset[(U1, U2, U3)]
- def select[U1, U2](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2]): Dataset[(U1, U2)]
- def select[U1](c1: TypedColumn[T, U1]): Dataset[U1]
- def select(col: String, cols: String*): DataFrame
- def select(cols: Column*): DataFrame
- def selectExpr(exprs: String*): DataFrame
- def selectUntyped(columns: TypedColumn[_, _]*): Dataset[_]
- def semanticHash(): Int
- def show(numRows: Int, truncate: Int, vertical: Boolean): Unit
- def show(numRows: Int, truncate: Int): Unit
- def show(numRows: Int, truncate: Boolean): Unit
- def show(truncate: Boolean): Unit
- def show(): Unit
- def show(numRows: Int): Unit
- def sort(sortExprs: Column*): Dataset[T]
- def sort(sortCol: String, sortCols: String*): Dataset[T]
- def sortWithinPartitions(sortExprs: Column*): Dataset[T]
- def sortWithinPartitions(sortCol: String, sortCols: String*): Dataset[T]
- lazy val sparkSession: SparkSession
- lazy val sqlContext: SQLContext
- def stat: DataFrameStatFunctions
- def storageLevel: StorageLevel
- def summary(statistics: String*): DataFrame
- final def synchronized[T0](arg0: ⇒ T0): T0
- def tail(n: Int): Array[T]
- def take(n: Int): Array[T]
- def takeAsList(n: Int): List[T]
- def to(schema: StructType): DataFrame
- def toDF(colNames: String*): DataFrame
- def toDF(): DataFrame
- def toJSON: Dataset[String]
- def toJavaRDD: JavaRDD[T]
- def toLocalIterator(): Iterator[T]
- def toString(): String
- def transform[U](t: (Dataset[T]) ⇒ Dataset[U]): Dataset[U]
- def union(other: Dataset[T]): Dataset[T]
- def unionAll(other: Dataset[T]): Dataset[T]
- def unionByName(other: Dataset[T], allowMissingColumns: Boolean): Dataset[T]
- def unionByName(other: Dataset[T]): Dataset[T]
- def unpersist(): Dataset.this.type
- def unpersist(blocking: Boolean): Dataset.this.type
- def unpivot(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- def unpivot(ids: Array[Column], values: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
- final def wait(): Unit
- final def wait(arg0: Long, arg1: Int): Unit
- final def wait(arg0: Long): Unit
- def where(conditionExpr: String): Dataset[T]
- def where(condition: Column): Dataset[T]
- def withColumn(colName: String, col: Column): DataFrame
- def withColumnRenamed(existingName: String, newName: String): DataFrame
- def withColumns(colsMap: Map[String, Column]): DataFrame
- def withColumns(colsMap: java.util.Map[String, Column]): DataFrame
- def withColumnsRenamed(colsMap: Map[String, String]): DataFrame
- def withColumnsRenamed(colsMap: java.util.Map[String, String]): DataFrame
- def withMetadata(columnName: String, metadata: Metadata): DataFrame
- def withWatermark(eventTime: String, delayThreshold: String): Dataset[T]
- def write: DataFrameWriter[T]
- def writeStream: DataStreamWriter[T]
- def writeTo(table: String): DataFrameWriterV2[T]
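Most of the members above fall into two groups: typed transformations that preserve `Dataset[T]` (filter, map, joinWith, ...) and untyped ones that return a DataFrame (select, groupBy/agg, withColumn, ...). A short, hedged sketch of a few of them, continuing the hypothetical `people` Dataset from the constructor example (with `spark.implicits._` in scope):

```scala
import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._

// Typed filter keeps the element type Person.
val adults: Dataset[Person] = people.filter(_.age >= 18)

// groupBy returns a RelationalGroupedDataset; agg turns it into a DataFrame.
val countsByAge = people.groupBy($"age").agg(count($"name").as("n"))

// withColumn and select are untyped and return DataFrames.
val withDecade = people.withColumn("decade", ($"age" / 10).cast("int") * 10)

// A second, independent DataFrame to join against.
val cities = Seq(("Alice", "Paris"), ("Bob", "Berlin")).toDF("name", "city")

// joinWith keeps both sides typed, yielding a Dataset of pairs
// (the right side is a DataFrame, so its element type is Row).
val joined: Dataset[(Person, Row)] =
  people.joinWith(cities, people("name") === cities("name"), "inner")

joined.show(truncate = false)
```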
Deprecated Value Members
- def explode[A, B](inputColumn: String, outputColumn: String)(f: (A) ⇒ TraversableOnce[B])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[B]): DataFrame
- def explode[A <: Product](input: Column*)(f: (Row) ⇒ TraversableOnce[A])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): DataFrame
- def registerTempTable(tableName: String): Unit
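Both deprecated members have direct replacements: `registerTempTable` is superseded by `createOrReplaceTempView`, and the `explode` methods by `functions.explode` combined with `select` or `withColumn`. A hedged sketch of the replacements, reusing the hypothetical `spark` session and `people` Dataset from the earlier examples:

```scala
import org.apache.spark.sql.functions.{explode, split}

// Replacement for registerTempTable: register a view and query it with SQL.
people.createOrReplaceTempView("people")
val adultsSql = spark.sql("SELECT name, age FROM people WHERE age >= 18")

// Replacement for Dataset.explode: use functions.explode on a column.
// Hypothetical DataFrame with a comma-separated tags column.
val tagged = Seq(("Alice", "scala,spark"), ("Bob", "sql")).toDF("name", "tags")
val exploded = tagged.withColumn("tag", explode(split($"tags", ",")))
```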