Spark 3.5.5 ScalaDoc - org.apache.spark.sql.Dataset

class Dataset[T] extends Serializable

Ordering

  1. Grouped
  2. Alphabetic
  3. By Inheritance

Inherited

  1. Dataset

  2. scala.Serializable

  3. java.io.Serializable

  4. AnyRef

  5. Any

  6. Hide All

  7. Show All

Instance Constructors

  1. new Dataset(sqlContext: SQLContext, logicalPlan: LogicalPlan, encoder: Encoder[T])
  2. new Dataset(sparkSession: SparkSession, logicalPlan: LogicalPlan, encoder: Encoder[T])

Value Members

  1. final def !=(arg0: Any): Boolean
  2. final def ##(): Int
  3. final def ==(arg0: Any): Boolean
  4. def agg(expr: Column, exprs: Column*): DataFrame
  5. def agg(exprs: java.util.Map[String, String]): DataFrame
  6. def agg(exprs: Map[String, String]): DataFrame
  7. def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame
  8. def alias(alias: Symbol): Dataset[T]
  9. def alias(alias: String): Dataset[T]
  10. def apply(colName: String): Column
  11. def as(alias: Symbol): Dataset[T]
  12. def as(alias: String): Dataset[T]
  13. def as[U](implicit arg0: Encoder[U]): Dataset[U]
  14. final def asInstanceOf[T0]: T0
  15. def cache(): Dataset.this.type
  16. def checkpoint(eager: Boolean): Dataset[T]
  17. def checkpoint(): Dataset[T]
  18. def clone(): AnyRef
  19. def coalesce(numPartitions: Int): Dataset[T]
  20. def col(colName: String): Column
  21. def colRegex(colName: String): Column
  22. def collect(): Array[T]
  23. def collectAsList(): java.util.List[T]
  24. def columns: Array[String]
  25. def count(): Long
  26. def createGlobalTempView(viewName: String): Unit
  27. def createOrReplaceGlobalTempView(viewName: String): Unit
  28. def createOrReplaceTempView(viewName: String): Unit
  29. def createTempView(viewName: String): Unit
  30. def crossJoin(right: Dataset[_]): DataFrame
  31. def cube(col1: String, cols: String*): RelationalGroupedDataset
  32. def cube(cols: Column*): RelationalGroupedDataset
  33. def describe(cols: String*): DataFrame
  34. def distinct(): Dataset[T]
  35. def drop(col: Column, cols: Column*): DataFrame
  36. def drop(col: Column): DataFrame
  37. def drop(colNames: String*): DataFrame
  38. def drop(colName: String): DataFrame
  39. def dropDuplicates(col1: String, cols: String*): Dataset[T]
  40. def dropDuplicates(colNames: Array[String]): Dataset[T]
  41. def dropDuplicates(colNames: Seq[String]): Dataset[T]
  42. def dropDuplicates(): Dataset[T]
  43. def dropDuplicatesWithinWatermark(col1: String, cols: String*): Dataset[T]
  44. def dropDuplicatesWithinWatermark(colNames: Array[String]): Dataset[T]
  45. def dropDuplicatesWithinWatermark(colNames: Seq[String]): Dataset[T]
  46. def dropDuplicatesWithinWatermark(): Dataset[T]
  47. def dtypes: Array[(String, String)]
  48. val encoder: Encoder[T]
  49. final def eq(arg0: AnyRef): Boolean
  50. def equals(arg0: Any): Boolean
  51. def except(other: Dataset[T]): Dataset[T]
  52. def exceptAll(other: Dataset[T]): Dataset[T]
  53. def explain(): Unit
  54. def explain(extended: Boolean): Unit
  55. def explain(mode: String): Unit
  56. def filter(func: FilterFunction[T]): Dataset[T]
  57. def filter(func: (T) ⇒ Boolean): Dataset[T]
  58. def filter(conditionExpr: String): Dataset[T]
  59. def filter(condition: Column): Dataset[T]
  60. def finalize(): Unit
  61. def first(): T
  62. def flatMap[U](f: FlatMapFunction[T, U], encoder: Encoder[U]): Dataset[U]
  63. def flatMap[U](func: (T) ⇒ TraversableOnce[U])(implicit arg0: Encoder[U]): Dataset[U]
  64. def foreach(func: ForeachFunction[T]): Unit
  65. def foreach(f: (T) ⇒ Unit): Unit
  66. def foreachPartition(func: ForeachPartitionFunction[T]): Unit
  67. def foreachPartition(f: (Iterator[T]) ⇒ Unit): Unit
  68. final def getClass(): Class[_]
  69. def groupBy(col1: String, cols: String*): RelationalGroupedDataset
  70. def groupBy(cols: Column*): RelationalGroupedDataset
  71. def groupByKey[K](func: MapFunction[T, K], encoder: Encoder[K]): KeyValueGroupedDataset[K, T]
  72. def groupByKey[K](func: (T) ⇒ K)(implicit arg0: Encoder[K]): KeyValueGroupedDataset[K, T]
  73. def hashCode(): Int
  74. def head(): T
  75. def head(n: Int): Array[T]
  76. def hint(name: String, parameters: Any*): Dataset[T]
  77. def inputFiles: Array[String]
  78. def intersect(other: Dataset[T]): Dataset[T]
  79. def intersectAll(other: Dataset[T]): Dataset[T]
  80. def isEmpty: Boolean
  81. final def isInstanceOf[T0]: Boolean
  82. def isLocal: Boolean
  83. def isStreaming: Boolean
  84. def javaRDD: JavaRDD[T]
  85. def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame
  86. def join(right: Dataset[_], joinExprs: Column): DataFrame
  87. def join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame
  88. def join(right: Dataset[_], usingColumns: Array[String], joinType: String): DataFrame
  89. def join(right: Dataset[_], usingColumn: String, joinType: String): DataFrame
  90. def join(right: Dataset[_], usingColumns: Seq[String]): DataFrame
  91. def join(right: Dataset[_], usingColumns: Array[String]): DataFrame
  92. def join(right: Dataset[_], usingColumn: String): DataFrame
  93. def join(right: Dataset[_]): DataFrame
  94. def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)]
  95. def joinWith[U](other: Dataset[U], condition: Column, joinType: String): Dataset[(T, U)]
  96. def limit(n: Int): Dataset[T]
  97. def localCheckpoint(eager: Boolean): Dataset[T]
  98. def localCheckpoint(): Dataset[T]
  99. def map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U]
  100. def map[U](func: (T) ⇒ U)(implicit arg0: Encoder[U]): Dataset[U]
  101. def mapPartitions[U](f: MapPartitionsFunction[T, U], encoder: Encoder[U]): Dataset[U]
  102. def mapPartitions[U](func: (Iterator[T]) ⇒ Iterator[U])(implicit arg0: Encoder[U]): Dataset[U]
  103. def melt(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
  104. def melt(ids: Array[Column], values: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
  105. def metadataColumn(colName: String): Column
  106. def na: DataFrameNaFunctions
  107. final def ne(arg0: AnyRef): Boolean
  108. final def notify(): Unit
  109. final def notifyAll(): Unit
  110. def observe(observation: Observation, expr: Column, exprs: Column*): Dataset[T]
  111. def observe(name: String, expr: Column, exprs: Column*): Dataset[T]
  112. def offset(n: Int): Dataset[T]
  113. def orderBy(sortExprs: Column*): Dataset[T]
  114. def orderBy(sortCol: String, sortCols: String*): Dataset[T]
  115. def persist(newLevel: StorageLevel): Dataset.this.type
  116. def persist(): Dataset.this.type
  117. def printSchema(level: Int): Unit
  118. def printSchema(): Unit
  119. val queryExecution: QueryExecution
  120. def randomSplit(weights: Array[Double]): Array[Dataset[T]]
  121. def randomSplit(weights: Array[Double], seed: Long): Array[Dataset[T]]
  122. def randomSplitAsList(weights: Array[Double], seed: Long): java.util.List[Dataset[T]]
  123. lazy val rdd: RDD[T]
  124. def reduce(func: ReduceFunction[T]): T
  125. def reduce(func: (T, T) ⇒ T): T
  126. def repartition(partitionExprs: Column*): Dataset[T]
  127. def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T]
  128. def repartition(numPartitions: Int): Dataset[T]
  129. def repartitionByRange(partitionExprs: Column*): Dataset[T]
  130. def repartitionByRange(numPartitions: Int, partitionExprs: Column*): Dataset[T]
  131. def rollup(col1: String, cols: String*): RelationalGroupedDataset
  132. def rollup(cols: Column*): RelationalGroupedDataset
  133. def sameSemantics(other: Dataset[T]): Boolean
  134. def sample(withReplacement: Boolean, fraction: Double): Dataset[T]
  135. def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T]
  136. def sample(fraction: Double): Dataset[T]
  137. def sample(fraction: Double, seed: Long): Dataset[T]
  138. def schema: StructType
  139. def select[U1, U2, U3, U4, U5](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4], c5: TypedColumn[T, U5]): Dataset[(U1, U2, U3, U4, U5)]
  140. def select[U1, U2, U3, U4](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4]): Dataset[(U1, U2, U3, U4)]
  141. def select[U1, U2, U3](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3]): Dataset[(U1, U2, U3)]
  142. def select[U1, U2](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2]): Dataset[(U1, U2)]
  143. def select[U1](c1: TypedColumn[T, U1]): Dataset[U1]
  144. def select(col: String, cols: String*): DataFrame
  145. def select(cols: Column*): DataFrame
  146. def selectExpr(exprs: String*): DataFrame
  147. def selectUntyped(columns: TypedColumn[_, _]*): Dataset[_]
  148. def semanticHash(): Int
  149. def show(numRows: Int, truncate: Int, vertical: Boolean): Unit
  150. def show(numRows: Int, truncate: Int): Unit
  151. def show(numRows: Int, truncate: Boolean): Unit
  152. def show(truncate: Boolean): Unit
  153. def show(): Unit
  154. def show(numRows: Int): Unit
  155. def sort(sortExprs: Column*): Dataset[T]
  156. def sort(sortCol: String, sortCols: String*): Dataset[T]
  157. def sortWithinPartitions(sortExprs: Column*): Dataset[T]
  158. def sortWithinPartitions(sortCol: String, sortCols: String*): Dataset[T]
  159. lazy val sparkSession: SparkSession
  160. lazy val sqlContext: SQLContext
  161. def stat: DataFrameStatFunctions
  162. def storageLevel: StorageLevel
  163. def summary(statistics: String*): DataFrame
  164. final def synchronized[T0](arg0: ⇒ T0): T0
  165. def tail(n: Int): Array[T]
  166. def take(n: Int): Array[T]
  167. def takeAsList(n: Int): java.util.List[T]
  168. def to(schema: StructType): DataFrame
  169. def toDF(colNames: String*): DataFrame
  170. def toDF(): DataFrame
  171. def toJSON: Dataset[String]
  172. def toJavaRDD: JavaRDD[T]
  173. def toLocalIterator(): java.util.Iterator[T]
  174. def toString(): String
  175. def transform[U](t: (Dataset[T]) ⇒ Dataset[U]): Dataset[U]
  176. def union(other: Dataset[T]): Dataset[T]
  177. def unionAll(other: Dataset[T]): Dataset[T]
  178. def unionByName(other: Dataset[T], allowMissingColumns: Boolean): Dataset[T]
  179. def unionByName(other: Dataset[T]): Dataset[T]
  180. def unpersist(): Dataset.this.type
  181. def unpersist(blocking: Boolean): Dataset.this.type
  182. def unpivot(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
  183. def unpivot(ids: Array[Column], values: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame
  184. final def wait(): Unit
  185. final def wait(arg0: Long, arg1: Int): Unit
  186. final def wait(arg0: Long): Unit
  187. def where(conditionExpr: String): Dataset[T]
  188. def where(condition: Column): Dataset[T]
  189. def withColumn(colName: String, col: Column): DataFrame
  190. def withColumnRenamed(existingName: String, newName: String): DataFrame
  191. def withColumns(colsMap: java.util.Map[String, Column]): DataFrame
  192. def withColumns(colsMap: Map[String, Column]): DataFrame
  193. def withColumnsRenamed(colsMap: java.util.Map[String, String]): DataFrame
  194. def withColumnsRenamed(colsMap: Map[String, String]): DataFrame
  195. def withMetadata(columnName: String, metadata: Metadata): DataFrame
  196. def withWatermark(eventTime: String, delayThreshold: String): Dataset[T]
  197. def write: DataFrameWriter[T]
  198. def writeStream: DataStreamWriter[T]
  199. def writeTo(table: String): DataFrameWriterV2[T]

Deprecated Value Members

  1. def explode[A, B](inputColumn: String, outputColumn: String)(f: (A) ⇒ TraversableOnce[B])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[B]): DataFrame
  2. def explode[A <: Product](input: Column*)(f: (Row) ⇒ TraversableOnce[A])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[A]): DataFrame
  3. def registerTempTable(tableName: String): Unit

Inherited from scala.Serializable

Inherited from java.io.Serializable

Inherited from AnyRef

Inherited from Any

Actions

Basic Dataset functions

Streaming

Typed transformations

Untyped transformations

Ungrouped