com.alpine.plugin.core.spark.utils

SparkRuntimeUtils

class SparkRuntimeUtils extends AnyRef

:: AlpineSdkApi ::

Annotations
@AlpineSdkApi()
Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By inheritance
Inherited
  1. SparkRuntimeUtils
  2. AnyRef
  3. Any
  1. Hide All
  2. Show all
Learn more about member selection
Visibility
  1. Public
  2. All

Instance Constructors

  1. new SparkRuntimeUtils(sc: SparkContext)

Value Members

  1. final def !=(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  2. final def !=(arg0: Any): Boolean

    Definition Classes
    Any
  3. final def ##(): Int

    Definition Classes
    AnyRef → Any
  4. final def ==(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  5. final def ==(arg0: Any): Boolean

    Definition Classes
    Any
  6. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  7. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  8. def convertColumnTypeToSparkSQLDataType(columnType: TypeValue): DataType

    Converts an Alpine specific 'ColumnType' to the corresponding Spark SQL specific type.

    Converts an Alpine specific 'ColumnType' to the corresponding Spark SQL specific type. If no match can be found for the type, return a string type rather than throwing an exception. Used to define data frame schemas.

    columnType
    returns

  9. def convertSparkSQLDataTypeToColumnType(dataType: DataType): TypeValue

    Converts from a Spark SQL data type to an Alpine-specific column type.

    Converts from a Spark SQL data type to an Alpine-specific column type.

    dataType
    returns

  10. def convertSparkSQLSchemaToTabularSchema(schema: StructType): TabularSchema

    Converts from a Spark SQL schema to the Alpine 'TabularSchema' type.

    Converts from a Spark SQL schema to the Alpine 'TabularSchema' type. The 'TabularSchema' object this method returns can be used to create any of the tabular Alpine IO types (HDFSTabular dataset, dataTable etc.)

    schema

    -a Spark SQL DataFrame schema

    returns

    the equivalent Alpine schema for that dataset

  11. def convertTabularSchemaToSparkSQLSchema(tabularSchema: TabularSchema): StructType

    Convert the Alpine 'TabularSchema' with column names and types to the equivalent Spark SQL data frame header.

    Convert the Alpine 'TabularSchema' with column names and types to the equivalent Spark SQL data frame header.

    tabularSchema

    An Alpine 'TabularSchemaOutline' object with fixed column definitions containing a name and Alpine specific type.

    returns

  12. def deleteFilePathIfExists(outputPathStr: String): AnyVal

    Checks whether the given file path already exists (which would cause a 'PathAlreadyExists' exception when we try to write to it) and, if so, deletes the directory so that no stale results remain at that path.

    Checks whether the given file path already exists (which would cause a 'PathAlreadyExists' exception when we try to write to it) and, if so, deletes the directory so that no stale results remain at that path.

    outputPathStr

    - the full HDFS path

    returns

  13. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  14. def equals(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  15. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  16. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  17. def getDataFrame(dataset: HiveTable): DataFrame

    For use with hive.

    For use with hive. Returns a Spark data frame given a hive table.

  18. def getDataFrame(dataset: HdfsTabularDataset): DataFrame

    Returns a DataFrame from an Alpine HdfsTabularDataset.

    Returns a DataFrame from an Alpine HdfsTabularDataset. The DataFrame's schema will correspond to the column header of the Alpine dataset.

    dataset
    returns

    Spark SQL DataFrame

  19. def hashCode(): Int

    Definition Classes
    AnyRef → Any
  20. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  21. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  22. final def notify(): Unit

    Definition Classes
    AnyRef
  23. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  24. def saveAsAvro(path: String, dataFrame: DataFrame, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsAvroDataset

    Write a DataFrame as an HDFSAvro dataset, and return an instance of the Alpine HDFSAvroDataset type which contains the 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    Write a DataFrame as an HDFSAvro dataset, and return an instance of the Alpine HDFSAvroDataset type which contains the 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    path
    dataFrame
    returns

  25. def saveAsParquet(path: String, dataFrame: DataFrame, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsParquetDataset

    Write a DataFrame to HDFS as a Parquet file, and return an instance of the HDFSParquet IO base type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    Write a DataFrame to HDFS as a Parquet file, and return an instance of the HDFSParquet IO base type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    path
    dataFrame
    returns

  26. def saveAsTSV(path: String, dataFrame: DataFrame, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsDelimitedTabularDataset

    Write a DataFrame to HDFS as a Tabular Delimited file, and return an instance of the Alpine HdfsDelimitedTabularDataset type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    Write a DataFrame to HDFS as a Tabular Delimited file, and return an instance of the Alpine HdfsDelimitedTabularDataset type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    path
    dataFrame
    returns

  27. def saveDataFrame(path: String, dataFrame: DataFrame, storageFormat: HdfsStorageFormat, overwrite: Boolean, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsTabularDataset

    Save a data frame to a path using the given storage format, and return a corresponding HdfsTabularDataset object that points to the path.

    Save a data frame to a path using the given storage format, and return a corresponding HdfsTabularDataset object that points to the path.

    path

    The path to which we'll save the data frame.

    dataFrame

    The data frame that we want to save.

    storageFormat

    The format that we want to store in.

    overwrite

    Whether to overwrite any existing file at the path.

    sourceOperatorInfo

    Source operator information to be included in the output object, if present (the parameter is an Option).

    addendum

    Addendum information to be included in the output object (defaults to an empty map).

    returns

    After saving the data frame, returns an HdfsTabularDataset object.

  28. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  29. def toString(): String

    Definition Classes
    AnyRef → Any
  30. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  31. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from AnyRef

Inherited from Any

Ungrouped