com.alpine.plugin.core.spark.utils

SparkRuntimeUtils

class SparkRuntimeUtils extends AnyRef

:: AlpineSdkApi ::

Annotations
@AlpineSdkApi()
Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By inheritance
Inherited
  1. SparkRuntimeUtils
  2. AnyRef
  3. Any
  1. Hide All
  2. Show all
Learn more about member selection
Visibility
  1. Public
  2. All

Instance Constructors

  1. new SparkRuntimeUtils(sc: SparkContext)

Value Members

  1. final def !=(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  2. final def !=(arg0: Any): Boolean

    Definition Classes
    Any
  3. final def ##(): Int

    Definition Classes
    AnyRef → Any
  4. final def ==(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  5. final def ==(arg0: Any): Boolean

    Definition Classes
    Any
  6. final def asInstanceOf[T0]: T0

    Definition Classes
    Any
  7. def clone(): AnyRef

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  8. def convertColumnTypeToSparkSQLDataType(columnType: TypeValue): DataType

    Converts an Alpine specific 'ColumnType' to the corresponding Spark SQL specific type.

    Converts an Alpine specific 'ColumnType' to the corresponding Spark SQL specific type. If no match can be found for the type, return a string type rather than throwing an exception. Used to define data frame schemas.

    columnType
    returns

  9. def convertSparkSQLDataTypeToColumnType(dataType: DataType): TypeValue

    Converts from a Spark SQL data type to an Alpine-specific column type.

    Converts from a Spark SQL data type to an Alpine-specific column type.

    dataType
    returns

  10. def convertSparkSQLSchemaToTabularSchema(schema: StructType): TabularSchema

    Converts from a Spark SQL schema to the Alpine 'TabularSchema' type.

    Converts from a Spark SQL schema to the Alpine 'TabularSchema' type. The 'TabularSchema' object this method returns can be used to create any of the tabular Alpine IO types (HDFSTabular dataset, dataTable etc.)

    schema

    -a Spark SQL DataFrame schema

    returns

    the equivalent Alpine schema for that dataset

  11. def convertTabularSchemaToSparkSQLSchema(tabularSchema: TabularSchema): StructType

    Convert the Alpine 'TabularSchema' with column names and types to the equivalent Spark SQL data frame header.

    Convert the Alpine 'TabularSchema' with column names and types to the equivalent Spark SQL data frame header.

    tabularSchema

    An Alpine 'TabularSchemaOutline' object with fixed column definitions containing a name and Alpine specific type.

    returns

  12. def deleteFilePathIfExists(outputPathStr: String): AnyVal

    Checks whether the given file path already exists (which would cause a 'PathAlreadyExists' exception when we try to write to it) and, if so, deletes the directory so that no stale results remain at that path.

    Checks whether the given file path already exists (which would cause a 'PathAlreadyExists' exception when we try to write to it) and, if so, deletes the directory so that no stale results remain at that path.

    outputPathStr

    - the full HDFS path

    returns

  13. final def eq(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  14. def equals(arg0: Any): Boolean

    Definition Classes
    AnyRef → Any
  15. def finalize(): Unit

    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  16. final def getClass(): Class[_]

    Definition Classes
    AnyRef → Any
  17. def getDataFrame(dataset: HiveTable): DataFrame

    For use with hive.

    For use with hive. Returns a Spark data frame given a hive table.

  18. def getDataFrame(dataset: HdfsTabularDataset): DataFrame

    Returns a DataFrame from an Alpine HdfsTabularDataset.

    Returns a DataFrame from an Alpine HdfsTabularDataset. The DataFrame's schema will correspond to the column header of the Alpine dataset.

    dataset
    returns

    Spark SQL DataFrame

  19. def hashCode(): Int

    Definition Classes
    AnyRef → Any
  20. final def isInstanceOf[T0]: Boolean

    Definition Classes
    Any
  21. final def ne(arg0: AnyRef): Boolean

    Definition Classes
    AnyRef
  22. final def notify(): Unit

    Definition Classes
    AnyRef
  23. final def notifyAll(): Unit

    Definition Classes
    AnyRef
  24. def saveAsAvro(path: String, dataFrame: DataFrame, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsAvroDataset

    Write a DataFrame as an HDFSAvro dataset, and return an instance of the Alpine HDFSAvroDataset type which contains the 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    Write a DataFrame as an HDFSAvro dataset, and return an instance of the Alpine HDFSAvroDataset type which contains the 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    path
    dataFrame
    returns

  25. def saveAsParquet(path: String, dataFrame: DataFrame, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsParquetDataset

    Write a DataFrame to HDFS as a Parquet file, and return an instance of the HDFSParquet IO base type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    Write a DataFrame to HDFS as a Parquet file, and return an instance of the HDFSParquet IO base type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    path
    dataFrame
    returns

  26. def saveAsTSV(path: String, dataFrame: DataFrame, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsDelimitedTabularDataset

    Write a DataFrame to HDFS as a Tabular Delimited file, and return an instance of the Alpine HdfsDelimitedTabularDataset type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    Write a DataFrame to HDFS as a Tabular Delimited file, and return an instance of the Alpine HdfsDelimitedTabularDataset type which contains the Alpine 'TabularSchema' definition (created by converting the DataFrame schema) and the path to the saved data.

    path
    dataFrame
    returns

  27. def saveDataFrame(path: String, dataFrame: DataFrame, storageFormat: HdfsStorageFormat, overwrite: Boolean, sourceOperatorInfo: Option[OperatorInfo], addendum: Map[String, AnyRef] = Map[String, AnyRef]()): HdfsTabularDataset

    Save a data frame to a path using the given storage format, and return a corresponding HdfsTabularDataset object that points to the path.

    Save a data frame to a path using the given storage format, and return a corresponding HdfsTabularDataset object that points to the path.

    path

    The path to which we'll save the data frame.

    dataFrame

    The data frame that we want to save.

    storageFormat

    The format that we want to store in.

    overwrite

    Whether to overwrite any existing file at the path.

    sourceOperatorInfo

    Source operator information to be included in the output object, if present (the parameter is an Option).

    addendum

    Addendum information to be included in the output object (defaults to an empty map).

    returns

    After saving the data frame, returns an HdfsTabularDataset object.

  28. final def synchronized[T0](arg0: ⇒ T0): T0

    Definition Classes
    AnyRef
  29. def toString(): String

    Definition Classes
    AnyRef → Any
  30. final def wait(): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  31. final def wait(arg0: Long, arg1: Int): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. final def wait(arg0: Long): Unit

    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from AnyRef

Inherited from Any

Ungrouped