object OrcUtils extends Logging
- Alphabetic
- By Inheritance
- OrcUtils
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
addSparkVersionMetadata(writer: Writer): Unit
Adds metadata specifying the Spark version.
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native() @HotSpotIntrinsicCandidate()
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val extensionsForCompressionCodecNames: Map[String, String]
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
- def inferSchema(sparkSession: SparkSession, files: Seq[FileStatus], options: Map[String, String]): Option[StructType]
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path]
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
def
orcResultSchemaString(canPruneCols: Boolean, dataSchema: StructType, resultSchema: StructType, partitionSchema: StructType, conf: Configuration): String
Returns the result schema to read from ORC file.
Returns the result schema to read from the ORC file. In addition, it sets the schema string to 'orc.mapred.input.schema' so the ORC reader can use it later.
- canPruneCols
Flag to decide whether the pruned columns schema is sent to resultSchema, or the entire dataSchema is sent to resultSchema.
- dataSchema
Schema of the orc files.
- resultSchema
Result data schema created after pruning cols.
- partitionSchema
Schema of partitions.
- conf
Hadoop Configuration.
- returns
Returns the result schema as string.
-
def
orcTypeDescriptionString(dt: DataType): String
Given a StructType object, this method converts it to the corresponding string representation in ORC.
- def readCatalystSchema(file: Path, conf: Configuration, ignoreCorruptFiles: Boolean): Option[StructType]
-
def
readOrcSchemasInParallel(files: Seq[FileStatus], conf: Configuration, ignoreCorruptFiles: Boolean): Seq[StructType]
Reads ORC file schemas in multi-threaded manner, using native version of ORC.
Reads ORC file schemas in multi-threaded manner, using native version of ORC. This is visible for testing.
- def readSchema(sparkSession: SparkSession, files: Seq[FileStatus], options: Map[String, String]): Option[StructType]
- def readSchema(file: Path, conf: Configuration, ignoreCorruptFiles: Boolean): Option[TypeDescription]
-
def
requestedColumnIds(isCaseSensitive: Boolean, dataSchema: StructType, requiredSchema: StructType, reader: Reader, conf: Configuration): Option[(Array[Int], Boolean)]
- returns
Returns the combination of requested column ids from the given ORC file and a boolean flag indicating whether column pruning (pruneCols) is allowed. A requested column id can be -1, which means the requested column doesn't exist in the ORC file. Returns None if the given ORC file is empty.
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
Deprecated Value Members
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] ) @Deprecated
- Deprecated