object ResourceHelper
Single-place helper for IO management. Streams, sources, and external input should be handled from here.
Type Members
- case class SourceStream(resource: String) extends Product with Serializable
  Structure for a SourceStream coming from compiled content.
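  A minimal construction sketch, assuming Spark NLP's com.johnsnowlabs.nlp.util.io package; the resource path is hypothetical:

  ```scala
  import com.johnsnowlabs.nlp.util.io.ResourceHelper

  // Wrap compiled (classpath) content behind a single resource path.
  // The path below is hypothetical.
  val stream = ResourceHelper.SourceStream("/clinical/words.txt")
  ```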
Value Members
- def copyToLocal(path: String): String
  Copies the remote resource to a local temporary folder and returns its absolute path.
  Currently, file:/, s3:/, hdfs:/ and dbfs:/ are supported.
  If the file is already on the local file system, just its absolute path will be returned instead.
  - path: Path to the resource
  - returns: Absolute path to the temporary or local folder of the resource
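  A minimal usage sketch; the bucket and key are hypothetical:

  ```scala
  import com.johnsnowlabs.nlp.util.io.ResourceHelper

  // Remote files are copied into a temporary folder; files already on the
  // local file system come back as their absolute path unchanged.
  val localPath: String = ResourceHelper.copyToLocal("s3://my-bucket/models/embeddings.bin")
  ```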
- def flattenRevertValuesAsKeys(er: ExternalResource): Map[String, String]
  Where a key maps to multiple values, this optimizer flattens the mapping so that each value becomes a key, giving constant-time access.
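  A hedged sketch of the inversion this performs, assuming the ExternalResource(path, readAs, options) constructor from the same package; the path and option keys are assumptions:

  ```scala
  import com.johnsnowlabs.nlp.util.io.{ExternalResource, ReadAs, ResourceHelper}

  // A dictionary line such as "be -> was\twere\tbeen" is flattened so each
  // value becomes a key pointing back to its key: was -> be, were -> be, been -> be.
  val er = ExternalResource(
    "src/test/resources/lemmas.txt", // hypothetical path
    ReadAs.TEXT,
    Map("format" -> "text", "keyDelimiter" -> "->", "valueDelimiter" -> "\t")
  )
  val reverted: Map[String, String] = ResourceHelper.flattenRevertValuesAsKeys(er)
  ```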
- def getActiveSparkSession: SparkSession
- def getFileFromPath(pathToFile: String): File
- def getFilesContentBuffer(externalResource: ExternalResource): Seq[Iterator[String]]
- def getResourceFile(path: String): URL
- def getResourceStream(path: String): InputStream
  NOT thread safe. Do not call from executors.
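  A minimal driver-side usage sketch; the resource name is hypothetical:

  ```scala
  import java.io.InputStream
  import scala.io.Source
  import com.johnsnowlabs.nlp.util.io.ResourceHelper

  // Open a classpath resource on the driver only; this method is not thread
  // safe and must not be called from executors.
  val in: InputStream = ResourceHelper.getResourceStream("stopwords/english.txt")
  try Source.fromInputStream(in).getLines().take(5).foreach(println)
  finally in.close()
  ```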
- def getSparkSessionWithS3(
    awsAccessKeyId: String,
    awsSecretAccessKey: String,
    hadoopAwsVersion: String = ConfigHelper.hadoopAwsVersion,
    AwsJavaSdkVersion: String = ConfigHelper.awsJavaSdkVersion,
    region: String = "us-east-1",
    s3Impl: String = ...,
    pathStyleAccess: Boolean = true,
    credentialsProvider: String = "TemporaryAWSCredentialsProvider",
    awsSessionToken: Option[String] = None
  ): SparkSession
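  A minimal sketch using the signature above; the credentials are placeholders read from the environment:

  ```scala
  import com.johnsnowlabs.nlp.util.io.ResourceHelper

  // Build a SparkSession preconfigured for S3 access; the remaining
  // parameters keep their defaults.
  val spark = ResourceHelper.getSparkSessionWithS3(
    awsAccessKeyId = sys.env("AWS_ACCESS_KEY_ID"),
    awsSecretAccessKey = sys.env("AWS_SECRET_ACCESS_KEY"),
    region = "eu-west-1",
    awsSessionToken = sys.env.get("AWS_SESSION_TOKEN")
  )
  ```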
- def getWordCount(externalResource: ExternalResource, wordCount: Map[String, Long] = ..., pipeline: Option[PipelineModel] = None): Map[String, Long]
- def listLocalFiles(path: String): List[File]
- def listResourceDirectory(path: String): Seq[String]
- def parseKeyArrayValues(externalResource: ExternalResource): Map[String, Array[Float]]
- def parseKeyListValues(externalResource: ExternalResource): Map[String, List[String]]
- def parseKeyValueText(er: ExternalResource): Map[String, String]
  General-purpose key-value parser from a source. Currently reads only text files.
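  A minimal sketch, assuming tab-delimited "key<TAB>value" lines; the path and option keys are assumptions:

  ```scala
  import com.johnsnowlabs.nlp.util.io.{ExternalResource, ReadAs, ResourceHelper}

  // Each line like "picked\tpick" becomes one map entry.
  val er = ExternalResource(
    "data/key_values.txt", // hypothetical path
    ReadAs.TEXT,
    Map("format" -> "text", "delimiter" -> "\t")
  )
  val kv: Map[String, String] = ResourceHelper.parseKeyValueText(er)
  ```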
- def parseLines(er: ExternalResource): Array[String]
  General-purpose line parser from a source. Currently reads only text files.
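  A minimal sketch; the path and "format" option are assumptions:

  ```scala
  import com.johnsnowlabs.nlp.util.io.{ExternalResource, ReadAs, ResourceHelper}

  // Read every line of a plain-text file into memory.
  val lines: Array[String] = ResourceHelper.parseLines(
    ExternalResource("data/sentences.txt", ReadAs.TEXT, Map("format" -> "text"))
  )
  ```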
- def parseLinesIterator(er: ExternalResource): Seq[Iterator[String]]
  General-purpose line parser from a source, returning iterators. Currently reads only text files.
- def parseTupleSentences(er: ExternalResource): Array[TaggedSentence]
  General-purpose tuple parser from a source. Currently reads only text files.
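  A hedged sketch for a POS-style corpus of "token_TAG" pairs; the path and the "delimiter" option are assumptions:

  ```scala
  import com.johnsnowlabs.nlp.util.io.{ExternalResource, ReadAs, ResourceHelper}
  import com.johnsnowlabs.nlp.annotators.common.TaggedSentence

  // Lines such as "The_DT quick_JJ fox_NN" become TaggedSentence values.
  val er = ExternalResource(
    "data/pos-corpus", // hypothetical path
    ReadAs.TEXT,
    Map("format" -> "text", "delimiter" -> "_")
  )
  val sentences: Array[TaggedSentence] = ResourceHelper.parseTupleSentences(er)
  ```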
- def parseTupleSentencesDS(er: ExternalResource): Dataset[TaggedSentence]
- def parseTupleText(er: ExternalResource): Array[(String, String)]
  General-purpose tuple parser from a source. Currently reads only text files.
- def readSparkDataFrame(er: ExternalResource): DataFrame
  General-purpose reader of saved Parquet data. Currently reads only the Parquet format.
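  A hedged sketch; ReadAs.SPARK and the "format" option follow the library's usual ExternalResource conventions and are assumptions here:

  ```scala
  import com.johnsnowlabs.nlp.util.io.{ExternalResource, ReadAs, ResourceHelper}
  import org.apache.spark.sql.DataFrame

  // Load a saved Parquet dataset through the active SparkSession.
  val er = ExternalResource(
    "data/embeddings.parquet", // hypothetical path
    ReadAs.SPARK,
    Map("format" -> "parquet")
  )
  val df: DataFrame = ResourceHelper.readSparkDataFrame(er)
  ```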
- lazy val spark: SparkSession
- def validFile(path: String): Boolean