Spark NLP 6.1.1 ScalaDoc - com.johnsnowlabs.nlp.annotators.classifier.dl.ClassifierEncoder

final def !=(arg0: Any): Boolean

Definition Classes: AnyRef → Any

final def ##(): Int

Definition Classes: AnyRef → Any

final def $[T](param: Param[T]): T

Attributes: protected
Definition Classes: Params

final def ==(arg0: Any): Boolean

Definition Classes: AnyRef → Any

final def asInstanceOf[T0]: T0

Definition Classes: Any

val batchSize: IntParam

Batch size (Default: 64)

def buildDatasetWithLabels(dataset: Dataset[_], inputCols: String): (DataFrame, Array[String])

Attributes: protected

final def clear(param: Param[_]): ClassifierEncoder.this.type

Definition Classes: Params

def clone(): AnyRef

Attributes: protected[lang]
Definition Classes: AnyRef
Annotations: @throws( ... ) @native()

val configProtoBytes: IntArrayParam

TensorFlow ConfigProto, serialized into a byte array.

TensorFlow ConfigProto, serialized into a byte array. Get it with config_proto.SerializeToString()

def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes: protected
Definition Classes: Params

final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes: protected
Definition Classes: Params

val enableOutputLogs: BooleanParam

Whether to output to annotators log folder (Default: false)

Definition Classes: EvaluationDLParams

final def eq(arg0: AnyRef): Boolean

Definition Classes: AnyRef

def equals(arg0: Any): Boolean

Definition Classes: AnyRef → Any

val evaluationLogExtended: BooleanParam

Whether to extend the validation logs (Default: false). When enabled, the time and evaluation of each label are displayed.

Definition Classes: EvaluationDLParams

def explainParam(param: Param[_]): String

Definition Classes: Params

def explainParams(): String

Definition Classes: Params

def extractInputs(encoder: ClassifierDatasetEncoder, dataframe: DataFrame): (Array[Array[Float]], Array[String])

Attributes: protected

final def extractParamMap(): ParamMap

Definition Classes: Params

final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes: Params

def finalize(): Unit

Attributes: protected[lang]
Definition Classes: AnyRef
Annotations: @throws( classOf[java.lang.Throwable] )

final def get[T](param: Param[T]): Option[T]

Definition Classes: Params

def getBatchSize: Int

Batch size (Default: 64)

final def getClass(): Class[_]

Definition Classes: AnyRef → Any
Annotations: @native()

def getConfigProtoBytes: Option[Array[Byte]]

TensorFlow ConfigProto bytes passed to the TF session

final def getDefault[T](param: Param[T]): Option[T]

Definition Classes: Params

def getEnableOutputLogs: Boolean

Whether to output to annotators log folder (Default: false)

Definition Classes: EvaluationDLParams

def getLabelColumn: String

Column containing the label for each document

def getLr: Float

Learning Rate (Default: 5e-3f)

def getMaxEpochs: Int

Maximum number of epochs to train (Default: 10)

final def getOrDefault[T](param: Param[T]): T

Definition Classes: Params

def getOutputLogsPath: String

Folder path to save training logs (Default: "")

Definition Classes: EvaluationDLParams

def getParam(paramName: String): Param[Any]

Definition Classes: Params

def getRandomSeed: Int

Random seed for shuffling the dataset

def getValidationSplit: Float

Proportion of the training dataset to be validated against the model on each epoch (Default: 0.0f).

Proportion of the training dataset to be validated against the model on each epoch (Default: 0.0f). The value should be between 0.0 and 1.0; the default of 0.0 turns validation off.

Definition Classes: EvaluationDLParams

final def hasDefault[T](param: Param[T]): Boolean

Definition Classes: Params

def hasParam(paramName: String): Boolean

Definition Classes: Params

def hashCode(): Int

Definition Classes: AnyRef → Any
Annotations: @native()

final def isDefined(param: Param[_]): Boolean

Definition Classes: Params

final def isInstanceOf[T0]: Boolean

Definition Classes: Any

final def isSet(param: Param[_]): Boolean

Definition Classes: Params

val labelColumn: Param[String]

Column containing the label for each document

val lr: FloatParam

Learning Rate (Default: 5e-3f)

val maxEpochs: IntParam

Maximum number of epochs to train (Default: 10)

final def ne(arg0: AnyRef): Boolean

Definition Classes: AnyRef

final def notify(): Unit

Definition Classes: AnyRef
Annotations: @native()

final def notifyAll(): Unit

Definition Classes: AnyRef
Annotations: @native()

val outputLogsPath: Param[String]

Folder path to save training logs (Default: "")

Definition Classes: EvaluationDLParams

lazy val params: Array[Param[_]]

Definition Classes: Params

val randomSeed: IntParam

Random seed for shuffling the dataset

final def set(paramPair: ParamPair[_]): ClassifierEncoder.this.type

Attributes: protected
Definition Classes: Params

final def set(param: String, value: Any): ClassifierEncoder.this.type

Attributes: protected
Definition Classes: Params

final def set[T](param: Param[T], value: T): ClassifierEncoder.this.type

Definition Classes: Params

def setBatchSize(batch: Int): ClassifierEncoder.this.type

Batch size (Default: 64)

def setConfigProtoBytes(bytes: Array[Int]): ClassifierEncoder.this.type

TensorFlow ConfigProto bytes passed to the TF session

final def setDefault(paramPairs: ParamPair[_]*): ClassifierEncoder.this.type

Attributes: protected
Definition Classes: Params

final def setDefault[T](param: Param[T], value: T): ClassifierEncoder.this.type

Attributes: protected[org.apache.spark.ml]
Definition Classes: Params

def setEnableOutputLogs(enableOutputLogs: Boolean): ClassifierEncoder.this.type

Whether to output to annotators log folder (Default: false)

Definition Classes: EvaluationDLParams

def setEvaluationLogExtended(evaluationLogExtended: Boolean): ClassifierEncoder.this.type

Whether to extend the validation logs: when enabled, the time and evaluation of each label are displayed.

Whether to extend the validation logs: when enabled, the time and evaluation of each label are displayed. Default is false.

Definition Classes: EvaluationDLParams

def setLabelColumn(column: String): ClassifierEncoder.this.type

Column containing the label for each document

def setLr(lr: Float): ClassifierEncoder.this.type

Learning Rate (Default: 5e-3f)

def setMaxEpochs(epochs: Int): ClassifierEncoder.this.type

Maximum number of epochs to train (Default: 10)

def setOutputLogsPath(path: String): ClassifierEncoder.this.type

Folder path to save training logs (Default: "")

Definition Classes: EvaluationDLParams

def setRandomSeed(seed: Int): ClassifierEncoder.this.type

Random seed for shuffling the dataset

def setTestDataset(er: ExternalResource): ClassifierEncoder.this.type

ExternalResource to a parquet file of a test dataset.

ExternalResource to a parquet file of a test dataset. If set, it is used to calculate statistics on it during training.

When using an ExternalResource, only parquet files are accepted for this function.

The parquet file must be a dataframe that has the same columns as the model that is being trained. For example, if the model needs as input DOCUMENT, TOKEN, WORD_EMBEDDINGS (Features) and NAMED_ENTITY (label) then these columns also need to be present while saving the dataframe. The pre-processing steps for the training dataframe should also be applied to the test dataframe.

An example of how to create such a parquet file could be:

// assuming preProcessingPipeline
val Array(train, test) = data.randomSplit(Array(0.8, 0.2))

preProcessingPipeline
  .fit(test)
  .transform(test)
  .write
  .mode("overwrite")
  .parquet("test_data")

annotator.setTestDataset("test_data")

Definition Classes: EvaluationDLParams

def setTestDataset(path: String, readAs: Format = ReadAs.SPARK, options: Map[String, String] = Map("format" -> "parquet")): ClassifierEncoder.this.type

Path to a parquet file of a test dataset.

Path to a parquet file of a test dataset. If set, it is used to calculate statistics on it during training.

The parquet file must be a dataframe that has the same columns as the model that is being trained. For example, if the model needs as input DOCUMENT, TOKEN, WORD_EMBEDDINGS (Features) and NAMED_ENTITY (label) then these columns also need to be present while saving the dataframe. The pre-processing steps for the training dataframe should also be applied to the test dataframe.

An example of how to create such a parquet file could be:

// assuming preProcessingPipeline
val Array(train, test) = data.randomSplit(Array(0.8, 0.2))

preProcessingPipeline
  .fit(test)
  .transform(test)
  .write
  .mode("overwrite")
  .parquet("test_data")

annotator.setTestDataset("test_data")

Definition Classes: EvaluationDLParams

def setValidationSplit(validationSplit: Float): ClassifierEncoder.this.type

Proportion of the training dataset to be validated against the model on each epoch (Default: 0.0f).

Proportion of the training dataset to be validated against the model on each epoch (Default: 0.0f). The value should be between 0.0 and 1.0; the default of 0.0 turns validation off.

Definition Classes: EvaluationDLParams

def setVerbose(verbose: Level): ClassifierEncoder.this.type

Level of verbosity during training (Default: Verbose.Silent.id)

Definition Classes: EvaluationDLParams

def setVerbose(verbose: Int): ClassifierEncoder.this.type

Level of verbosity during training (Default: Verbose.Silent.id)

Definition Classes: EvaluationDLParams

final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes: AnyRef

val testDataset: ExternalResourceParam

Path to a parquet file of a test dataset.

Path to a parquet file of a test dataset. If set, it is used to calculate statistics on it during training.

Definition Classes: EvaluationDLParams

def toString(): String

Definition Classes: Identifiable → AnyRef → Any

val validationSplit: FloatParam

Proportion of the training dataset to be validated against the model on each epoch (Default: 0.0f).

Proportion of the training dataset to be validated against the model on each epoch (Default: 0.0f). The value should be between 0.0 and 1.0; the default of 0.0 turns validation off.

Definition Classes: EvaluationDLParams

val verbose: IntParam

Level of verbosity during training (Default: Verbose.Silent.id)

Definition Classes: EvaluationDLParams

final def wait(): Unit

Definition Classes: AnyRef
Annotations: @throws( ... )

final def wait(arg0: Long, arg1: Int): Unit

Definition Classes: AnyRef
Annotations: @throws( ... )

final def wait(arg0: Long): Unit

Definition Classes: AnyRef
Annotations: @throws( ... ) @native()

Packages

ClassifierEncoder

trait ClassifierEncoder extends EvaluationDLParams

Abstract Value Members

Concrete Value Members

Inherited from EvaluationDLParams

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

getParam

param

setParam

Ungrouped

Packages

ClassifierEncoder 

trait ClassifierEncoder extends EvaluationDLParams

Abstract Value Members

Concrete Value Members

Inherited from EvaluationDLParams

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

getParam

param

setParam

Ungrouped

ClassifierEncoder