# Source code for sparknlp.annotator.param.evaluation_dl_params

#  Copyright 2017-2022 John Snow Labs
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from sparknlp.common import *
from sparknlp.internal import ParamsGettersSetters


class EvaluationDLParams(ParamsGettersSetters):
    """Components that take parameters to configure evaluation during
    training (validation split, extended logs, log output paths, and an
    optional test dataset).

    Mixed into DL-based annotator approaches; all setters return ``self``
    so calls can be chained.
    """

    # Level of verbosity during training (higher = more output).
    verbose = Param(
        Params._dummy(),
        "verbose",
        "Level of verbosity during training",
        TypeConverters.toInt
    )
    # Fraction of the training set held out for validation each epoch.
    validationSplit = Param(
        Params._dummy(),
        "validationSplit",
        "Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.",
        TypeConverters.toFloat
    )
    # Whether validation logs include per-label timing and evaluation.
    evaluationLogExtended = Param(
        Params._dummy(),
        "evaluationLogExtended",
        "Whether logs for validation to be extended: it displays time and evaluation of each label. Default is False.",
        TypeConverters.toBoolean
    )
    # Whether to also write training logs to stdout.
    enableOutputLogs = Param(
        Params._dummy(),
        "enableOutputLogs",
        "Whether to use stdout in addition to Spark logs.",
        TypeConverters.toBoolean
    )
    # Folder where training logs are written.
    outputLogsPath = Param(
        Params._dummy(),
        "outputLogsPath",
        "Folder path to save training logs",
        TypeConverters.toString
    )
    # Optional external test dataset used for statistics during training.
    testDataset = Param(
        Params._dummy(),
        "testDataset",
        "Path to test dataset. If set used to calculate statistic on it during training.",
        TypeConverters.identity
    )

    def setVerbose(self, value):
        """Sets level of verbosity during training

        Parameters
        ----------
        value : int
            Level of verbosity
        """
        return self._set(verbose=value)

    def setValidationSplit(self, v):
        """Sets the proportion of training dataset to be validated against the
        model on each Epoch, by default it is 0.0 and off. The value should be
        between 0.0 and 1.0.

        Parameters
        ----------
        v : float
            Proportion of training dataset to be validated
        """
        # _set returns self, so this matches the other fluent setters.
        return self._set(validationSplit=v)

    def setEvaluationLogExtended(self, v):
        """Sets whether logs for validation to be extended, by default False.
        Displays time and evaluation of each label.

        Parameters
        ----------
        v : bool
            Whether logs for validation to be extended
        """
        return self._set(evaluationLogExtended=v)

    def setEnableOutputLogs(self, value):
        """Sets whether to use stdout in addition to Spark logs, by default
        False.

        Parameters
        ----------
        value : bool
            Whether to use stdout in addition to Spark logs
        """
        return self._set(enableOutputLogs=value)

    def setOutputLogsPath(self, p):
        """Sets folder path to save training logs

        Parameters
        ----------
        p : str
            Folder path to save training logs
        """
        return self._set(outputLogsPath=p)

    def setTestDataset(self, path, read_as=ReadAs.SPARK, options=None):
        """Path to a parquet file of a test dataset. If set, it is used to
        calculate statistics on it during training.

        The parquet file must be a dataframe that has the same columns as the
        model that is being trained. For example, if the model needs as input
        `DOCUMENT`, `TOKEN`, `WORD_EMBEDDINGS` (Features) and `NAMED_ENTITY`
        (label) then these columns also need to be present while saving the
        dataframe. The pre-processing steps for the training dataframe should
        also be applied to the test dataframe.

        An example on how to create such a parquet file could be:

        >>> # assuming preProcessingPipeline
        >>> (train, test) = data.randomSplit([0.8, 0.2])
        >>> preProcessingPipeline
        ...     .fit(test)
        ...     .transform(test)
        ...     .write
        ...     .mode("overwrite")
        ...     .parquet("test_data")
        >>> annotator.setTestDataset("test_data")

        Parameters
        ----------
        path : str
            Path to test dataset
        read_as : str, optional
            How to read the resource, by default ReadAs.SPARK
        options : dict, optional
            Options for reading the resource, by default
            ``{"format": "parquet"}``
        """
        # Avoid a shared mutable default argument; build the default per call.
        if options is None:
            options = {"format": "parquet"}
        return self._set(testDataset=ExternalResource(path, read_as, options.copy()))