Source code for sparknlp.annotator.date2_chunk
# Copyright 2017-2023 John Snow Labs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains classes for Date2Chunk."""
from sparknlp.common import *
class Date2Chunk(AnnotatorModel):
    """Converts ``DATE`` type Annotations to ``CHUNK`` type.

    This can be useful if the following annotators after DateMatcher and
    MultiDateMatcher require ``CHUNK`` types.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``DATE``               ``CHUNK``
    ====================== ======================

    Parameters
    ----------
    entityName
        Entity name for the metadata, by default ``"DATE"``.

    Examples
    --------
    >>> from pyspark.ml import Pipeline
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> documentAssembler = DocumentAssembler() \\
    ...     .setInputCol("text") \\
    ...     .setOutputCol("document")
    >>> date = DateMatcher() \\
    ...     .setInputCols(["document"]) \\
    ...     .setOutputCol("date")
    >>> date2Chunk = Date2Chunk() \\
    ...     .setInputCols(["date"]) \\
    ...     .setOutputCol("date_chunk")
    >>> pipeline = Pipeline().setStages([
    ...     documentAssembler,
    ...     date,
    ...     date2Chunk
    ... ])
    >>> data = spark.createDataFrame([["Omicron is a new variant of COVID-19, which the World Health Organization designated a variant of concern on Nov. 26, 2021/26/11."]]).toDF("text")
    >>> result = pipeline.fit(data).transform(data)
    >>> result.select("date_chunk").show(1, truncate=False)
    +----------------------------------------------------+
    |date_chunk                                          |
    +----------------------------------------------------+
    |[{chunk, 118, 121, 2021/01/01, {sentence -> 0}, []}]|
    +----------------------------------------------------+
    """

    name = "Date2Chunk"

    # Annotation types this annotator consumes and produces.
    inputAnnotatorTypes = [AnnotatorType.DATE]

    outputAnnotatorType = AnnotatorType.CHUNK

    @keyword_only
    def __init__(self):
        # Bind this Python wrapper to its JVM implementation class.
        super(Date2Chunk, self).__init__(classname="com.johnsnowlabs.nlp.annotators.Date2Chunk")
        self._setDefault(entityName="DATE")

    entityName = Param(Params._dummy(), "entityName", "Entity name for the metadata",
                       TypeConverters.toString)

    def setEntityName(self, name):
        """Sets the entity name for the metadata, by default ``"DATE"``.

        Parameters
        ----------
        name : str
            Entity name for the metadata

        Returns
        -------
        Date2Chunk
            This instance, so that setter calls can be chained.
        """
        self._set(entityName=name)
        return self