Source code for sparknlp.annotator.sentiment.sentiment_detector

#  Copyright 2017-2022 John Snow Labs
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""Contains classes for the SentimentDetector."""

from sparknlp.common import *


class SentimentDetector(AnnotatorApproach):
    """Trains a rule based sentiment detector, which calculates a score based on
    predefined keywords.

    A dictionary of predefined sentiment keywords must be provided with
    :meth:`.setDictionary`, where each line is a word delimited to its class
    (either ``positive`` or ``negative``). The dictionary can be set in the form
    of a delimited text file.

    By default, the sentiment score will be assigned labels ``"positive"`` if
    the score is ``>= 0``, else ``"negative"``.

    For extended examples of usage, see the `Examples
    <https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/training/english/dictionary-sentiment/sentiment.ipynb>`__.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``TOKEN, DOCUMENT``    ``SENTIMENT``
    ====================== ======================

    Parameters
    ----------
    dictionary
        path for dictionary to sentiment analysis
    positiveMultiplier
        multiplier for positive sentiments, by default 1.0
    negativeMultiplier
        multiplier for negative sentiments, by default -1.0
    incrementMultiplier
        multiplier for increment sentiments, by default 2.0
    decrementMultiplier
        multiplier for decrement sentiments, by default -2.0
    reverseMultiplier
        multiplier for revert sentiments, by default -1.0
    enableScore
        if true, score will show as the double value, else will output string
        ``"positive"`` or ``"negative"``, by default False

    Examples
    --------
    In this example, the dictionary ``default-sentiment-dict.txt`` has the form
    of::

        ...
        cool,positive
        superb,positive
        bad,negative
        uninspired,negative
        ...

    where each sentiment keyword is delimited by ``","``.

    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> documentAssembler = DocumentAssembler() \\
    ...     .setInputCol("text") \\
    ...     .setOutputCol("document")
    >>> tokenizer = Tokenizer() \\
    ...     .setInputCols(["document"]) \\
    ...     .setOutputCol("token")
    >>> lemmatizer = Lemmatizer() \\
    ...     .setInputCols(["token"]) \\
    ...     .setOutputCol("lemma") \\
    ...     .setDictionary("lemmas_small.txt", "->", "\\t")
    >>> sentimentDetector = SentimentDetector() \\
    ...     .setInputCols(["lemma", "document"]) \\
    ...     .setOutputCol("sentimentScore") \\
    ...     .setDictionary("default-sentiment-dict.txt", ",", ReadAs.TEXT)
    >>> pipeline = Pipeline().setStages([
    ...     documentAssembler,
    ...     tokenizer,
    ...     lemmatizer,
    ...     sentimentDetector,
    ... ])
    >>> data = spark.createDataFrame([
    ...     ["The staff of the restaurant is nice"],
    ...     ["I recommend others to avoid because it is too expensive"]
    ... ]).toDF("text")
    >>> result = pipeline.fit(data).transform(data)
    >>> result.selectExpr("sentimentScore.result").show(truncate=False)
    +----------+
    |result    |
    +----------+
    |[positive]|
    |[negative]|
    +----------+

    See Also
    --------
    ViveknSentimentApproach : for an alternative approach to sentiment extraction
    """

    inputAnnotatorTypes = [AnnotatorType.TOKEN, AnnotatorType.DOCUMENT]

    outputAnnotatorType = AnnotatorType.SENTIMENT

    dictionary = Param(Params._dummy(),
                       "dictionary",
                       "path for dictionary to sentiment analysis",
                       typeConverter=TypeConverters.identity)

    positiveMultiplier = Param(Params._dummy(),
                               "positiveMultiplier",
                               "multiplier for positive sentiments. Defaults 1.0",
                               typeConverter=TypeConverters.toFloat)

    negativeMultiplier = Param(Params._dummy(),
                               "negativeMultiplier",
                               "multiplier for negative sentiments. Defaults -1.0",
                               typeConverter=TypeConverters.toFloat)

    incrementMultiplier = Param(Params._dummy(),
                                "incrementMultiplier",
                                "multiplier for increment sentiments. Defaults 2.0",
                                typeConverter=TypeConverters.toFloat)

    decrementMultiplier = Param(Params._dummy(),
                                "decrementMultiplier",
                                "multiplier for decrement sentiments. Defaults -2.0",
                                typeConverter=TypeConverters.toFloat)

    reverseMultiplier = Param(Params._dummy(),
                              "reverseMultiplier",
                              "multiplier for revert sentiments. Defaults -1.0",
                              typeConverter=TypeConverters.toFloat)

    enableScore = Param(Params._dummy(),
                        "enableScore",
                        "if true, score will show as the double value, else will output string \"positive\" or \"negative\". Defaults false",
                        typeConverter=TypeConverters.toBoolean)

    def __init__(self):
        super(SentimentDetector, self).__init__(
            classname="com.johnsnowlabs.nlp.annotators.sda.pragmatic.SentimentDetector")
        self._setDefault(positiveMultiplier=1.0, negativeMultiplier=-1.0, incrementMultiplier=2.0,
                         decrementMultiplier=-2.0, reverseMultiplier=-1.0, enableScore=False)

    def setDictionary(self, path, delimiter, read_as=ReadAs.TEXT, options=None):
        """Sets path for dictionary to sentiment analysis

        Parameters
        ----------
        path : str
            Path to dictionary file
        delimiter : str
            Delimiter for entries
        read_as : str, optional
            How to read the resource, by default ReadAs.TEXT
        options : dict, optional
            Options for reading the resource, by default {'format': 'text'}.
            If it does not contain a ``"delimiter"`` key, ``delimiter`` is
            added under that key. The passed dict itself is never modified.
        """
        # A None sentinel replaces the former mutable dict default; a fresh
        # dict is built per call, and a caller-supplied dict is copied so that
        # inserting the delimiter below does not leak back to the caller.
        opts = {'format': 'text'} if options is None else options.copy()
        if "delimiter" not in opts:
            opts["delimiter"] = delimiter
        return self._set(dictionary=ExternalResource(path, read_as, opts))

    def _create_model(self, java_model):
        # Wraps the fitted Java model in its Python counterpart.
        return SentimentDetectorModel(java_model=java_model)
class SentimentDetectorModel(AnnotatorModel):
    """Rule based sentiment detector, which calculates a score based on
    predefined keywords.

    This is the instantiated model of the :class:`.SentimentDetector`.
    For training your own model, please see the documentation of that class.

    By default, the sentiment score will be assigned labels ``"positive"`` if
    the score is ``>= 0``, else ``"negative"``.

    For extended examples of usage, see the `Examples
    <https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/training/english/dictionary-sentiment/sentiment.ipynb>`__.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``TOKEN, DOCUMENT``    ``SENTIMENT``
    ====================== ======================

    Parameters
    ----------
    positiveMultiplier
        multiplier for positive sentiments (declared as a float ``Param``)
    """

    name = "SentimentDetectorModel"

    inputAnnotatorTypes = [AnnotatorType.TOKEN, AnnotatorType.DOCUMENT]

    outputAnnotatorType = AnnotatorType.SENTIMENT

    positiveMultiplier = Param(
        Params._dummy(),
        "positiveMultiplier",
        "multiplier for positive sentiments. Defaults 1.0",
        typeConverter=TypeConverters.toFloat,
    )

    def __init__(
        self,
        classname="com.johnsnowlabs.nlp.annotators.sda.pragmatic.SentimentDetectorModel",
        java_model=None,
    ):
        # Both arguments are handed unchanged to the AnnotatorModel initializer.
        super().__init__(classname=classname, java_model=java_model)