Sentiment Analysis (Vivekn)

Description

This model classifies text into negative and positive categories. It is based on the approach by Vivek Narayanan.

Download Copy S3 URI

How to use

import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Stage 1: read the raw "text" column and write it out as "document".
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: read "document" and write "token".
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: read "token" and write "normal".
token_normalizer = (
    Normalizer()
    .setInputCols(["token"])
    .setOutputCol("normal")
)

# Stage 4: pretrained Vivekn sentiment model; consumes "document" and "normal".
sentiment_model = (
    ViveknSentimentModel.pretrained()
    .setInputCols(["document", "normal"])
    .setOutputCol("result_sentiment")
)

# Stage 5: read "result_sentiment" and expose it as "final_sentiment".
sentiment_finisher = (
    Finisher()
    .setInputCols(["result_sentiment"])
    .setOutputCols("final_sentiment")
)

stages = [
    document_assembler,
    tokenizer,
    token_normalizer,
    sentiment_model,
    sentiment_finisher,
]
pipeline = Pipeline().setStages(stages)

# Two single-column example rows; `spark` is expected to be an existing session.
rows = [
    ["I recommend this movie"],
    ["Dont waste your time!!!"],
]
data = spark.createDataFrame(rows).toDF("text")

# Fit on the example data, then annotate that same data.
result = pipeline.fit(data).transform(data)

result.select("final_sentiment").show(truncate=False)
import spark.implicits._
import com.johnsnowlabs.nlp.base.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.Tokenizer
import com.johnsnowlabs.nlp.annotators.Normalizer
import com.johnsnowlabs.nlp.annotators.sda.vivekn.ViveknSentimentModel
import com.johnsnowlabs.nlp.Finisher
import org.apache.spark.ml.Pipeline

// Stage 1: read the raw "text" column and write it out as "document".
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Stage 2: read "document" and write "token".
val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

// Stage 3: read "token" and write "normal".
val tokenNormalizer = new Normalizer()
  .setInputCols("token")
  .setOutputCol("normal")

// Stage 4: pretrained Vivekn sentiment model; consumes "document" and "normal".
val sentimentModel = ViveknSentimentModel.pretrained()
  .setInputCols("document", "normal")
  .setOutputCol("result_sentiment")

// Stage 5: read "result_sentiment" and expose it as "final_sentiment".
val sentimentFinisher = new Finisher()
  .setInputCols("result_sentiment")
  .setOutputCols("final_sentiment")

val pipeline = new Pipeline().setStages(
  Array(documentAssembler, tokenizer, tokenNormalizer, sentimentModel, sentimentFinisher)
)

// Two example sentences in a single "text" column (toDF needs spark.implicits._).
val data = Seq(
  "I recommend this movie",
  "Dont waste your time!!!"
).toDF("text")

// Fit on the example data, then annotate that same data.
val result = pipeline.fit(data).transform(data)

result.select("final_sentiment").show(truncate = false)
# NLU one-liner: load the model referenced as "en.sentiment.vivekn" and run
# predict on a single example sentence.
import nlu
nlu.load("en.sentiment.vivekn").predict("""Dont waste your time!!!""")

Results

+---------------+
|final_sentiment|
+---------------+
|[positive]     |
|[negative]     |
+---------------+

Model Information

Model Name: sentiment_vivekn
Compatibility: Spark NLP 2.0.2+
Edition: Official
Input labels: [document, token]
Output labels: [sentiment]
Language: en
License: Open Source

Data Source

AntBNC, an automatically generated English lemma list based on all words in the BNC corpus with a frequency greater than 2 (created by Laurence Anthony)