Description
MPNet sequence classification model imported from Hugging Face.
Originally a SetFit model; reference: https://huggingface.co/rodekruis/sml-ukr-message-classifier
Predicted Entities
ANOMALY
, ARMY
, CHILDREN
, CONNECTIVITY
, CONNECTWITHREDCROSS
, EDUCATION
, FOOD
, GOODSSERVICES
, HEALTH
, INCLUSIONCVA
, LEGAL
, MONEY/BANKING
, NFINONFOODITEMS
, OTHERPROGRAMSOTHERNGOS
, PARCEL
, PAYMENTCVA
, PETS
, PMER/NEWPROGRAMOPERTUNITIES
, PROGRAMINFO
, PROGRAMINFORMATION
, PSSRFL
, REGISTRATIONCVA
, SENTIMENT/FEEDBACK
, SHELTER
, TRANSLATION/LANGUAGE
, TRANSPORT/CAR
, TRANSPORT/MOVEMENT
, WASH
, WORK/JOBS
How to use
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
# NOTE: `spark` must already be an active SparkSession with Spark NLP on the
# classpath (for example, one returned by sparknlp.start()); this snippet does
# not create it.

# Wrap the raw "text" column into document annotations.
document = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

# Split each document into tokens; the classifier takes both columns as input.
tokenizer = Tokenizer() \
    .setInputCols(["document"]) \
    .setOutputCol("token")

# Name the model and language explicitly so the example always loads the model
# described on this card instead of depending on the annotator's default.
sequenceClassifier = MPNetForSequenceClassification \
    .pretrained("mpnet_sequence_classifier_ukr_message", "en") \
    .setInputCols(["document", "token"]) \
    .setOutputCol("label")

# Three sample messages, one per row, in a single "text" column.
data = spark.createDataFrame([
    ["I love driving my car."],
    ["The next bus will arrive in 20 minutes."],
    ["pineapple on pizza is the worst 🤮"],
]).toDF("text")

pipeline = Pipeline().setStages([document, tokenizer, sequenceClassifier])
pipelineModel = pipeline.fit(data)
results = pipelineModel.transform(data)

# "label.result" holds the predicted class name(s) for each row.
results.select("label.result").show()
import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._
import org.apache.spark.ml.Pipeline
import spark.implicits._
// Wrap the raw "text" column into document annotations.
val document = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into tokens; the classifier takes both columns as input.
val tokenizer = new Tokenizer()
  .setInputCols(Array("document"))
  .setOutputCol("token")

// Load the published model by name and language. The previous version read a
// local ONNX export directory via loadSavedModel, which only works on a
// machine that already has that export on disk.
val sequenceClassifier = MPNetForSequenceClassification
  .pretrained("mpnet_sequence_classifier_ukr_message", "en")
  .setInputCols(Array("document", "token"))
  .setOutputCol("label")

// Three sample messages, one per row, in a single "text" column.
val texts: Seq[String] = Seq(
  "I love driving my car.",
  "The next bus will arrive in 20 minutes.",
  "pineapple on pizza is the worst 🤮")
val data = texts.toDF("text")

val pipeline = new Pipeline().setStages(Array(document, tokenizer, sequenceClassifier))
val pipelineModel = pipeline.fit(data)
val results = pipelineModel.transform(data)

// "label.result" holds the predicted class name(s) for each row.
results.select("label.result").show()
Results
+--------------------+
|              result|
+--------------------+
|     [TRANSPORT/CAR]|
|[TRANSPORT/MOVEMENT]|
|              [FOOD]|
+--------------------+
Model Information
Model Name: | mpnet_sequence_classifier_ukr_message |
Compatibility: | Spark NLP 5.2.3+ |
License: | Open Source |
Edition: | Official |
Input Labels: | [document, token] |
Output Labels: | [label] |
Language: | en |
Size: | 403.5 MB |