Description
See the original model card at https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 for details.
Predicted Entities
How to use
/** Computes averaged sentence embeddings for the text in the `sentence` column.
  *
  * Builds a four-stage pipeline (document assembly, tokenization, BERT token
  * embeddings, average pooling), fits it on the input and applies it to the
  * same input.
  *
  * @param sentences DataFrame that provides a `sentence` column to embed
  * @return the input transformed by the fitted pipeline; the stages write the
  *         `document`, `token`, `embeddings` and `sentence_embeddings` columns
  */
def embedSentences(sentences: DataFrame): DataFrame = {
  // Wraps the raw "sentence" column into the "document" annotation column.
  val documentAssembler = new DocumentAssembler()
    .setInputCol("sentence")
    .setOutputCol("document")

  val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

  // NOTE(review): the Model Information section lists the model name as
  // all_MiniLM_L6_v2 -- confirm that "all_minilm_l6" is the identifier
  // `pretrained` expects for this model.
  val embeddings = BertEmbeddings
    .pretrained("all_minilm_l6", "en")
    .setInputCols(Array("document", "token"))
    .setOutputCol("embeddings")

  // Pools the token-level embeddings of each document with the AVERAGE strategy.
  val sentenceEmbeddings = new SentenceEmbeddings()
    .setInputCols("document", "embeddings")
    .setOutputCol("sentence_embeddings")
    .setPoolingStrategy("AVERAGE")

  val pipeline = new Pipeline().setStages(Array(
    documentAssembler,
    tokenizer,
    embeddings,
    sentenceEmbeddings
  ))

  val pipelineModel = pipeline.fit(sentences)

  // The transformed DataFrame must be the last expression so the method
  // actually returns a DataFrame instead of ending on a `val` definition.
  pipelineModel.transform(sentences)
}
/** Computes averaged sentence embeddings for the text in the `sentence` column.
  *
  * Builds a four-stage pipeline (document assembly, tokenization, BERT token
  * embeddings, average pooling), fits it on the input and applies it to the
  * same input.
  *
  * @param sentences DataFrame that provides a `sentence` column to embed
  * @return the input transformed by the fitted pipeline; the stages write the
  *         `document`, `token`, `embeddings` and `sentence_embeddings` columns
  */
def embedSentences(sentences: DataFrame): DataFrame = {
  // Wraps the raw "sentence" column into the "document" annotation column.
  val documentAssembler = new DocumentAssembler()
    .setInputCol("sentence")
    .setOutputCol("document")

  val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

  // NOTE(review): the Model Information section lists the model name as
  // all_MiniLM_L6_v2 -- confirm that "all_minilm_l6" is the identifier
  // `pretrained` expects for this model.
  val embeddings = BertEmbeddings
    .pretrained("all_minilm_l6", "en")
    .setInputCols(Array("document", "token"))
    .setOutputCol("embeddings")

  // Pools the token-level embeddings of each document with the AVERAGE strategy.
  val sentenceEmbeddings = new SentenceEmbeddings()
    .setInputCols("document", "embeddings")
    .setOutputCol("sentence_embeddings")
    .setPoolingStrategy("AVERAGE")

  val pipeline = new Pipeline().setStages(Array(
    documentAssembler,
    tokenizer,
    embeddings,
    sentenceEmbeddings
  ))

  val pipelineModel = pipeline.fit(sentences)

  // The transformed DataFrame must be the last expression so the method
  // actually returns a DataFrame instead of ending on a `val` definition.
  pipelineModel.transform(sentences)
}
Model Information
| Field | Value |
|---|---|
| Model Name: | all_MiniLM_L6_v2 |
| Compatibility: | Spark NLP 5.5.1+ |
| License: | Open Source |
| Edition: | Community |
| Input Labels: | [document, token] |
| Output Labels: | [embeddings] |
| Language: | en |
| Size: | 84.6 MB |
| Case sensitive: | false |