POS

case class POS() extends Product with Serializable

Helper class for creating DataFrames for training a part-of-speech tagger.

The dataset needs to consist of sentences on each line, where each word is delimited with its respective tag:

Pierre|NNP Vinken|NNP ,|, 61|CD years|NNS old|JJ ,|, will|MD join|VB the|DT board|NN as|IN a|DT nonexecutive|JJ director|NN Nov.|NNP 29|CD .|.

The sentence can then be parsed with readDataset into a column with annotations of type POS.

Example

In this example, the file test-training.txt has the content of the sentence above.

import com.johnsnowlabs.nlp.training.POS

val pos = POS()
val path = "src/test/resources/anc-pos-corpus-small/test-training.txt"
val posDf = pos.readDataset(spark, path, "|", "tags")

posDf.selectExpr("explode(tags) as tags").show(false)
+---------------------------------------------+
|tags                                         |
+---------------------------------------------+
|[pos, 0, 5, NNP, [word -> Pierre], []]       |
|[pos, 7, 12, NNP, [word -> Vinken], []]      |
|[pos, 14, 14, ,, [word -> ,], []]            |
|[pos, 16, 17, CD, [word -> 61], []]          |
|[pos, 19, 23, NNS, [word -> years], []]      |
|[pos, 25, 27, JJ, [word -> old], []]         |
|[pos, 29, 29, ,, [word -> ,], []]            |
|[pos, 31, 34, MD, [word -> will], []]        |
|[pos, 36, 39, VB, [word -> join], []]        |
|[pos, 41, 43, DT, [word -> the], []]         |
|[pos, 45, 49, NN, [word -> board], []]       |
|[pos, 51, 52, IN, [word -> as], []]          |
|[pos, 47, 47, DT, [word -> a], []]           |
|[pos, 56, 67, JJ, [word -> nonexecutive], []]|
|[pos, 69, 76, NN, [word -> director], []]    |
|[pos, 78, 81, NNP, [word -> Nov.], []]       |
|[pos, 83, 84, CD, [word -> 29], []]          |
|[pos, 81, 81, ., [word -> .], []]            |
+---------------------------------------------+

Linear Supertypes

Serializable, Serializable, Product, Equals, AnyRef, Any

Ordering

Alphabetic
By Inheritance

Inherited

POS
Serializable
Serializable
Product
Equals
AnyRef
Any

Hide All
Show All

Visibility

Public
All

Instance Constructors

new POS()

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def finalize(): Unit

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
Annotations
@native()
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit

Definition Classes
AnyRef
Annotations
@native()
def readDataset(sparkSession: SparkSession, path: String, delimiter: String = "|", outputPosCol: String = "tags", outputDocumentCol: String = "document", outputTextCol: String = "text"): DataFrame
Reads the provided dataset file with given parameters and returns a DataFrame ready to for training a part-of-speech tagger.
Reads the provided dataset file with given parameters and returns a DataFrame ready to for training a part-of-speech tagger.
sparkSession
Current Spark sessions
path
Path to the resource
delimiter
Delimiter used to separate word from their tag in the text
outputPosCol
Name for the output column of the part-of-tags
outputDocumentCol
Name for the DocumentAssembler column
outputTextCol
Name for the column of the raw text
returns
DataFrame of parsed text
def readFromDataframe(posDataframe: DataFrame, tokensCol: String = "tokens", labelsCol: String = "labels", outPutDocColName: String = "text", outPutPosColName: String = "tags"): DataFrame
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
def wrapColumnMetadata(col: Column, annotatorType: String, outPutColName: String): Column

Packages

POS

case class POS() extends Product with Serializable

Example

Instance Constructors

Value Members

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped

Packages

POS 

case class POS() extends Product with Serializable

Example

Instance Constructors

Value Members

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped

POS