Packages

c

com.johnsnowlabs.nlp.training

SpacyToAnnotation

class SpacyToAnnotation extends AnyRef

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. SpacyToAnnotation
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new SpacyToAnnotation()

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  6. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  7. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  8. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  9. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  10. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  11. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  12. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  13. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  14. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  15. def readJsonFile(spark: SparkSession, jsonFilePath: String, params: Map[String, String] = Map()): Dataset[_]
  16. def readJsonFileJava(spark: SparkSession, jsonFilePath: String, params: Map[String, String]): Dataset[_]

    Helper class to load a list of tokens/sentences as JSON to Annotation.

    Helper class to load a list of tokens/sentences as JSON to Annotation.

    The JSON will be in this format:

    [ { "tokens": ["Hello", "world", "!", "How", "are", "you", "today", "?", "I", "'m", "fine", "thanks", "."], "token_spaces": [true, false, true, true, true, true, false, true, false, true, true, false, false], "sentence_ends": [2, 7, 12] } ]

    sentence_ends is optional

    This format can be exported from spaCy check this notebook for details:

    Example

    val nlpReader = new SpacyToAnnotation()
    val result = nlpReader.readJsonFile(spark, "src/test/resources/spacy-to-annotation/multi_doc_tokens.json")
    result.show(false)
    
    +-------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    |document                                                                             |sentence                                                                                                                                                                      |token                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
    +-------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    |[{document, 0, 55, John went to the store last night. He bought some bread., {}, []}]|[{document, 0, 33, John went to the store last night., {sentence -> 0}, []}, {document, 35, 55, He bought some bread., {sentence -> 1}, []}]                                  |[{token, 0, 3, John, {sentence -> 0}, []}, {token, 5, 8, went, {sentence -> 0}, []}, {token, 10, 11, to, {sentence -> 0}, []}, {token, 13, 15, the, {sentence -> 0}, []}, {token, 17, 21, store, {sentence -> 0}, []}, {token, 23, 26, last, {sentence -> 0}, []}, {token, 28, 32, night, {sentence -> 0}, []}, {token, 33, 33, ., {sentence -> 0}, []}, {token, 35, 36, He, {sentence -> 1}, []}, {token, 38, 43, bought, {sentence -> 1}, []}, {token, 45, 48, some, {sentence -> 1}, []}, {token, 50, 54, bread, {sentence -> 1}, []}, {token, 55, 55, ., {sentence -> 1}, []}]|
    |[{document, 0, 47, Hello world! How are you today? I'm fine thanks., {}, []}]        |[{document, 0, 11, Hello world!, {sentence -> 0}, []}, {document, 13, 30, How are you today?, {sentence -> 1}, []}, {document, 32, 47, I'm fine thanks., {sentence -> 2}, []}]|[{token, 0, 4, Hello, {sentence -> 0}, []}, {token, 6, 10, world, {sentence -> 0}, []}, {token, 11, 11, !, {sentence -> 0}, []}, {token, 13, 15, How, {sentence -> 1}, []}, {token, 17, 19, are, {sentence -> 1}, []}, {token, 21, 23, you, {sentence -> 1}, []}, {token, 25, 29, today, {sentence -> 1}, []}, {token, 30, 30, ?, {sentence -> 1}, []}, {token, 32, 32, I, {sentence -> 2}, []}, {token, 33, 34, 'm, {sentence -> 2}, []}, {token, 36, 39, fine, {sentence -> 2}, []}, {token, 41, 46, thanks, {sentence -> 2}, []}, {token, 47, 47, ., {sentence -> 2}, []}]     |
    +-------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    
    
    result.printSchema
    root
     |-- document: array (nullable = true)
     |    |-- element: struct (containsNull = true)
     |    |    |-- annotatorType: string (nullable = true)
     |    |    |-- begin: integer (nullable = false)
     |    |    |-- end: integer (nullable = false)
     |    |    |-- result: string (nullable = true)
     |    |    |-- metadata: map (nullable = true)
     |    |    |    |-- key: string
     |    |    |    |-- value: string (valueContainsNull = true)
     |    |    |-- embeddings: array (nullable = true)
     |    |    |    |-- element: float (containsNull = false)
     |-- sentence: array (nullable = true)
     |    |-- element: struct (containsNull = true)
     |    |    |-- annotatorType: string (nullable = true)
     |    |    |-- begin: integer (nullable = false)
     |    |    |-- end: integer (nullable = false)
     |    |    |-- result: string (nullable = true)
     |    |    |-- metadata: map (nullable = true)
     |    |    |    |-- key: string
     |    |    |    |-- value: string (valueContainsNull = true)
     |    |    |-- embeddings: array (nullable = true)
     |    |    |    |-- element: float (containsNull = false)
     |-- token: array (nullable = true)
     |    |-- element: struct (containsNull = true)
     |    |    |-- annotatorType: string (nullable = true)
     |    |    |-- begin: integer (nullable = false)
     |    |    |-- end: integer (nullable = false)
     |    |    |-- result: string (nullable = true)
     |    |    |-- metadata: map (nullable = true)
     |    |    |    |-- key: string
     |    |    |    |-- value: string (valueContainsNull = true)
     |    |    |-- embeddings: array (nullable = true)
     |    |    |    |-- element: float (containsNull = false)
  17. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  18. def toString(): String
    Definition Classes
    AnyRef → Any
  19. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  20. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  21. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from AnyRef

Inherited from Any

Ungrouped