Packages

class BertTokenizer extends BasicTokenizer

Linear Supertypes
BasicTokenizer, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. BertTokenizer
  2. BasicTokenizer
  3. AnyRef
  4. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new BertTokenizer(vocab: Map[String, Int], specialTokens: SpecialTokens)

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val bytesToUnicodeMapping: Map[Int, String]

    Maps bytes to a different set of Unicode characters (especially whitespace).

    Maps bytes to a different set of Unicode characters (especially whitespace). This mapping improved model performance for GPT-2.

    Attributes
    protected
  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  7. def decodeTokens(tokens: Array[Int]): String
  8. def encode(sentences: Seq[(WordpieceTokenizedSentence, Int)], maxSequenceLength: Int): Seq[Array[Int]]

    Encodes the input sequences into index IDs, adding padding where necessary.

  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  11. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  12. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  13. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  14. def isChinese(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  15. def isControl(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  16. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  17. def isPunctuation(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  18. def isToFilter(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  19. def isWhitespace(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  20. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  21. def normalize(text: String): String
    Definition Classes
    BasicTokenizer
  22. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  23. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  24. val specialTokens: SpecialTokens
  25. def stripAccents(text: String): String
    Definition Classes
    BasicTokenizer
  26. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  27. def toString(): String
    Definition Classes
    AnyRef → Any
  28. def tokenize(sentence: Sentence): Array[IndexedToken]

    sentence

    The input Sentence, which can be either a full sentence or just a single token represented as a Sentence.

    Definition Classes
    BasicTokenizer
  29. val vocab: Map[String, Int]
  30. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  31. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from BasicTokenizer

Inherited from AnyRef

Inherited from Any

Ungrouped