object BasicChunker

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. BasicChunker
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. def chunkBasic(elements: List[HTMLElement], maxCharacters: Int, newAfterNChars: Int = -1, overlap: Int = 0): List[Chunk]

    Splits a list of HTMLElements into chunks constrained by a maximum number of characters.

    This method ensures that no chunk exceeds the specified maxCharacters limit. Optionally, newAfterNChars can be used to set a soft boundary for starting new chunks earlier, and overlap can be used to retain trailing characters from the previous chunk in the next one when a long element is split.

    elements

    The list of HTMLElements to be chunked.

    maxCharacters

    The hard limit on the number of characters per chunk.

    newAfterNChars

    Optional soft limit for starting a new chunk before reaching maxCharacters. Defaults to -1; when set to -1, this soft limit is ignored.

    overlap

    Number of trailing characters to overlap between chunks when splitting long elements. Defaults to 0. Overlapping text helps maintain context in downstream NLP tasks.

    returns

    A list of Chunk objects, each containing a group of elements whose combined content length does not exceed the specified limits.

  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  7. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  8. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  9. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  10. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  11. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  12. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  13. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  14. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  15. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  16. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  17. def toString(): String
    Definition Classes
    AnyRef → Any
  18. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  19. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  20. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from AnyRef

Inherited from Any

Ungrouped