trait

com.johnsnowlabs.partition

HasTextReaderProperties

trait HasTextReaderProperties extends ParamsAndFeaturesWritable

Linear Supertypes
ParamsAndFeaturesWritable, HasFeatures, Params, Serializable, Serializable, Identifiable, DefaultParamsWritable, MLWritable, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. HasTextReaderProperties
  2. ParamsAndFeaturesWritable
  3. HasFeatures
  4. Params
  5. Serializable
  6. Serializable
  7. Identifiable
  8. DefaultParamsWritable
  9. MLWritable
  10. AnyRef
  11. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Abstract Value Members

  1. abstract def copy(extra: ParamMap): Params
    Definition Classes
    Params
  2. abstract val uid: String
    Definition Classes
    Identifiable

Concrete Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def $[T](param: Param[T]): T
    Attributes
    protected
    Definition Classes
    Params
  4. def $$[T](feature: StructFeature[T]): T
    Attributes
    protected
    Definition Classes
    HasFeatures
  5. def $$[K, V](feature: MapFeature[K, V]): Map[K, V]
    Attributes
    protected
    Definition Classes
    HasFeatures
  6. def $$[T](feature: SetFeature[T]): Set[T]
    Attributes
    protected
    Definition Classes
    HasFeatures
  7. def $$[T](feature: ArrayFeature[T]): Array[T]
    Attributes
    protected
    Definition Classes
    HasFeatures
  8. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  9. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  10. final def clear(param: Param[_]): HasTextReaderProperties.this.type
    Definition Classes
    Params
  11. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  12. def copyValues[T <: Params](to: T, extra: ParamMap): T
    Attributes
    protected
    Definition Classes
    Params
  13. final def defaultCopy[T <: Params](extra: ParamMap): T
    Attributes
    protected
    Definition Classes
    Params
  14. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  15. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  16. def explainParam(param: Param[_]): String
    Definition Classes
    Params
  17. def explainParams(): String
    Definition Classes
    Params
  18. final def extractParamMap(): ParamMap
    Definition Classes
    Params
  19. final def extractParamMap(extra: ParamMap): ParamMap
    Definition Classes
    Params
  20. val extractTagAttributes: StringArrayParam
  21. val features: ArrayBuffer[Feature[_, _, _]]
    Definition Classes
    HasFeatures
  22. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  23. def get[T](feature: StructFeature[T]): Option[T]
    Attributes
    protected
    Definition Classes
    HasFeatures
  24. def get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]
    Attributes
    protected
    Definition Classes
    HasFeatures
  25. def get[T](feature: SetFeature[T]): Option[Set[T]]
    Attributes
    protected
    Definition Classes
    HasFeatures
  26. def get[T](feature: ArrayFeature[T]): Option[Array[T]]
    Attributes
    protected
    Definition Classes
    HasFeatures
  27. final def get[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  28. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  29. final def getDefault[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  30. final def getOrDefault[T](param: Param[T]): T
    Definition Classes
    Params
  31. def getParam(paramName: String): Param[Any]
    Definition Classes
    Params
  32. val groupBrokenParagraphs: Param[Boolean]
  33. final def hasDefault[T](param: Param[T]): Boolean
    Definition Classes
    Params
  34. def hasParam(paramName: String): Boolean
    Definition Classes
    Params
  35. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  36. final def isDefined(param: Param[_]): Boolean
    Definition Classes
    Params
  37. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  38. final def isSet(param: Param[_]): Boolean
    Definition Classes
    Params
  39. val maxLineCount: Param[Int]
  40. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  41. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  42. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  43. def onWrite(path: String, spark: SparkSession): Unit
    Attributes
    protected
    Definition Classes
    ParamsAndFeaturesWritable
  44. val paragraphSplit: Param[String]
  45. lazy val params: Array[Param[_]]
    Definition Classes
    Params
  46. def save(path: String): Unit
    Definition Classes
    MLWritable
    Annotations
    @Since( "1.6.0" ) @throws( ... )
  47. def set[T](feature: StructFeature[T], value: T): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  48. def set[K, V](feature: MapFeature[K, V], value: Map[K, V]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  49. def set[T](feature: SetFeature[T], value: Set[T]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  50. def set[T](feature: ArrayFeature[T], value: Array[T]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  51. final def set(paramPair: ParamPair[_]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    Params
  52. final def set(param: String, value: Any): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    Params
  53. final def set[T](param: Param[T], value: T): HasTextReaderProperties.this.type
    Definition Classes
    Params
  54. def setDefault[T](feature: StructFeature[T], value: () ⇒ T): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  55. def setDefault[K, V](feature: MapFeature[K, V], value: () ⇒ Map[K, V]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  56. def setDefault[T](feature: SetFeature[T], value: () ⇒ Set[T]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  57. def setDefault[T](feature: ArrayFeature[T], value: () ⇒ Array[T]): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    HasFeatures
  58. final def setDefault(paramPairs: ParamPair[_]*): HasTextReaderProperties.this.type
    Attributes
    protected
    Definition Classes
    Params
  59. final def setDefault[T](param: Param[T], value: T): HasTextReaderProperties.this.type
    Attributes
    protected[org.apache.spark.ml]
    Definition Classes
    Params
  60. def setExtractTagAttributes(attributes: Array[String]): HasTextReaderProperties.this.type

    Specify which tag attributes should have their values extracted as text when parsing tag-based formats (e.g., HTML or XML).

    Specify which tag attributes should have their values extracted as text when parsing tag-based formats (e.g., HTML or XML).

    attributes

    array of attribute names to extract

    returns

    this instance with the updated extractTagAttributes parameter

  61. def setGroupBrokenParagraphs(value: Boolean): HasTextReaderProperties.this.type

    Enable or disable merging of fragmented lines into coherent paragraphs when parsing text.

    Enable or disable merging of fragmented lines into coherent paragraphs when parsing text. When enabled, heuristics based on line length and structure are used to group lines.

    value

    true to group broken paragraphs, false to preserve original line breaks

    returns

    this instance with the updated groupBrokenParagraphs parameter

  62. def setMaxLineCount(value: Int): HasTextReaderProperties.this.type

    Set the maximum number of lines to evaluate when estimating paragraph layout characteristics.

    Set the maximum number of lines to evaluate when estimating paragraph layout characteristics. This limits the amount of text inspected for layout heuristics.

    value

    maximum number of lines to inspect

    returns

    this instance with the updated maxLineCount parameter

  63. def setParagraphSplit(value: String): HasTextReaderProperties.this.type

    Set the regular expression used to detect paragraph boundaries when grouping broken paragraphs.

    Set the regular expression used to detect paragraph boundaries when grouping broken paragraphs.

    value

    regex pattern string to detect paragraph boundaries

    returns

    this instance with the updated paragraphSplit parameter

  64. def setShortLineWordThreshold(value: Int): HasTextReaderProperties.this.type

    Set the maximum number of words for a line to be considered "short" when grouping broken paragraphs.

    Set the maximum number of words for a line to be considered "short" when grouping broken paragraphs. Short lines often indicate line-wrapping within a paragraph rather than a real paragraph break.

    value

    maximum word count for a line to be considered short

    returns

    this instance with the updated shortLineWordThreshold parameter

  65. def setThreshold(value: Double): HasTextReaderProperties.this.type

    Set the threshold ratio of empty lines used to decide between newline-based and broken-paragraph grouping.

    Set the threshold ratio of empty lines used to decide between newline-based and broken-paragraph grouping. Lower values make it easier to choose broken-paragraph grouping.

    value

    ratio between 0.0 and 1.0 representing the empty-line threshold

    returns

    this instance with the updated threshold parameter

  66. def setTitleLengthSize(value: Int): HasTextReaderProperties.this.type

    Set the maximum character length used to determine if a text block qualifies as a title during parsing.

    Set the maximum character length used to determine if a text block qualifies as a title during parsing.

    value

    maximum number of characters a text block may have to be treated as a title

    returns

    this instance with the updated titleLengthSize parameter

  67. val shortLineWordThreshold: Param[Int]
  68. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  69. val threshold: Param[Double]
  70. val titleLengthSize: Param[Int]
  71. def toString(): String
    Definition Classes
    Identifiable → AnyRef → Any
  72. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  73. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  74. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  75. def write: MLWriter
    Definition Classes
    ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable

Inherited from ParamsAndFeaturesWritable

Inherited from HasFeatures

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from AnyRef

Inherited from Any

Ungrouped