trait HasTextReaderProperties extends ParamsAndFeaturesWritable
- Alphabetic
- By Inheritance
- HasTextReaderProperties
- ParamsAndFeaturesWritable
- HasFeatures
- Params
- Serializable
- Serializable
- Identifiable
- DefaultParamsWritable
- MLWritable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Abstract Value Members
Concrete Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
$[T](param: Param[T]): T
- Attributes
- protected
- Definition Classes
- Params
-
def
$$[T](feature: StructFeature[T]): T
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[K, V](feature: MapFeature[K, V]): Map[K, V]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[T](feature: SetFeature[T]): Set[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[T](feature: ArrayFeature[T]): Array[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
final
def
clear(param: Param[_]): HasTextReaderProperties.this.type
- Definition Classes
- Params
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
copyValues[T <: Params](to: T, extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
final
def
defaultCopy[T <: Params](extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
- val extractTagAttributes: StringArrayParam
-
val
features: ArrayBuffer[Feature[_, _, _]]
- Definition Classes
- HasFeatures
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
get[T](feature: StructFeature[T]): Option[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[T](feature: SetFeature[T]): Option[Set[T]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[T](feature: ArrayFeature[T]): Option[Array[T]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
- val groupBrokenParagraphs: Param[Boolean]
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
- val maxLineCount: Param[Int]
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
onWrite(path: String, spark: SparkSession): Unit
- Attributes
- protected
- Definition Classes
- ParamsAndFeaturesWritable
- val paragraphSplit: Param[String]
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
def
set[T](feature: StructFeature[T], value: T): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[K, V](feature: MapFeature[K, V], value: Map[K, V]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[T](feature: SetFeature[T], value: Set[T]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[T](feature: ArrayFeature[T], value: Array[T]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
set(paramPair: ParamPair[_]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set(param: String, value: Any): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set[T](param: Param[T], value: T): HasTextReaderProperties.this.type
- Definition Classes
- Params
-
def
setDefault[T](feature: StructFeature[T], value: () ⇒ T): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[K, V](feature: MapFeature[K, V], value: () ⇒ Map[K, V]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[T](feature: SetFeature[T], value: () ⇒ Set[T]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[T](feature: ArrayFeature[T], value: () ⇒ Array[T]): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
setDefault(paramPairs: ParamPair[_]*): HasTextReaderProperties.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
setDefault[T](param: Param[T], value: T): HasTextReaderProperties.this.type
- Attributes
- protected[org.apache.spark.ml]
- Definition Classes
- Params
-
def
setExtractTagAttributes(attributes: Array[String]): HasTextReaderProperties.this.type
Specify which tag attributes should have their values extracted as text when parsing tag-based formats (e.g., HTML or XML).
- attributes
array of attribute names to extract
- returns
this instance with the updated `extractTagAttributes` parameter
-
def
setGroupBrokenParagraphs(value: Boolean): HasTextReaderProperties.this.type
Enable or disable merging of fragmented lines into coherent paragraphs when parsing text. When enabled, heuristics based on line length and structure are used to group lines.
- value
true to group broken paragraphs, false to preserve original line breaks
- returns
this instance with the updated `groupBrokenParagraphs` parameter
-
def
setMaxLineCount(value: Int): HasTextReaderProperties.this.type
Set the maximum number of lines to evaluate when estimating paragraph layout characteristics. This limits the amount of text inspected for layout heuristics.
- value
maximum number of lines to inspect
- returns
this instance with the updated `maxLineCount` parameter
-
def
setParagraphSplit(value: String): HasTextReaderProperties.this.type
Set the regular expression used to detect paragraph boundaries when grouping broken paragraphs.
- value
regex pattern string to detect paragraph boundaries
- returns
this instance with the updated `paragraphSplit` parameter
-
def
setShortLineWordThreshold(value: Int): HasTextReaderProperties.this.type
Set the maximum number of words for a line to be considered "short" when grouping broken paragraphs. Short lines often indicate line-wrapping within a paragraph rather than a real paragraph break.
- value
maximum word count for a line to be considered short
- returns
this instance with the updated `shortLineWordThreshold` parameter
-
def
setThreshold(value: Double): HasTextReaderProperties.this.type
Set the threshold ratio of empty lines used to decide between newline-based or broken-paragraph grouping. Lower values make it easier to choose broken-paragraph grouping.
- value
ratio between 0.0 and 1.0 representing the empty-line threshold
- returns
this instance with the updated `threshold` parameter
-
def
setTitleLengthSize(value: Int): HasTextReaderProperties.this.type
Set the maximum character length used to determine if a text block qualifies as a title during parsing.
- value
maximum number of characters to treat a block as a title
- returns
this instance with the updated `titleLengthSize` parameter
- val shortLineWordThreshold: Param[Int]
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val threshold: Param[Double]
- val titleLengthSize: Param[Int]
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
write: MLWriter
- Definition Classes
- ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable