class RTFReader extends Serializable
Class to read and parse Rich Text Format (RTF) files.
The reader extracts paragraph-level content from .rtf documents and maps it into
HTMLElements, classifying paragraphs as titles, list items, or narrative text using the same
structural conventions used by the other readers.
Linear Supertypes
Ordering
- Alphabetic
- By Inheritance
Inherited
- RTFReader
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
Visibility
- Public
- All
Instance Constructors
-
new
RTFReader(storeContent: Boolean = false, titleLengthSize: Int = 50)
- storeContent
Whether to include the raw RTF bytes in the output DataFrame as a separate
content column. Default is false.
- titleLengthSize
Maximum character length used when deciding whether a paragraph should be classified as a title. Default is
50.
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getOutputColumn: String
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def rtf(filePath: String): DataFrame
- def rtfToHTMLElement(content: Array[Byte]): Seq[HTMLElement]
- def rtfToHTMLElement(content: String): Seq[HTMLElement]
- def setOutputColumn(value: String): RTFReader.this.type
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()