case class SmolVLMConfig(doResize: Boolean = true, size: Map[String, Int] = Map("longest_edge" -> 1536), maxImageSize: Map[String, Int] = Map("longest_edge" -> 384), doRescale: Boolean = true, rescaleFactor: Double = 1.0 / 255.0, doNormalize: Boolean = true, imageMean: Array[Double] = Array(0.5, 0.5, 0.5), imageStd: Array[Double] = Array(0.5, 0.5, 0.5), doImageSplitting: Boolean = true, doPad: Boolean = true, resample: Int = 1, doConvertRgb: Boolean = true, imageToken: String = "<image>", imageTokenId: Int = 49153, endOfUtteranceToken: String = "<end_of_utterance>", globalImageToken: String = "<global-img>", fakeImageToken: String = "<fake_token_around_image>", imageSeqLen: Int = 81, paddingConstant: Double = 0.0, unkTokenId: Int = 0, patchSize: Int = 14, returnPixelMask: Boolean = true) extends Product with Serializable
Configuration class for SmolVLM model parameters
- doResize
Whether to resize input images
- size
Target size for image resizing
- maxImageSize
Maximum size for image processing
- doRescale
Whether to rescale pixel values
- rescaleFactor
Factor for pixel value rescaling
- doNormalize
Whether to normalize pixel values
- imageMean
Mean values for image normalization
- imageStd
Standard deviation values for image normalization
- doImageSplitting
Whether to split large images
- doPad
Whether to pad images
- resample
Resampling method for image resizing
- doConvertRgb
Whether to convert images to RGB
- imageToken
Special token for image placeholders
- imageTokenId
Token ID for image placeholders
- endOfUtteranceToken
Token indicating end of utterance
- globalImageToken
Token for global image context
- fakeImageToken
Delimiter token placed around image content (literal value: `<fake_token_around_image>`)
- imageSeqLen
Length of image sequence
- paddingConstant
Value used for padding
- unkTokenId
Token ID for unknown tokens
- patchSize
Size of image patches for processing
- returnPixelMask
Whether to return pixel attention masks
- Alphabetic
- By Inheritance
- SmolVLMConfig
- Serializable
- Serializable
- Product
- Equals
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
SmolVLMConfig(doResize: Boolean = true, size: Map[String, Int] = Map("longest_edge" -> 1536), maxImageSize: Map[String, Int] = Map("longest_edge" -> 384), doRescale: Boolean = true, rescaleFactor: Double = 1.0 / 255.0, doNormalize: Boolean = true, imageMean: Array[Double] = Array(0.5, 0.5, 0.5), imageStd: Array[Double] = Array(0.5, 0.5, 0.5), doImageSplitting: Boolean = true, doPad: Boolean = true, resample: Int = 1, doConvertRgb: Boolean = true, imageToken: String = "<image>", imageTokenId: Int = 49153, endOfUtteranceToken: String = "<end_of_utterance>", globalImageToken: String = "<global-img>", fakeImageToken: String = "<fake_token_around_image>", imageSeqLen: Int = 81, paddingConstant: Double = 0.0, unkTokenId: Int = 0, patchSize: Int = 14, returnPixelMask: Boolean = true)
- doResize
Whether to resize input images
- size
Target size for image resizing
- maxImageSize
Maximum size for image processing
- doRescale
Whether to rescale pixel values
- rescaleFactor
Factor for pixel value rescaling
- doNormalize
Whether to normalize pixel values
- imageMean
Mean values for image normalization
- imageStd
Standard deviation values for image normalization
- doImageSplitting
Whether to split large images
- doPad
Whether to pad images
- resample
Resampling method for image resizing
- doConvertRgb
Whether to convert images to RGB
- imageToken
Special token for image placeholders
- imageTokenId
Token ID for image placeholders
- endOfUtteranceToken
Token indicating end of utterance
- globalImageToken
Token for global image context
- fakeImageToken
Delimiter token placed around image content (literal value: `<fake_token_around_image>`)
- imageSeqLen
Length of image sequence
- paddingConstant
Value used for padding
- unkTokenId
Token ID for unknown tokens
- patchSize
Size of image patches for processing
- returnPixelMask
Whether to return pixel attention masks
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- val doConvertRgb: Boolean
- val doImageSplitting: Boolean
- val doNormalize: Boolean
- val doPad: Boolean
- val doRescale: Boolean
- val doResize: Boolean
- val endOfUtteranceToken: String
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val fakeImageToken: String
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- val globalImageToken: String
- val imageMean: Array[Double]
- val imageSeqLen: Int
- val imageStd: Array[Double]
- val imageToken: String
- val imageTokenId: Int
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- val maxImageSize: Map[String, Int]
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- val paddingConstant: Double
- val patchSize: Int
- val resample: Int
- val rescaleFactor: Double
- val returnPixelMask: Boolean
- val size: Map[String, Int]
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val unkTokenId: Int
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()