Packages

trait

com.johnsnowlabs.nlp

HasLlamaCppModelProperties

trait HasLlamaCppModelProperties extends AnyRef

Contains settable model parameters for the AutoGGUFModel.

Self Type
HasLlamaCppModelProperties with ParamsAndFeaturesWritable with HasProtectedParams
Linear Supertypes
AnyRef, Any
Known Subclasses
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. HasLlamaCppModelProperties
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val chatTemplate: Param[String]

  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  7. val defaultGpuLayers: Int
    Attributes
    protected
  8. val defaultMainGpu: Int
    Attributes
    protected
  9. val defragmentationThreshold: FloatParam

  10. val embedding: BooleanParam

  11. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  12. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  13. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  14. val flashAttention: BooleanParam

  15. def getChatTemplate: String

  16. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  17. def getDefragmentationThreshold: Float

  18. def getEmbedding: Boolean

  19. def getFlashAttention: Boolean

  20. def getGrpAttnN: Int
  21. def getGrpAttnW: Int

  22. def getInputPrefixBos: Boolean

  23. def getLookupCacheDynamicFilePath: String

  24. def getLookupCacheStaticFilePath: String

  25. def getLoraAdapters: Map[String, Float]

  26. def getMainGpu: Int

  27. def getMetadata: String

    Get the metadata for the model

  28. def getMetadataMap: Map[String, String]
  29. def getModelDraft: String

  30. def getModelParameters: ModelParameters
    Attributes
    protected
  31. def getNBatch: Int

  32. def getNChunks: Int

  33. def getNCtx: Int

  34. def getNDraft: Int

  35. def getNGpuLayers: Int

  36. def getNGpuLayersDraft: Int

  37. def getNSequences: Int

  38. def getNThreads: Int

  39. def getNThreadsBatch: Int

  40. def getNThreadsBatchDraft: Int

  41. def getNThreadsDraft: Int

  42. def getNUbatch: Int

  43. def getNoKvOffload: Boolean

  44. def getNuma: String

  45. def getPSplit: Float

  46. def getPoolingType: String

  47. def getRopeFreqBase: Float

  48. def getRopeFreqScale: Float

  49. def getRopeScalingType: String

  50. def getSplitMode: String

  51. def getSystemPrompt: String

  52. def getTensorSplit: Array[Double]

  53. def getUseMlock: Boolean

  54. def getUseMmap: Boolean

  55. def getYarnAttnFactor: Float

  56. def getYarnBetaFast: Float

  57. def getYarnBetaSlow: Float

  58. def getYarnExtFactor: Float

  59. def getYarnOrigCtx: Int

  60. val gpuSplitMode: Param[String]

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  61. val grpAttnN: IntParam

  62. val grpAttnW: IntParam

  63. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  64. val inputPrefixBos: BooleanParam

  65. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  66. val logger: Logger
    Attributes
    protected
  67. val lookupCacheDynamicFilePath: Param[String]

  68. val lookupCacheStaticFilePath: Param[String]

  69. val loraAdapters: StructFeature[Map[String, Float]]

  70. val mainGpu: IntParam

  71. val metadata: (HasLlamaCppModelProperties.this)#ProtectedParam[String]
  72. val modelDraft: Param[String]

  73. val nBatch: IntParam

  74. val nChunks: IntParam

  75. val nCtx: IntParam

  76. val nDraft: IntParam

  77. val nGpuLayers: IntParam

  78. val nGpuLayersDraft: IntParam

  79. val nSequences: IntParam

  80. val nThreads: IntParam

  81. val nThreadsBatch: IntParam

  82. val nThreadsBatchDraft: IntParam

  83. val nThreadsDraft: IntParam

  84. val nUbatch: IntParam

  85. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  86. val noKvOffload: BooleanParam

  87. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  88. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  89. val numaStrategy: Param[String]

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all available nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  90. val pSplit: FloatParam

  91. val poolingType: Param[String]

    Set the pooling type for embeddings, use model default if unspecified

    Set the pooling type for embeddings, use model default if unspecified

    • 0 NONE: Don't use any pooling
    • 1 MEAN: Mean Pooling
    • 2 CLS: Choose the CLS token
    • 3 LAST: Choose the last token
  92. val ropeFreqBase: FloatParam

  93. val ropeFreqScale: FloatParam

  94. val ropeScalingType: Param[String]

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  95. def setChatTemplate(chatTemplate: String): HasLlamaCppModelProperties.this

    The chat template to use

  96. def setDefragmentationThreshold(defragThold: Float): HasLlamaCppModelProperties.this

    Set the KV cache defragmentation threshold

  97. def setEmbedding(embedding: Boolean): HasLlamaCppModelProperties.this

    Whether to load model with embedding support

  98. def setFlashAttention(flashAttention: Boolean): HasLlamaCppModelProperties.this

    Whether to enable Flash Attention

  99. def setGpuSplitMode(splitMode: String): HasLlamaCppModelProperties.this

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  100. def setGpuSupportIfAvailable(spark: SparkSession): HasLlamaCppModelProperties.this
    Attributes
    protected
  101. def setGrpAttnN(grpAttnN: Int): HasLlamaCppModelProperties.this

    Set the group-attention factor

  102. def setGrpAttnW(grpAttnW: Int): HasLlamaCppModelProperties.this

    Set the group-attention width

  103. def setInputPrefixBos(inputPrefixBos: Boolean): HasLlamaCppModelProperties.this

    Whether to add prefix BOS to user inputs, preceding the --in-prefix string

  104. def setLookupCacheDynamicFilePath(lookupCacheDynamicFilePath: String): HasLlamaCppModelProperties.this

    Set path to dynamic lookup cache to use for lookup decoding (updated by generation)

  105. def setLookupCacheStaticFilePath(lookupCacheStaticFilePath: String): HasLlamaCppModelProperties.this

    Set path to static lookup cache to use for lookup decoding (not updated by generation)

  106. def setLoraAdapters(loraAdapters: HashMap[String, Double]): HasLlamaCppModelProperties.this

    Sets paths to lora adapters with user defined scale.

    Sets paths to lora adapters with user defined scale. (PySpark Override)

  107. def setLoraAdapters(loraAdapters: Map[String, Float]): HasLlamaCppModelProperties.this

    Sets paths to lora adapters with user defined scale.

  108. def setMainGpu(mainGpu: Int): HasLlamaCppModelProperties.this

    Set the GPU that is used for scratch and small tensors

  109. def setMetadata(metadata: String): HasLlamaCppModelProperties.this

    Set the metadata for the model

  110. def setModelDraft(modelDraft: String): HasLlamaCppModelProperties.this

    Set the draft model for speculative decoding

  111. def setNBatch(nBatch: Int): HasLlamaCppModelProperties.this

    Set the logical batch size for prompt processing (must be >=32 to use BLAS)

  112. def setNChunks(nChunks: Int): HasLlamaCppModelProperties.this

    Set the maximal number of chunks to process

  113. def setNCtx(nCtx: Int): HasLlamaCppModelProperties.this

    Set the size of the prompt context

  114. def setNDraft(nDraft: Int): HasLlamaCppModelProperties.this

    Set the number of tokens to draft for speculative decoding

  115. def setNGpuLayers(nGpuLayers: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM (-1 - use default)

  116. def setNGpuLayersDraft(nGpuLayersDraft: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM for the draft model (-1 - use default)

  117. def setNSequences(nSequences: Int): HasLlamaCppModelProperties.this

    Set the number of sequences to decode

  118. def setNThreads(nThreads: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during generation

  119. def setNThreadsBatch(nThreadsBatch: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during batch and prompt processing

  120. def setNThreadsBatchDraft(nThreadsBatchDraft: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during batch and prompt processing

  121. def setNThreadsDraft(nThreadsDraft: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during draft generation

  122. def setNUbatch(nUbatch: Int): HasLlamaCppModelProperties.this

    Set the physical batch size for prompt processing (must be >=32 to use BLAS)

  123. def setNoKvOffload(noKvOffload: Boolean): HasLlamaCppModelProperties.this

    Whether to disable KV offload

  124. def setNumaStrategy(numa: String): HasLlamaCppModelProperties.this

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all available nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  125. def setPSplit(pSplit: Float): HasLlamaCppModelProperties.this

    Set the speculative decoding split probability

  126. def setPoolingType(poolingType: String): HasLlamaCppModelProperties.this

    Set the pooling type for embeddings, use model default if unspecified

    Set the pooling type for embeddings, use model default if unspecified

    • 0 NONE: Don't use any pooling and return token embeddings (if the model supports it)
    • 1 MEAN: Mean Pooling
    • 2 CLS: Choose the CLS token
    • 3 LAST: Choose the last token
  127. def setRopeFreqBase(ropeFreqBase: Float): HasLlamaCppModelProperties.this

    Set the RoPE base frequency, used by NTK-aware scaling

  128. def setRopeFreqScale(ropeFreqScale: Float): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling factor, expands context by a factor of 1/N

  129. def setRopeScalingType(ropeScalingType: String): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  130. def setSystemPrompt(systemPrompt: String): HasLlamaCppModelProperties.this

    Set a system prompt to use

  131. def setTensorSplit(tensorSplit: Array[Double]): HasLlamaCppModelProperties.this

    Set how split tensors should be distributed across GPUs

  132. def setUseMlock(useMlock: Boolean): HasLlamaCppModelProperties.this

    Whether to force the system to keep model in RAM rather than swapping or compressing

  133. def setUseMmap(useMmap: Boolean): HasLlamaCppModelProperties.this

    Whether to use memory-map model (faster load but may increase pageouts if not using mlock)

  134. def setYarnAttnFactor(yarnAttnFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN scale sqrt(t) or attention magnitude

  135. def setYarnBetaFast(yarnBetaFast: Float): HasLlamaCppModelProperties.this

    Set the YaRN low correction dim or beta

  136. def setYarnBetaSlow(yarnBetaSlow: Float): HasLlamaCppModelProperties.this

    Set the YaRN high correction dim or alpha

  137. def setYarnExtFactor(yarnExtFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN extrapolation mix factor

  138. def setYarnOrigCtx(yarnOrigCtx: Int): HasLlamaCppModelProperties.this

    Set the YaRN original context size of model

  139. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  140. val systemPrompt: Param[String]

  141. val tensorSplit: DoubleArrayParam

  142. def toString(): String
    Definition Classes
    AnyRef → Any
  143. val useMlock: BooleanParam

  144. val useMmap: BooleanParam

  145. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  146. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  147. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  148. val yarnAttnFactor: FloatParam

  149. val yarnBetaFast: FloatParam

  150. val yarnBetaSlow: FloatParam

  151. val yarnExtFactor: FloatParam

  152. val yarnOrigCtx: IntParam

Inherited from AnyRef

Inherited from Any

Parameter setters

Parameter getters

Parameters

A list of (hyper-)parameter keys this annotator can take. Users can set and get the parameter values through setters and getters, respectively.

Ungrouped