Packages

trait

com.johnsnowlabs.nlp

HasLlamaCppModelProperties

trait HasLlamaCppModelProperties extends AnyRef

Contains settable model parameters for the AutoGGUFModel.

Self Type
HasLlamaCppModelProperties with ParamsAndFeaturesWritable with HasProtectedParams
Linear Supertypes
AnyRef, Any
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. HasLlamaCppModelProperties
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val chatTemplate: Param[String]

  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  7. val defragmentationThreshold: FloatParam

  8. val disableLog: BooleanParam

  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  11. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  12. val flashAttention: BooleanParam

  13. def getChatTemplate: String

  14. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  15. def getDefragmentationThreshold: Float

  16. def getDisableLog: Boolean

  17. def getFlashAttention: Boolean

  18. def getLogVerbosity: Int
  19. def getMainGpu: Int

  20. def getMetadata: String

    Get the metadata for the model

  21. def getMetadataMap: Map[String, Map[String, String]]
  22. def getModelDraft: String

  23. def getModelParameters: ModelParameters
    Attributes
    protected
  24. def getNBatch: Int

  25. def getNCtx: Int

  26. def getNDraft: Int

  27. def getNGpuLayers: Int

  28. def getNGpuLayersDraft: Int

  29. def getNThreads: Int

  30. def getNThreadsBatch: Int

  31. def getNUbatch: Int

  32. def getNoKvOffload: Boolean

  33. def getNuma: String

  34. def getReasoningBudget: Int

  35. def getRopeFreqBase: Float

  36. def getRopeFreqScale: Float

  37. def getRopeScalingType: String

  38. def getSplitMode: String

  39. def getSystemPrompt: String

  40. def getUseMlock: Boolean

  41. def getUseMmap: Boolean

  42. def getYarnAttnFactor: Float

  43. def getYarnBetaFast: Float

  44. def getYarnBetaSlow: Float

  45. def getYarnExtFactor: Float

  46. def getYarnOrigCtx: Int

  47. val gpuSplitMode: Param[String]

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  48. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  49. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  50. val logVerbosity: IntParam

  51. val logger: Logger
    Attributes
    protected
  52. val mainGpu: IntParam

  53. val metadata: (HasLlamaCppModelProperties.this)#ProtectedParam[String]
  54. val modelDraft: Param[String]

  55. val nBatch: IntParam

  56. val nCtx: IntParam

  57. val nDraft: IntParam

  58. val nGpuLayers: IntParam

  59. val nGpuLayersDraft: IntParam

  60. val nThreads: IntParam

  61. val nThreadsBatch: IntParam

  62. val nUbatch: IntParam

  63. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  64. val noKvOffload: BooleanParam

  65. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  66. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  67. val numaStrategy: Param[String]

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  68. val reasoningBudget: IntParam

  69. val ropeFreqBase: FloatParam

  70. val ropeFreqScale: FloatParam

  71. val ropeScalingType: Param[String]

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  72. def setChatTemplate(chatTemplate: String): HasLlamaCppModelProperties.this

    The chat template to use

  73. def setDefragmentationThreshold(defragThold: Float): HasLlamaCppModelProperties.this

    Set the KV cache defragmentation threshold

  74. def setDisableLog(disableLog: Boolean): HasLlamaCppModelProperties.this

  75. def setFlashAttention(flashAttention: Boolean): HasLlamaCppModelProperties.this

    Whether to enable Flash Attention

  76. def setGpuSplitMode(splitMode: String): HasLlamaCppModelProperties.this

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  77. def setLogVerbosity(logVerbosity: Int): HasLlamaCppModelProperties.this

    Set the verbosity threshold.

    Set the verbosity threshold. Messages with a higher verbosity will be ignored.

    Values map to the following:

    • GGML_LOG_LEVEL_NONE = 0
    • GGML_LOG_LEVEL_DEBUG = 1
    • GGML_LOG_LEVEL_INFO = 2
    • GGML_LOG_LEVEL_WARN = 3
    • GGML_LOG_LEVEL_ERROR = 4
    • GGML_LOG_LEVEL_CONT = 5 (continue previous log)
  78. def setMainGpu(mainGpu: Int): HasLlamaCppModelProperties.this

    Set the GPU that is used for scratch and small tensors

  79. def setMetadata(metadata: String): HasLlamaCppModelProperties.this

    Set the metadata for the model

  80. def setModelDraft(modelDraft: String): HasLlamaCppModelProperties.this

    Set the draft model for speculative decoding

  81. def setNBatch(nBatch: Int): HasLlamaCppModelProperties.this

    Set the logical batch size for prompt processing (must be >=32 to use BLAS)

  82. def setNCtx(nCtx: Int): HasLlamaCppModelProperties.this

    Set the size of the prompt context

  83. def setNDraft(nDraft: Int): HasLlamaCppModelProperties.this

    Set the number of tokens to draft for speculative decoding

  84. def setNGpuLayers(nGpuLayers: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM (-1 - use default)

  85. def setNGpuLayersDraft(nGpuLayersDraft: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM for the draft model (-1 - use default)

  86. def setNThreads(nThreads: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during generation

  87. def setNThreadsBatch(nThreadsBatch: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during batch and prompt processing

  88. def setNUbatch(nUbatch: Int): HasLlamaCppModelProperties.this

    Set the physical batch size for prompt processing (must be >=32 to use BLAS)

  89. def setNoKvOffload(noKvOffload: Boolean): HasLlamaCppModelProperties.this

    Whether to disable KV offload

  90. def setNumaStrategy(numa: String): HasLlamaCppModelProperties.this

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: spread execution evenly over all nodes
    • ISOLATE: only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  91. def setReasoningBudget(reasoningBudget: Int): HasLlamaCppModelProperties.this

    Controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)

  92. def setRopeFreqBase(ropeFreqBase: Float): HasLlamaCppModelProperties.this

    Set the RoPE base frequency, used by NTK-aware scaling

  93. def setRopeFreqScale(ropeFreqScale: Float): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling factor, expands context by a factor of 1/N

  94. def setRopeScalingType(ropeScalingType: String): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • NONE: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  95. def setSystemPrompt(systemPrompt: String): HasLlamaCppModelProperties.this

    Set a system prompt to use

  96. def setUseMlock(useMlock: Boolean): HasLlamaCppModelProperties.this

    Whether to force the system to keep model in RAM rather than swapping or compressing

  97. def setUseMmap(useMmap: Boolean): HasLlamaCppModelProperties.this

    Whether to use memory-map model (faster load but may increase pageouts if not using mlock)

  98. def setYarnAttnFactor(yarnAttnFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN scale sqrt(t) or attention magnitude

  99. def setYarnBetaFast(yarnBetaFast: Float): HasLlamaCppModelProperties.this

    Set the YaRN low correction dim or beta

  100. def setYarnBetaSlow(yarnBetaSlow: Float): HasLlamaCppModelProperties.this

    Set the YaRN high correction dim or alpha

  101. def setYarnExtFactor(yarnExtFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN extrapolation mix factor

  102. def setYarnOrigCtx(yarnOrigCtx: Int): HasLlamaCppModelProperties.this

    Set the YaRN original context size of model

  103. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  104. val systemPrompt: Param[String]

  105. def toString(): String
    Definition Classes
    AnyRef → Any
  106. val useMlock: BooleanParam

  107. val useMmap: BooleanParam

  108. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  109. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  110. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  111. val yarnAttnFactor: FloatParam

  112. val yarnBetaFast: FloatParam

  113. val yarnBetaSlow: FloatParam

  114. val yarnExtFactor: FloatParam

  115. val yarnOrigCtx: IntParam

Inherited from AnyRef

Inherited from Any

Parameter setters

Parameter getters

Parameters

A list of (hyper-)parameter keys this annotator can take. Users can set and get the parameter values through setters and getters, respectively.

Ungrouped