Packages

trait

com.johnsnowlabs.nlp

HasLlamaCppModelProperties

trait HasLlamaCppModelProperties extends AnyRef

Contains settable model parameters for the AutoGGUFModel.

Self Type
HasLlamaCppModelProperties with ParamsAndFeaturesWritable with HasProtectedParams
Linear Supertypes
AnyRef, Any
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. HasLlamaCppModelProperties
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val chatTemplate: Param[String]

  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  7. val defragmentationThreshold: FloatParam

  8. val disableLog: BooleanParam

  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  11. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  12. val flashAttention: BooleanParam

  13. def getChatTemplate: String

  14. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  15. def getDefragmentationThreshold: Float

  16. def getDisableLog: Boolean

  17. def getFlashAttention: Boolean

  18. def getLogVerbosity: Int
  19. def getMainGpu: Int

  20. def getMetadata: String

    Get the metadata for the model

  21. def getMetadataMap: Map[String, Map[String, String]]
  22. def getModelDraft: String

  23. def getModelParameters: ModelParameters
    Attributes
    protected
  24. def getNBatch: Int

  25. def getNCtx: Int

  26. def getNDraft: Int

  27. def getNGpuLayers: Int

  28. def getNGpuLayersDraft: Int

  29. def getNThreads: Int

  30. def getNThreadsBatch: Int

  31. def getNUbatch: Int

  32. def getNoKvOffload: Boolean

  33. def getNuma: String

  34. def getRopeFreqBase: Float

  35. def getRopeFreqScale: Float

  36. def getRopeScalingType: String

  37. def getSplitMode: String

  38. def getSystemPrompt: String

  39. def getUseMlock: Boolean

  40. def getUseMmap: Boolean

  41. def getYarnAttnFactor: Float

  42. def getYarnBetaFast: Float

  43. def getYarnBetaSlow: Float

  44. def getYarnExtFactor: Float

  45. def getYarnOrigCtx: Int

  46. val gpuSplitMode: Param[String]

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  47. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  48. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  49. val logVerbosity: IntParam

  50. val logger: Logger
    Attributes
    protected
  51. val mainGpu: IntParam

  52. val metadata: (HasLlamaCppModelProperties.this)#ProtectedParam[String]
  53. val modelDraft: Param[String]

  54. val nBatch: IntParam

  55. val nCtx: IntParam

  56. val nDraft: IntParam

  57. val nGpuLayers: IntParam

  58. val nGpuLayersDraft: IntParam

  59. val nThreads: IntParam

  60. val nThreadsBatch: IntParam

  61. val nUbatch: IntParam

  62. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  63. val noKvOffload: BooleanParam

  64. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  65. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  66. val numaStrategy: Param[String]

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  67. val ropeFreqBase: FloatParam

  68. val ropeFreqScale: FloatParam

  69. val ropeScalingType: Param[String]

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  70. def setChatTemplate(chatTemplate: String): HasLlamaCppModelProperties.this

    The chat template to use

  71. def setDefragmentationThreshold(defragThold: Float): HasLlamaCppModelProperties.this

    Set the KV cache defragmentation threshold

  72. def setDisableLog(disableLog: Boolean): HasLlamaCppModelProperties.this

  73. def setFlashAttention(flashAttention: Boolean): HasLlamaCppModelProperties.this

    Whether to enable Flash Attention

  74. def setGpuSplitMode(splitMode: String): HasLlamaCppModelProperties.this

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  75. def setLogVerbosity(logVerbosity: Int): HasLlamaCppModelProperties.this

    Set the verbosity threshold.

    Set the verbosity threshold. Messages with a higher verbosity will be ignored.

    Values map to the following:

    • GGML_LOG_LEVEL_NONE = 0
    • GGML_LOG_LEVEL_DEBUG = 1
    • GGML_LOG_LEVEL_INFO = 2
    • GGML_LOG_LEVEL_WARN = 3
    • GGML_LOG_LEVEL_ERROR = 4
    • GGML_LOG_LEVEL_CONT = 5 (continue previous log)
  76. def setMainGpu(mainGpu: Int): HasLlamaCppModelProperties.this

    Set the GPU that is used for scratch and small tensors

  77. def setMetadata(metadata: String): HasLlamaCppModelProperties.this

    Set the metadata for the model

  78. def setModelDraft(modelDraft: String): HasLlamaCppModelProperties.this

    Set the draft model for speculative decoding

  79. def setNBatch(nBatch: Int): HasLlamaCppModelProperties.this

    Set the logical batch size for prompt processing (must be >=32 to use BLAS)

  80. def setNCtx(nCtx: Int): HasLlamaCppModelProperties.this

    Set the size of the prompt context

  81. def setNDraft(nDraft: Int): HasLlamaCppModelProperties.this

    Set the number of tokens to draft for speculative decoding

  82. def setNGpuLayers(nGpuLayers: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM (-1 - use default)

  83. def setNGpuLayersDraft(nGpuLayersDraft: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM for the draft model (-1 - use default)

  84. def setNThreads(nThreads: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during generation

  85. def setNThreadsBatch(nThreadsBatch: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during batch and prompt processing

  86. def setNUbatch(nUbatch: Int): HasLlamaCppModelProperties.this

    Set the physical batch size for prompt processing (must be >=32 to use BLAS)

  87. def setNoKvOffload(noKvOffload: Boolean): HasLlamaCppModelProperties.this

    Whether to disable KV offload

  88. def setNumaStrategy(numa: String): HasLlamaCppModelProperties.this

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  89. def setRopeFreqBase(ropeFreqBase: Float): HasLlamaCppModelProperties.this

    Set the RoPE base frequency, used by NTK-aware scaling

  90. def setRopeFreqScale(ropeFreqScale: Float): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling factor, expands context by a factor of 1/N

  91. def setRopeScalingType(ropeScalingType: String): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  92. def setSystemPrompt(systemPrompt: String): HasLlamaCppModelProperties.this

    Set a system prompt to use

  93. def setUseMlock(useMlock: Boolean): HasLlamaCppModelProperties.this

    Whether to force the system to keep model in RAM rather than swapping or compressing

  94. def setUseMmap(useMmap: Boolean): HasLlamaCppModelProperties.this

    Whether to use memory-map model (faster load but may increase pageouts if not using mlock)

  95. def setYarnAttnFactor(yarnAttnFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN scale sqrt(t) or attention magnitude

  96. def setYarnBetaFast(yarnBetaFast: Float): HasLlamaCppModelProperties.this

    Set the YaRN low correction dim or beta

  97. def setYarnBetaSlow(yarnBetaSlow: Float): HasLlamaCppModelProperties.this

    Set the YaRN high correction dim or alpha

  98. def setYarnExtFactor(yarnExtFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN extrapolation mix factor

  99. def setYarnOrigCtx(yarnOrigCtx: Int): HasLlamaCppModelProperties.this

    Set the YaRN original context size of model

  100. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  101. val systemPrompt: Param[String]

  102. def toString(): String
    Definition Classes
    AnyRef → Any
  103. val useMlock: BooleanParam

  104. val useMmap: BooleanParam

  105. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  106. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  107. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  108. val yarnAttnFactor: FloatParam

  109. val yarnBetaFast: FloatParam

  110. val yarnBetaSlow: FloatParam

  111. val yarnExtFactor: FloatParam

  112. val yarnOrigCtx: IntParam

Inherited from AnyRef

Inherited from Any

Parameter setters

Parameter getters

Parameters

A list of (hyper-)parameter keys this annotator can take. Users can set and get the parameter values through setters and getters, respectively.

Ungrouped