Packages

t

com.johnsnowlabs.nlp

HasLlamaCppProperties

trait HasLlamaCppProperties extends AnyRef

Contains settable parameters for the AutoGGUFModel.

Self Type
HasLlamaCppProperties with ParamsAndFeaturesWritable with HasProtectedParams
Linear Supertypes
AnyRef, Any
Known Subclasses
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. HasLlamaCppProperties
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val cachePrompt: BooleanParam

  6. val chatTemplate: Param[String]

  7. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  8. val defragmentationThreshold: FloatParam

  9. val disableTokenIds: IntArrayParam

  10. val dynamicTemperatureExponent: FloatParam

  11. val dynamicTemperatureRange: FloatParam

  12. val embedding: BooleanParam
  13. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  14. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  15. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  16. val flashAttention: BooleanParam

  17. val frequencyPenalty: FloatParam

  18. def getCachePrompt: Boolean

  19. def getChatTemplate: String

  20. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  21. def getDefragmentationThreshold: Float

  22. def getDisableTokenIds: Array[Int]

  23. def getDynamicTemperatureExponent: Float

  24. def getDynamicTemperatureRange: Float

  25. def getEmbedding: Boolean

  26. def getFlashAttention: Boolean

  27. def getFrequencyPenalty: Float

  28. def getGrammar: String

  29. def getGrpAttnN: Int
  30. def getGrpAttnW: Int

  31. def getIgnoreEos: Boolean

  32. def getInferenceParameters: InferenceParameters
    Attributes
    protected
  33. def getInputPrefix: String

  34. def getInputPrefixBos: Boolean

  35. def getInputSuffix: String

  36. def getLookupCacheDynamicFilePath: String

  37. def getLookupCacheStaticFilePath: String

  38. def getLoraAdapters: Map[String, Float]

  39. def getMainGpu: Int

  40. def getMetadata: String

    Get the metadata for the model

  41. def getMinKeep: Int

  42. def getMinP: Float

  43. def getMiroStat: String

  44. def getMiroStatEta: Float

  45. def getMiroStatTau: Float

  46. def getModelDraft: String

  47. def getModelParameters: ModelParameters
    Attributes
    protected
  48. def getNBatch: Int

  49. def getNChunks: Int

  50. def getNCtx: Int

  51. def getNDraft: Int

  52. def getNGpuLayers: Int

  53. def getNGpuLayersDraft: Int

  54. def getNKeep: Int

  55. def getNPredict: Int
  56. def getNProbs: Int

  57. def getNSequences: Int

  58. def getNThreads: Int

  59. def getNThreadsBatch: Int

  60. def getNThreadsBatchDraft: Int

  61. def getNThreadsDraft: Int

  62. def getNUbatch: Int

  63. def getNoKvOffload: Boolean

  64. def getNuma: String

  65. def getPSplit: Float

  66. def getPenalizeNl: Boolean

  67. def getPenaltyPrompt: String

  68. def getPoolingType: String

  69. def getPresencePenalty: Float

  70. def getRepeatLastN: Int

  71. def getRepeatPenalty: Float

  72. def getRopeFreqBase: Float

  73. def getRopeFreqScale: Float

  74. def getRopeScalingType: String

  75. def getSamplers: Array[String]

  76. def getSeed: Int

  77. def getSplitMode: String

  78. def getStopStrings: Array[String]

  79. def getSystemPrompt: String

  80. def getTemperature: Float

  81. def getTensorSplit: Array[Double]

  82. def getTfsZ: Float

  83. def getTokenBias: Map[String, Float]

  84. def getTokenIdBias: Map[Int, Float]

  85. def getTopK: Int

  86. def getTopP: Float

  87. def getTypicalP: Float

  88. def getUseChatTemplate: Boolean

  89. def getUseMlock: Boolean

  90. def getUseMmap: Boolean

  91. def getYarnAttnFactor: Float

  92. def getYarnBetaFast: Float

  93. def getYarnBetaSlow: Float

  94. def getYarnExtFactor: Float

  95. def getYarnOrigCtx: Int

  96. val gpuSplitMode: Param[String]

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  97. val grammar: Param[String]

  98. val grpAttnN: IntParam

  99. val grpAttnW: IntParam

  100. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  101. val ignoreEos: BooleanParam

  102. val inputPrefix: Param[String]

  103. val inputPrefixBos: BooleanParam

  104. val inputSuffix: Param[String]

  105. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  106. val logger: Logger
  107. val lookupCacheDynamicFilePath: Param[String]

  108. val lookupCacheStaticFilePath: Param[String]

  109. val loraAdapters: StructFeature[Map[String, Float]]

  110. val mainGpu: IntParam

  111. val metadata: (HasLlamaCppProperties.this)#ProtectedParam[String]
  112. val minKeep: IntParam

  113. val minP: FloatParam

  114. val miroStat: Param[String]

  115. val miroStatEta: FloatParam

  116. val miroStatTau: FloatParam

  117. val modelDraft: Param[String]

  118. val nBatch: IntParam

  119. val nChunks: IntParam

  120. val nCtx: IntParam

  121. val nDraft: IntParam

  122. val nGpuLayers: IntParam

  123. val nGpuLayersDraft: IntParam

  124. val nKeep: IntParam

  125. val nPredict: IntParam

  126. val nProbs: IntParam

  127. val nSequences: IntParam

  128. val nThreads: IntParam

  129. val nThreadsBatch: IntParam

  130. val nThreadsBatchDraft: IntParam

  131. val nThreadsDraft: IntParam

  132. val nUbatch: IntParam

  133. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  134. val noKvOffload: BooleanParam

  135. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  136. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  137. val numaStrategy: Param[String]

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  138. val pSplit: FloatParam

  139. val penalizeNl: BooleanParam

  140. val penaltyPrompt: Param[String]

  141. val poolingType: Param[String]

    Set the pooling type for embeddings, use model default if unspecified

    Set the pooling type for embeddings, use model default if unspecified

    • 0 UNSPECIFIED: Don't use any pooling
    • 1 MEAN: Mean Pooling
    • 2 CLS: CLS Pooling
  142. val presencePenalty: FloatParam

  143. val repeatLastN: IntParam

  144. val repeatPenalty: FloatParam

  145. val ropeFreqBase: FloatParam

  146. val ropeFreqScale: FloatParam

  147. val ropeScalingType: Param[String]

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  148. val samplers: StringArrayParam

  149. val seed: IntParam

  150. def setCachePrompt(cachePrompt: Boolean): HasLlamaCppProperties.this

    Whether to remember the prompt to avoid reprocessing it

  151. def setChatTemplate(chatTemplate: String): HasLlamaCppProperties.this

    The chat template to use

  152. def setDefragmentationThreshold(defragThold: Float): HasLlamaCppProperties.this

    Set the KV cache defragmentation threshold

  153. def setDisableTokenIds(disableTokenIds: Array[Int]): HasLlamaCppProperties.this

    Set the token ids to disable in the completion.

    Set the token ids to disable in the completion. This corresponds to setTokenBias with a value of Float.NEGATIVE_INFINITY.

  154. def setDynamicTemperatureExponent(dynatempExponent: Float): HasLlamaCppProperties.this

    Set the dynamic temperature exponent

  155. def setDynamicTemperatureRange(dynatempRange: Float): HasLlamaCppProperties.this

    Set the dynamic temperature range

  156. def setEmbedding(embedding: Boolean): HasLlamaCppProperties.this

    Whether to load model with embedding support

  157. def setFlashAttention(flashAttention: Boolean): HasLlamaCppProperties.this

    Whether to enable Flash Attention

  158. def setFrequencyPenalty(frequencyPenalty: Float): HasLlamaCppProperties.this

    Set the repetition alpha frequency penalty

  159. def setGpuSplitMode(splitMode: String): HasLlamaCppProperties.this

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  160. def setGrammar(grammar: String): HasLlamaCppProperties.this

    Set BNF-like grammar to constrain generations

  161. def setGrpAttnN(grpAttnN: Int): HasLlamaCppProperties.this

    Set the group-attention factor

  162. def setGrpAttnW(grpAttnW: Int): HasLlamaCppProperties.this

    Set the group-attention width

  163. def setIgnoreEos(ignoreEos: Boolean): HasLlamaCppProperties.this

    Set whether to ignore end of stream token and continue generating (implies --logit-bias 2-inf)

  164. def setInputPrefix(inputPrefix: String): HasLlamaCppProperties.this

    Set the prompt to start generation with

  165. def setInputPrefixBos(inputPrefixBos: Boolean): HasLlamaCppProperties.this

    Whether to add prefix BOS to user inputs, preceding the --in-prefix string

  166. def setInputSuffix(inputSuffix: String): HasLlamaCppProperties.this

    Set a suffix for infilling

  167. def setLookupCacheDynamicFilePath(lookupCacheDynamicFilePath: String): HasLlamaCppProperties.this

    Set the path to the dynamic lookup cache to use for lookup decoding (updated by generation)

  168. def setLookupCacheStaticFilePath(lookupCacheStaticFilePath: String): HasLlamaCppProperties.this

    Set the path to the static lookup cache to use for lookup decoding (not updated by generation)

  169. def setLoraAdapters(loraAdapters: HashMap[String, Double]): HasLlamaCppProperties.this

    Sets paths to lora adapters with user defined scale.

    Sets paths to lora adapters with user defined scale. (PySpark Override)

  170. def setLoraAdapters(loraAdapters: Map[String, Float]): HasLlamaCppProperties.this

    Sets paths to lora adapters with user defined scale.

  171. def setMainGpu(mainGpu: Int): HasLlamaCppProperties.this

    Set the GPU that is used for scratch and small tensors

  172. def setMetadata(metadata: String): HasLlamaCppProperties.this

    Set the metadata for the model

  173. def setMinKeep(minKeep: Int): HasLlamaCppProperties.this

    Set the minimum number of tokens the samplers should return (0 = disabled)

  174. def setMinP(minP: Float): HasLlamaCppProperties.this

    Set min-p sampling

  175. def setMiroStat(mirostat: String): HasLlamaCppProperties.this

    Set MiroStat sampling strategies.

    Set MiroStat sampling strategies.

    • DISABLED: No MiroStat
    • V1: MiroStat V1
    • V2: MiroStat V2
  176. def setMiroStatEta(mirostatEta: Float): HasLlamaCppProperties.this

    Set the MiroStat learning rate, parameter eta

  177. def setMiroStatTau(mirostatTau: Float): HasLlamaCppProperties.this

    Set the MiroStat target entropy, parameter tau

  178. def setModelDraft(modelDraft: String): HasLlamaCppProperties.this

    Set the draft model for speculative decoding

  179. def setNBatch(nBatch: Int): HasLlamaCppProperties.this

    Set the logical batch size for prompt processing (must be >=32 to use BLAS)

  180. def setNChunks(nChunks: Int): HasLlamaCppProperties.this

    Set the maximal number of chunks to process

  181. def setNCtx(nCtx: Int): HasLlamaCppProperties.this

    Set the size of the prompt context

  182. def setNDraft(nDraft: Int): HasLlamaCppProperties.this

    Set the number of tokens to draft for speculative decoding

  183. def setNGpuLayers(nGpuLayers: Int): HasLlamaCppProperties.this

    Set the number of layers to store in VRAM (-1 - use default)

  184. def setNGpuLayersDraft(nGpuLayersDraft: Int): HasLlamaCppProperties.this

    Set the number of layers to store in VRAM for the draft model (-1 - use default)

  185. def setNKeep(nKeep: Int): HasLlamaCppProperties.this

    Set the number of tokens to keep from the initial prompt

  186. def setNPredict(nPredict: Int): HasLlamaCppProperties.this

    Set the number of tokens to predict

  187. def setNProbs(nProbs: Int): HasLlamaCppProperties.this

    Set the number of top token probabilities to output, if greater than 0.

  188. def setNSequences(nSequences: Int): HasLlamaCppProperties.this

    Set the number of sequences to decode

  189. def setNThreads(nThreads: Int): HasLlamaCppProperties.this

    Set the number of threads to use during generation

  190. def setNThreadsBatch(nThreadsBatch: Int): HasLlamaCppProperties.this

    Set the number of threads to use during batch and prompt processing

  191. def setNThreadsBatchDraft(nThreadsBatchDraft: Int): HasLlamaCppProperties.this

    Set the number of threads to use during batch and prompt processing for the draft model

  192. def setNThreadsDraft(nThreadsDraft: Int): HasLlamaCppProperties.this

    Set the number of threads to use during draft generation

  193. def setNUbatch(nUbatch: Int): HasLlamaCppProperties.this

    Set the physical batch size for prompt processing (must be >=32 to use BLAS)

  194. def setNoKvOffload(noKvOffload: Boolean): HasLlamaCppProperties.this

    Whether to disable KV offload

  195. def setNumaStrategy(numa: String): HasLlamaCppProperties.this

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: spread execution evenly over all nodes
    • ISOLATE: only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  196. def setPSplit(pSplit: Float): HasLlamaCppProperties.this

    Set the speculative decoding split probability

  197. def setPenalizeNl(penalizeNl: Boolean): HasLlamaCppProperties.this

    Set whether to penalize newline tokens

  198. def setPenaltyPrompt(penaltyPrompt: String): HasLlamaCppProperties.this

    Override which part of the prompt is penalized for repetition.

  199. def setPoolingType(poolingType: String): HasLlamaCppProperties.this

    Set the pooling type for embeddings, use model default if unspecified

    Set the pooling type for embeddings, use model default if unspecified

    • UNSPECIFIED: Don't use any pooling
    • MEAN: Mean Pooling
    • CLS: CLS Pooling
  200. def setPresencePenalty(presencePenalty: Float): HasLlamaCppProperties.this

    Set the repetition alpha presence penalty

  201. def setRepeatLastN(repeatLastN: Int): HasLlamaCppProperties.this

    Set the last n tokens to consider for penalties

  202. def setRepeatPenalty(repeatPenalty: Float): HasLlamaCppProperties.this

    Set the penalty of repeated sequences of tokens

  203. def setRopeFreqBase(ropeFreqBase: Float): HasLlamaCppProperties.this

    Set the RoPE base frequency, used by NTK-aware scaling

  204. def setRopeFreqScale(ropeFreqScale: Float): HasLlamaCppProperties.this

    Set the RoPE frequency scaling factor, expands context by a factor of 1/N

  205. def setRopeScalingType(ropeScalingType: String): HasLlamaCppProperties.this

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  206. def setSamplers(samplers: Array[String]): HasLlamaCppProperties.this

    Set which samplers to use for token generation in the given order.

    Set which samplers to use for token generation in the given order.

    Available Samplers are:

    • TOP_K: Top-k sampling
    • TFS_Z: Tail free sampling
    • TYPICAL_P: Locally typical sampling p
    • TOP_P: Top-p sampling
    • MIN_P: Min-p sampling
    • TEMPERATURE: Temperature sampling
  207. def setSeed(seed: Int): HasLlamaCppProperties.this

    Set the RNG seed

  208. def setStopStrings(stopStrings: Array[String]): HasLlamaCppProperties.this

    Set strings upon seeing which token generation is stopped

  209. def setSystemPrompt(systemPrompt: String): HasLlamaCppProperties.this

    Set a system prompt to use

  210. def setTemperature(temperature: Float): HasLlamaCppProperties.this

    Set the temperature

  211. def setTensorSplit(tensorSplit: Array[Double]): HasLlamaCppProperties.this

    Set how split tensors should be distributed across GPUs

  212. def setTfsZ(tfsZ: Float): HasLlamaCppProperties.this

    Set tail free sampling, parameter z

  213. def setTokenBias(tokenBias: HashMap[String, Double]): HasLlamaCppProperties.this

    Set the tokens to disable during completion.

    Set the tokens to disable during completion. (Override for PySpark)

  214. def setTokenBias(tokenBias: Map[String, Float]): HasLlamaCppProperties.this

    Set the tokens to disable during completion.

  215. def setTokenIdBias(tokenIdBias: HashMap[Integer, Double]): HasLlamaCppProperties.this

    Set the token ids to disable in the completion.

    Set the token ids to disable in the completion. (Override for PySpark)

  216. def setTokenIdBias(tokenIdBias: Map[Int, Float]): HasLlamaCppProperties.this

    Set the token ids to disable in the completion.

  217. def setTopK(topK: Int): HasLlamaCppProperties.this

    Set top-k sampling

  218. def setTopP(topP: Float): HasLlamaCppProperties.this

    Set top-p sampling

  219. def setTypicalP(typicalP: Float): HasLlamaCppProperties.this

    Set locally typical sampling, parameter p

  220. def setUseChatTemplate(useChatTemplate: Boolean): HasLlamaCppProperties.this

    Set whether or not generate should apply a chat template

  221. def setUseMlock(useMlock: Boolean): HasLlamaCppProperties.this

    Whether to force the system to keep model in RAM rather than swapping or compressing

  222. def setUseMmap(useMmap: Boolean): HasLlamaCppProperties.this

    Whether to use memory-map model (faster load but may increase pageouts if not using mlock)

  223. def setYarnAttnFactor(yarnAttnFactor: Float): HasLlamaCppProperties.this

    Set the YaRN scale sqrt(t) or attention magnitude

  224. def setYarnBetaFast(yarnBetaFast: Float): HasLlamaCppProperties.this

    Set the YaRN low correction dim or beta

  225. def setYarnBetaSlow(yarnBetaSlow: Float): HasLlamaCppProperties.this

    Set the YaRN high correction dim or alpha

  226. def setYarnExtFactor(yarnExtFactor: Float): HasLlamaCppProperties.this

    Set the YaRN extrapolation mix factor

  227. def setYarnOrigCtx(yarnOrigCtx: Int): HasLlamaCppProperties.this

    Set the YaRN original context size of model

  228. val stopStrings: StringArrayParam

  229. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  230. val systemPrompt: Param[String]

  231. val temperature: FloatParam

  232. val tensorSplit: DoubleArrayParam

  233. val tfsZ: FloatParam

  234. def toString(): String
    Definition Classes
    AnyRef → Any
  235. val tokenBias: StructFeature[Map[String, Float]]

  236. val tokenIdBias: StructFeature[Map[Int, Float]]
  237. val topK: IntParam

  238. val topP: FloatParam

  239. val typicalP: FloatParam

  240. val useChatTemplate: BooleanParam

  241. val useMlock: BooleanParam

  242. val useMmap: BooleanParam

  243. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  244. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  245. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  246. val yarnAttnFactor: FloatParam

  247. val yarnBetaFast: FloatParam

  248. val yarnBetaSlow: FloatParam

  249. val yarnExtFactor: FloatParam

  250. val yarnOrigCtx: IntParam

Inherited from AnyRef

Inherited from Any

Parameter setters

Parameter getters

Parameters

A list of (hyper-)parameter keys this annotator can take. Users can set and get the parameter values through setters and getters, respectively.

Ungrouped