trait HasLlamaCppProperties extends AnyRef
Contains settable parameters for the AutoGGUFModel.
- Self Type
- HasLlamaCppProperties with ParamsAndFeaturesWritable with HasProtectedParams
- Grouped
- Alphabetic
- By Inheritance
- HasLlamaCppProperties
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- val cachePrompt: BooleanParam
- val chatTemplate: Param[String]
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- val defragmentationThreshold: FloatParam
- val disableTokenIds: IntArrayParam
- val dynamicTemperatureExponent: FloatParam
- val dynamicTemperatureRange: FloatParam
- val embedding: BooleanParam
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
- val flashAttention: BooleanParam
- val frequencyPenalty: FloatParam
- def getCachePrompt: Boolean
- def getChatTemplate: String
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getDefragmentationThreshold: Float
- def getDisableTokenIds: Array[Int]
- def getDynamicTemperatureExponent: Float
- def getDynamicTemperatureRange: Float
- def getEmbedding: Boolean
- def getFlashAttention: Boolean
- def getFrequencyPenalty: Float
- def getGrammar: String
- def getGrpAttnN: Int
- def getGrpAttnW: Int
- def getIgnoreEos: Boolean
-
def
getInferenceParameters: InferenceParameters
- Attributes
- protected
- def getInputPrefix: String
- def getInputPrefixBos: Boolean
- def getInputSuffix: String
- def getLookupCacheDynamicFilePath: String
- def getLookupCacheStaticFilePath: String
- def getLoraAdapters: Map[String, Float]
- def getMainGpu: Int
-
def
getMetadata: String
Get the metadata for the model
- def getMinKeep: Int
- def getMinP: Float
- def getMiroStat: String
- def getMiroStatEta: Float
- def getMiroStatTau: Float
- def getModelDraft: String
-
def
getModelParameters: ModelParameters
- Attributes
- protected
- def getNBatch: Int
- def getNChunks: Int
- def getNCtx: Int
- def getNDraft: Int
- def getNGpuLayers: Int
- def getNGpuLayersDraft: Int
- def getNKeep: Int
- def getNPredict: Int
- def getNProbs: Int
- def getNSequences: Int
- def getNThreads: Int
- def getNThreadsBatch: Int
- def getNThreadsBatchDraft: Int
- def getNThreadsDraft: Int
- def getNUbatch: Int
- def getNoKvOffload: Boolean
- def getNuma: String
- def getPSplit: Float
- def getPenalizeNl: Boolean
- def getPenaltyPrompt: String
- def getPoolingType: String
- def getPresencePenalty: Float
- def getRepeatLastN: Int
- def getRepeatPenalty: Float
- def getRopeFreqBase: Float
- def getRopeFreqScale: Float
- def getRopeScalingType: String
- def getSamplers: Array[String]
- def getSeed: Int
- def getSplitMode: String
- def getStopStrings: Array[String]
- def getSystemPrompt: String
- def getTemperature: Float
- def getTensorSplit: Array[Double]
- def getTfsZ: Float
- def getTokenBias: Map[String, Float]
- def getTokenIdBias: Map[Int, Float]
- def getTopK: Int
- def getTopP: Float
- def getTypicalP: Float
- def getUseChatTemplate: Boolean
- def getUseMlock: Boolean
- def getUseMmap: Boolean
- def getYarnAttnFactor: Float
- def getYarnBetaFast: Float
- def getYarnBetaSlow: Float
- def getYarnExtFactor: Float
- def getYarnOrigCtx: Int
-
val
gpuSplitMode: Param[String]
Set how to split the model across GPUs
Set how to split the model across GPUs
- NONE: No GPU split
- LAYER: Split the model across GPUs by layer
- ROW: Split the model across GPUs by rows
- val grammar: Param[String]
- val grpAttnN: IntParam
- val grpAttnW: IntParam
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- val ignoreEos: BooleanParam
- val inputPrefix: Param[String]
- val inputPrefixBos: BooleanParam
- val inputSuffix: Param[String]
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- val logger: Logger
- val lookupCacheDynamicFilePath: Param[String]
- val lookupCacheStaticFilePath: Param[String]
- val loraAdapters: StructFeature[Map[String, Float]]
- val mainGpu: IntParam
- val metadata: (HasLlamaCppProperties.this)#ProtectedParam[String]
- val minKeep: IntParam
- val minP: FloatParam
- val miroStat: Param[String]
- val miroStatEta: FloatParam
- val miroStatTau: FloatParam
- val modelDraft: Param[String]
- val nBatch: IntParam
- val nChunks: IntParam
- val nCtx: IntParam
- val nDraft: IntParam
- val nGpuLayers: IntParam
- val nGpuLayersDraft: IntParam
- val nKeep: IntParam
- val nPredict: IntParam
- val nProbs: IntParam
- val nSequences: IntParam
- val nThreads: IntParam
- val nThreadsBatch: IntParam
- val nThreadsBatchDraft: IntParam
- val nThreadsDraft: IntParam
- val nUbatch: IntParam
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val noKvOffload: BooleanParam
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
val
numaStrategy: Param[String]
Set optimization strategies that help on some NUMA systems (if available)
Set optimization strategies that help on some NUMA systems (if available)
Available Strategies:
- DISABLED: No NUMA optimizations
- DISTRIBUTE: Spread execution evenly over all
- ISOLATE: Only spawn threads on CPUs on the node that execution started on
- NUMA_CTL: Use the CPU map provided by numactl
- MIRROR: Mirrors the model across NUMA nodes
- val pSplit: FloatParam
- val penalizeNl: BooleanParam
- val penaltyPrompt: Param[String]
-
val
poolingType: Param[String]
Set the pooling type for embeddings, use model default if unspecified
Set the pooling type for embeddings, use model default if unspecified
- 0 UNSPECIFIED: Don't use any pooling
- 1 MEAN: Mean Pooling
- 2 CLS: CLS Pooling
- val presencePenalty: FloatParam
- val repeatLastN: IntParam
- val repeatPenalty: FloatParam
- val ropeFreqBase: FloatParam
- val ropeFreqScale: FloatParam
-
val
ropeScalingType: Param[String]
Set the RoPE frequency scaling method, defaults to linear unless specified by the model.
Set the RoPE frequency scaling method, defaults to linear unless specified by the model.
- UNSPECIFIED: Don't use any scaling
- LINEAR: Linear scaling
- YARN: YaRN RoPE scaling
- val samplers: StringArrayParam
- val seed: IntParam
-
def
setCachePrompt(cachePrompt: Boolean): HasLlamaCppProperties.this
Whether to remember the prompt to avoid reprocessing it
-
def
setChatTemplate(chatTemplate: String): HasLlamaCppProperties.this
The chat template to use
-
def
setDefragmentationThreshold(defragThold: Float): HasLlamaCppProperties.this
Set the KV cache defragmentation threshold
-
def
setDisableTokenIds(disableTokenIds: Array[Int]): HasLlamaCppProperties.this
Set the token ids to disable in the completion.
Set the token ids to disable in the completion. This corresponds to
setTokenBias
with a value of Float.NEGATIVE_INFINITY.
-
def
setDynamicTemperatureExponent(dynatempExponent: Float): HasLlamaCppProperties.this
Set the dynamic temperature exponent
-
def
setDynamicTemperatureRange(dynatempRange: Float): HasLlamaCppProperties.this
Set the dynamic temperature range
-
def
setEmbedding(embedding: Boolean): HasLlamaCppProperties.this
Whether to load model with embedding support
-
def
setFlashAttention(flashAttention: Boolean): HasLlamaCppProperties.this
Whether to enable Flash Attention
-
def
setFrequencyPenalty(frequencyPenalty: Float): HasLlamaCppProperties.this
Set the repetition alpha frequency penalty
-
def
setGpuSplitMode(splitMode: String): HasLlamaCppProperties.this
Set how to split the model across GPUs
Set how to split the model across GPUs
- NONE: No GPU split
- LAYER: Split the model across GPUs by layer
- ROW: Split the model across GPUs by rows
-
def
setGrammar(grammar: String): HasLlamaCppProperties.this
Set BNF-like grammar to constrain generations
-
def
setGrpAttnN(grpAttnN: Int): HasLlamaCppProperties.this
Set the group-attention factor
-
def
setGrpAttnW(grpAttnW: Int): HasLlamaCppProperties.this
Set the group-attention width
-
def
setIgnoreEos(ignoreEos: Boolean): HasLlamaCppProperties.this
Set whether to ignore end of stream token and continue generating (implies --logit-bias 2-inf)
-
def
setInputPrefix(inputPrefix: String): HasLlamaCppProperties.this
Set the prompt to start generation with
-
def
setInputPrefixBos(inputPrefixBos: Boolean): HasLlamaCppProperties.this
Whether to add prefix BOS to user inputs, preceding the
--in-prefix
string
-
def
setInputSuffix(inputSuffix: String): HasLlamaCppProperties.this
Set a suffix for infilling
-
def
setLookupCacheDynamicFilePath(lookupCacheDynamicFilePath: String): HasLlamaCppProperties.this
Set the path to a dynamic lookup cache file to use for lookup decoding (updated by generation)
-
def
setLookupCacheStaticFilePath(lookupCacheStaticFilePath: String): HasLlamaCppProperties.this
Set the path to a static lookup cache file to use for lookup decoding (not updated by generation)
-
def
setLoraAdapters(loraAdapters: HashMap[String, Double]): HasLlamaCppProperties.this
Sets paths to lora adapters with user defined scale.
Sets paths to lora adapters with user defined scale. (PySpark Override)
-
def
setLoraAdapters(loraAdapters: Map[String, Float]): HasLlamaCppProperties.this
Sets paths to lora adapters with user defined scale.
-
def
setMainGpu(mainGpu: Int): HasLlamaCppProperties.this
Set the GPU that is used for scratch and small tensors
-
def
setMetadata(metadata: String): HasLlamaCppProperties.this
Set the metadata for the model
-
def
setMinKeep(minKeep: Int): HasLlamaCppProperties.this
Set the amount of tokens the samplers should return at least (0 = disabled)
-
def
setMinP(minP: Float): HasLlamaCppProperties.this
Set min-p sampling
-
def
setMiroStat(mirostat: String): HasLlamaCppProperties.this
Set MiroStat sampling strategies.
Set MiroStat sampling strategies.
- DISABLED: No MiroStat
- V1: MiroStat V1
- V2: MiroStat V2
-
def
setMiroStatEta(mirostatEta: Float): HasLlamaCppProperties.this
Set the MiroStat learning rate, parameter eta
-
def
setMiroStatTau(mirostatTau: Float): HasLlamaCppProperties.this
Set the MiroStat target entropy, parameter tau
-
def
setModelDraft(modelDraft: String): HasLlamaCppProperties.this
Set the draft model for speculative decoding
-
def
setNBatch(nBatch: Int): HasLlamaCppProperties.this
Set the logical batch size for prompt processing (must be >=32 to use BLAS)
-
def
setNChunks(nChunks: Int): HasLlamaCppProperties.this
Set the maximal number of chunks to process
-
def
setNCtx(nCtx: Int): HasLlamaCppProperties.this
Set the size of the prompt context
-
def
setNDraft(nDraft: Int): HasLlamaCppProperties.this
Set the number of tokens to draft for speculative decoding
-
def
setNGpuLayers(nGpuLayers: Int): HasLlamaCppProperties.this
Set the number of layers to store in VRAM (-1 - use default)
-
def
setNGpuLayersDraft(nGpuLayersDraft: Int): HasLlamaCppProperties.this
Set the number of layers to store in VRAM for the draft model (-1 - use default)
-
def
setNKeep(nKeep: Int): HasLlamaCppProperties.this
Set the number of tokens to keep from the initial prompt
-
def
setNPredict(nPredict: Int): HasLlamaCppProperties.this
Set the number of tokens to predict
-
def
setNProbs(nProbs: Int): HasLlamaCppProperties.this
Set the amount of top token probabilities to output if greater than 0.
-
def
setNSequences(nSequences: Int): HasLlamaCppProperties.this
Set the number of sequences to decode
-
def
setNThreads(nThreads: Int): HasLlamaCppProperties.this
Set the number of threads to use during generation
-
def
setNThreadsBatch(nThreadsBatch: Int): HasLlamaCppProperties.this
Set the number of threads to use during batch and prompt processing
-
def
setNThreadsBatchDraft(nThreadsBatchDraft: Int): HasLlamaCppProperties.this
Set the number of threads to use during batch and prompt processing
-
def
setNThreadsDraft(nThreadsDraft: Int): HasLlamaCppProperties.this
Set the number of threads to use during draft generation
-
def
setNUbatch(nUbatch: Int): HasLlamaCppProperties.this
Set the physical batch size for prompt processing (must be >=32 to use BLAS)
-
def
setNoKvOffload(noKvOffload: Boolean): HasLlamaCppProperties.this
Whether to disable KV offload
-
def
setNumaStrategy(numa: String): HasLlamaCppProperties.this
Set optimization strategies that help on some NUMA systems (if available)
Set optimization strategies that help on some NUMA systems (if available)
Available Strategies:
- DISABLED: No NUMA optimizations
- DISTRIBUTE: spread execution evenly over all
- ISOLATE: only spawn threads on CPUs on the node that execution started on
- NUMA_CTL: use the CPU map provided by numactl
- MIRROR: Mirrors the model across NUMA nodes
-
def
setPSplit(pSplit: Float): HasLlamaCppProperties.this
Set the speculative decoding split probability
-
def
setPenalizeNl(penalizeNl: Boolean): HasLlamaCppProperties.this
Set whether to penalize newline tokens
-
def
setPenaltyPrompt(penaltyPrompt: String): HasLlamaCppProperties.this
Override which part of the prompt is penalized for repetition.
-
def
setPoolingType(poolingType: String): HasLlamaCppProperties.this
Set the pooling type for embeddings, use model default if unspecified
Set the pooling type for embeddings, use model default if unspecified
- UNSPECIFIED: Don't use any pooling
- MEAN: Mean Pooling
- CLS: CLS Pooling
-
def
setPresencePenalty(presencePenalty: Float): HasLlamaCppProperties.this
Set the repetition alpha presence penalty
-
def
setRepeatLastN(repeatLastN: Int): HasLlamaCppProperties.this
Set the last n tokens to consider for penalties
-
def
setRepeatPenalty(repeatPenalty: Float): HasLlamaCppProperties.this
Set the penalty of repeated sequences of tokens
-
def
setRopeFreqBase(ropeFreqBase: Float): HasLlamaCppProperties.this
Set the RoPE base frequency, used by NTK-aware scaling
-
def
setRopeFreqScale(ropeFreqScale: Float): HasLlamaCppProperties.this
Set the RoPE frequency scaling factor, expands context by a factor of 1/N
-
def
setRopeScalingType(ropeScalingType: String): HasLlamaCppProperties.this
Set the RoPE frequency scaling method, defaults to linear unless specified by the model.
Set the RoPE frequency scaling method, defaults to linear unless specified by the model.
- UNSPECIFIED: Don't use any scaling
- LINEAR: Linear scaling
- YARN: YaRN RoPE scaling
-
def
setSamplers(samplers: Array[String]): HasLlamaCppProperties.this
Set which samplers to use for token generation in the given order.
Set which samplers to use for token generation in the given order.
Available Samplers are:
- TOP_K: Top-k sampling
- TFS_Z: Tail free sampling
- TYPICAL_P: Locally typical sampling p
- TOP_P: Top-p sampling
- MIN_P: Min-p sampling
- TEMPERATURE: Temperature sampling
-
def
setSeed(seed: Int): HasLlamaCppProperties.this
Set the RNG seed
-
def
setStopStrings(stopStrings: Array[String]): HasLlamaCppProperties.this
Set strings upon seeing which token generation is stopped
-
def
setSystemPrompt(systemPrompt: String): HasLlamaCppProperties.this
Set a system prompt to use
-
def
setTemperature(temperature: Float): HasLlamaCppProperties.this
Set the temperature
-
def
setTensorSplit(tensorSplit: Array[Double]): HasLlamaCppProperties.this
Set how split tensors should be distributed across GPUs
-
def
setTfsZ(tfsZ: Float): HasLlamaCppProperties.this
Set tail free sampling, parameter z
-
def
setTokenBias(tokenBias: HashMap[String, Double]): HasLlamaCppProperties.this
Set the tokens to disable during completion.
Set the tokens to disable during completion. (Override for PySpark)
-
def
setTokenBias(tokenBias: Map[String, Float]): HasLlamaCppProperties.this
Set the tokens to disable during completion.
-
def
setTokenIdBias(tokenIdBias: HashMap[Integer, Double]): HasLlamaCppProperties.this
Set the token ids to disable in the completion.
Set the token ids to disable in the completion. (Override for PySpark)
-
def
setTokenIdBias(tokenIdBias: Map[Int, Float]): HasLlamaCppProperties.this
Set the token ids to disable in the completion.
-
def
setTopK(topK: Int): HasLlamaCppProperties.this
Set top-k sampling
-
def
setTopP(topP: Float): HasLlamaCppProperties.this
Set top-p sampling
-
def
setTypicalP(typicalP: Float): HasLlamaCppProperties.this
Set locally typical sampling, parameter p
-
def
setUseChatTemplate(useChatTemplate: Boolean): HasLlamaCppProperties.this
Set whether or not generate should apply a chat template
-
def
setUseMlock(useMlock: Boolean): HasLlamaCppProperties.this
Whether to force the system to keep model in RAM rather than swapping or compressing
-
def
setUseMmap(useMmap: Boolean): HasLlamaCppProperties.this
Whether to use memory-map model (faster load but may increase pageouts if not using mlock)
-
def
setYarnAttnFactor(yarnAttnFactor: Float): HasLlamaCppProperties.this
Set the YaRN scale sqrt(t) or attention magnitude
-
def
setYarnBetaFast(yarnBetaFast: Float): HasLlamaCppProperties.this
Set the YaRN low correction dim or beta
-
def
setYarnBetaSlow(yarnBetaSlow: Float): HasLlamaCppProperties.this
Set the YaRN high correction dim or alpha
-
def
setYarnExtFactor(yarnExtFactor: Float): HasLlamaCppProperties.this
Set the YaRN extrapolation mix factor
-
def
setYarnOrigCtx(yarnOrigCtx: Int): HasLlamaCppProperties.this
Set the YaRN original context size of model
- val stopStrings: StringArrayParam
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val systemPrompt: Param[String]
- val temperature: FloatParam
- val tensorSplit: DoubleArrayParam
- val tfsZ: FloatParam
-
def
toString(): String
- Definition Classes
- AnyRef → Any
- val tokenBias: StructFeature[Map[String, Float]]
- val tokenIdBias: StructFeature[Map[Int, Float]]
- val topK: IntParam
- val topP: FloatParam
- val typicalP: FloatParam
- val useChatTemplate: BooleanParam
- val useMlock: BooleanParam
- val useMmap: BooleanParam
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- val yarnAttnFactor: FloatParam
- val yarnBetaFast: FloatParam
- val yarnBetaSlow: FloatParam
- val yarnExtFactor: FloatParam
- val yarnOrigCtx: IntParam
Inherited from AnyRef
Inherited from Any
Parameter setters
Parameter getters
Parameters
A list of (hyper-)parameter keys this annotator can take. Users can set and get the parameter values through setters and getters, respectively.