Description
In the past five months since Qwen2-VL’s release, numerous developers have built new models on the Qwen2-VL vision-language models, providing us with valuable feedback. During this period, we focused on building more useful vision-language models. Today, we are excited to introduce the latest addition to the Qwen family: Qwen2.5-VL.
Imported from https://huggingface.co/ggml-org/Qwen2.5-VL-3B-Instruct-GGUF
Predicted Entities
How to use
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from pyspark.sql.functions import lit

# Start a Spark session with Spark NLP on the classpath.
# This defines `spark`, which is required by ImageAssembler.loadImagesAsBytes below.
spark = sparknlp.start()

# Wraps the text prompt ("caption" column) into a DOCUMENT annotation.
documentAssembler = DocumentAssembler() \
    .setInputCol("caption") \
    .setOutputCol("caption_document")

# Wraps the raw image bytes into an IMAGE annotation.
imageAssembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

imagesPath = "src/test/resources/image/"
data = ImageAssembler \
    .loadImagesAsBytes(spark, imagesPath) \
    .withColumn("caption", lit("Caption this image."))  # Add a caption to each image.

# Maximum number of tokens to generate per completion.
nPredict = 40

# Vision-language GGUF model; takes both the prompt document and the image.
model = AutoGGUFVisionModel.pretrained("Qwen2.5_VL_3B_Instruct_Q4_K_M_gguf") \
    .setInputCols(["caption_document", "image_assembler"]) \
    .setOutputCol("completions") \
    .setBatchSize(4) \
    .setNGpuLayers(99) \
    .setNCtx(4096) \
    .setNPredict(nPredict)

pipeline = Pipeline().setStages([documentAssembler, imageAssembler, model])
pipeline.fit(data).transform(data) \
    .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "completions.result") \
    .show(truncate=False)
import com.johnsnowlabs.nlp.ImageAssembler
import com.johnsnowlabs.nlp.annotator._
import com.johnsnowlabs.nlp.base._
// Required for ResourceHelper.spark used below; missing in the original snippet.
import com.johnsnowlabs.nlp.util.io.ResourceHelper
import org.apache.spark.ml.Pipeline
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.lit

// Wraps the text prompt ("caption" column) into a DOCUMENT annotation.
val documentAssembler = new DocumentAssembler()
  .setInputCol("caption")
  .setOutputCol("caption_document")

// Wraps the raw image bytes into an IMAGE annotation.
val imageAssembler = new ImageAssembler()
  .setInputCol("image")
  .setOutputCol("image_assembler")

val imagesPath = "src/test/resources/image/"
val data: DataFrame = ImageAssembler
  .loadImagesAsBytes(ResourceHelper.spark, imagesPath)
  .withColumn("caption", lit("Caption this image.")) // Add a caption to each image.

// Maximum number of tokens to generate per completion.
val nPredict = 40

// Vision-language GGUF model; takes both the prompt document and the image.
val model = AutoGGUFVisionModel.pretrained("Qwen2.5_VL_3B_Instruct_Q4_K_M_gguf")
  .setInputCols("caption_document", "image_assembler")
  .setOutputCol("completions")
  .setBatchSize(4)
  .setNGpuLayers(99)
  .setNCtx(4096)
  .setNPredict(nPredict)

val pipeline = new Pipeline().setStages(Array(documentAssembler, imageAssembler, model))
pipeline
  .fit(data)
  .transform(data)
  .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "completions.result")
  .show(truncate = false)
Model Information
Model Name: | Qwen2.5_VL_3B_Instruct_Q4_K_M_gguf |
Compatibility: | Spark NLP 6.1.1+ |
License: | Open Source |
Edition: | Official |
Input Labels: | [caption_document, image_assembler] |
Output Labels: | [completions] |
Language: | en |
Size: | 2.6 GB |