diff --git a/misc/llama-cpp/Makefile b/misc/llama-cpp/Makefile
index cde8ceb39f81..d56bdbf3dd40 100644
--- a/misc/llama-cpp/Makefile
+++ b/misc/llama-cpp/Makefile
@@ -1,83 +1,79 @@
 PORTNAME=	llama-cpp
 DISTVERSIONPREFIX=	b
 DISTVERSION=	8895
+PORTREVISION=	1
 CATEGORIES=	misc # machine-learning
 
 MAINTAINER=	yuri@FreeBSD.org
 COMMENT=	Facebook's LLaMA model in C/C++ # '
 WWW=		https://github.com/ggerganov/llama.cpp
 
 LICENSE=	MIT
 LICENSE_FILE=	${WRKSRC}/LICENSE
 
 BROKEN_armv7=	clang crashes, see https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=278810
 BROKEN_i386=	compilation fails, see https://github.com/ggerganov/llama.cpp/issues/9545
 
 LIB_DEPENDS=	libggml-base.so:misc/ggml
 
 USES=		cmake:testing compiler:c++11-lang python:run shebangfix
 USE_LDCONFIG=	yes
 
 USE_GITHUB=	yes
 GH_ACCOUNT=	ggerganov
 GH_PROJECT=	llama.cpp
 GH_TUPLE=	nomic-ai:kompute:4565194:kompute/kompute
 
 SHEBANG_GLOB=	*.py
 
 CMAKE_ON=	BUILD_SHARED_LIBS \
		LLAMA_USE_SYSTEM_GGML
 CMAKE_OFF=	GGML_NATIVE \
		FREEBSD_ALLOW_ADVANCED_CPU_FEATURES \
		LLAMA_BUILD_TESTS
 CMAKE_TESTING_ON=	LLAMA_BUILD_TESTS
 
 # user for llama-server, only used when EXAMPLES=ON
 USER=		nobody
 SUB_LIST=	USER=${USER}
 
-OPTIONS_DEFINE=	CURL EXAMPLES VULKAN
-OPTIONS_DEFAULT=	CURL VULKAN
+OPTIONS_DEFINE=	CURL EXAMPLES
+OPTIONS_DEFAULT=	CURL EXAMPLES
 OPTIONS_SUB=	yes
 
 CURL_DESCR=	Use libcurl to download model from an URL
 CURL_CMAKE_BOOL=	LLAMA_CURL
 CURL_USES=	localbase
 CURL_LIB_DEPENDS=	libcurl.so:ftp/curl
 
 EXAMPLES_CMAKE_BOOL=	LLAMA_BUILD_EXAMPLES
 
-VULKAN_CMAKE_BOOL=	GGML_VULKAN
-VULKAN_BUILD_DEPENDS=	glslc:graphics/shaderc \
-			vulkan-headers>0:graphics/vulkan-headers
-VULKAN_LIB_DEPENDS=	libvulkan.so:graphics/vulkan-loader
-
 BINARY_ALIAS=	git=false \
		python=${PYTHON_CMD} # for tests
 
 post-patch:
	# set version in the code
	@${REINPLACE_CMD} \
		-e "s|set(BUILD_NUMBER 0)|set(BUILD_NUMBER ${DISTVERSION})|" \
		${WRKSRC}/cmake/build-info.cmake
 
 do-test-ci: # build of tests fails, see https://github.com/ggerganov/llama.cpp/issues/10955
	@cd ${WRKSRC} && \
		${SETENV} ${MAKE_ENV} bash ci/run.sh ./tmp/results ./tmp/mnt
 
 .include <bsd.port.options.mk>
 
 .if ${PORT_OPTIONS:MEXAMPLES}
 USE_RC_SUBR=	llama-server
 .endif
 
 # tests as of 4458: 97% tests passed, 1 tests failed out of 31, see https://github.com/ggerganov/llama.cpp/issues/11036
 # tests as of 4649:
 # 88% tests passed, 4 tests failed out of 32
 # The following tests FAILED:
 #	 18 - test-chat (Subprocess aborted)	main # see https://github.com/ggerganov/llama.cpp/issues/11705
 #	 24 - test-gguf (SEGFAULT)	main
 #	 25 - test-backend-ops (SEGFAULT)	main
 #	 32 - test-eval-callback (SEGFAULT)	curl eval-callback
 
 .include <bsd.port.mk>
diff --git a/misc/llama-cpp/files/llama-server.in b/misc/llama-cpp/files/llama-server.in
index 6eec15953978..82992f661ca3 100644
--- a/misc/llama-cpp/files/llama-server.in
+++ b/misc/llama-cpp/files/llama-server.in
@@ -1,63 +1,62 @@
 #!/bin/sh
 #
 # PROVIDE: llama_server
 # REQUIRE: LOGIN
 # KEYWORD: shutdown
 
 # Add the following lines to /etc/rc.conf to enable llama_server
 # llama_server_enable="YES"
 #
 # llama_server_enable (bool):	Set to YES to enable llama_server
 #				Default: NO
 # llama_server_user (str):	llama_server daemon user
 #				Default: %%USER%%
 # llama_server_model (str):	AI model that llama-server will use
-#				Default: "" (required)
+#				Default: "" (optional)
 # llama_server_args (str):	Additional arguments for llama-server
 #				Default: "" (optional)
 # llama_server_log (str):	Log file that llama-server will write log to
 #				Default: "/var/log/llama-server.log" (optional)
 # llama_server_pidfile (str):	Pidfile that llama-server's pid will be written to
 #				Default: "/var/run/llama_server.pid" (optional)
 
 . /etc/rc.subr
 
 name="llama_server"
 rcvar=llama_server_enable
 
 load_rc_config $name
 
 : ${llama_server_enable:="NO"}
 : ${llama_server_user:="%%USER%%"}
 : ${llama_server_model:=""}
 : ${llama_server_args:=""}
 : ${llama_server_log:="/var/log/llama-server.log"}
 : ${llama_server_pidfile:="/var/run/${name}.pid"}
 
 run_command="%%PREFIX%%/bin/llama-server"
 procname="${run_command}"
 pidfile=${llama_server_pidfile}
 command=/usr/sbin/daemon
-command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} -m ${llama_server_model} ${llama_server_args}"
+command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} ${llama_server_args}"
 start_precmd="llama_server_precmd"
 llama_server_chdir=/tmp
 
 llama_server_precmd()
 {
	# check model
-	if [ -z "${llama_server_model}" ]; then
-		echo "llama_server_model isn't set, it is required"
-		exit 1
-	fi
-	if [ ! -f "${llama_server_model}" ]; then
-		echo "llama_server_model isn't a file"
-		exit 1
+	if [ -n "${llama_server_model}" ]; then
+		if [ ! -f "${llama_server_model}" ]; then
+			echo "llama_server_model isn't a file"
+			exit 1
+		fi
+		command_args="${command_args} -m ${llama_server_model}"
	fi
 
	# ensure that the log file exists and has right permissions
	touch ${llama_server_log} ${pidfile}
	chown ${llama_server_user} ${llama_server_log} ${pidfile}
	chmod 640 ${llama_server_log}
 }
 
 run_rc_command "$1"
diff --git a/misc/llama-cpp/pkg-message b/misc/llama-cpp/pkg-message
index 157a4db6ea78..15534c8fd534 100644
--- a/misc/llama-cpp/pkg-message
+++ b/misc/llama-cpp/pkg-message
@@ -1,27 +1,36 @@
 [
 { type: install
   message: <<EOM
 > llama_server_enable=YES
 > llama_server_model=/path/to/models/llama-2-7b-chat.Q4_K_M.gguf
 > llama_server_args="--device Vulkan0 -ngl 27"
+In order to use the multi-model feature, do not set llama_server_model.
+Instead, add "--models-preset /path/to/models.ini" to llama_server_args.
+Add pre-downloaded models to models.ini, for example:
+[Qwen3.5-35B-A3B-Uncensored]
+model = /path/to/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf
+
+You can switch to CPU-only operation by setting the port option
+VULKAN=OFF in misc/ggml (not in llama-cpp).
+
 EOM
 }
 ]