diff --git a/misc/llama-cpp/Makefile b/misc/llama-cpp/Makefile
index fafd959c3860..37c41e840c80 100644
--- a/misc/llama-cpp/Makefile
+++ b/misc/llama-cpp/Makefile
@@ -1,64 +1,75 @@
 PORTNAME=	llama-cpp
 DISTVERSIONPREFIX=	b
 DISTVERSION=	5054
+PORTREVISION=	1
 CATEGORIES=	misc # machine-learning
 
 MAINTAINER=	yuri@FreeBSD.org
 COMMENT=	Facebook's LLaMA model in C/C++ # '
 WWW=		https://github.com/ggerganov/llama.cpp
 
 LICENSE=	MIT
 LICENSE_FILE=	${WRKSRC}/LICENSE
 
 BROKEN_armv7=	clang crashes, see https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=278810
 BROKEN_i386=	compilation fails, see https://github.com/ggerganov/llama.cpp/issues/9545
 
 USES=		cmake:testing compiler:c++11-lang python:run shebangfix
 USE_LDCONFIG=	yes
 
 USE_GITHUB=	yes
 GH_ACCOUNT=	ggerganov
 GH_PROJECT=	llama.cpp
 GH_TUPLE=	nomic-ai:kompute:4565194:kompute/kompute
 
 SHEBANG_GLOB=	*.py
 
 CMAKE_ON=	BUILD_SHARED_LIBS
 CMAKE_OFF=	LLAMA_BUILD_TESTS
 CMAKE_TESTING_ON=	LLAMA_BUILD_TESTS
 
+# user for llama-server, only used when EXAMPLES=ON
+USER=		nobody
+SUB_LIST=	USER=${USER}
+
 OPTIONS_DEFINE=		CURL EXAMPLES VULKAN
 OPTIONS_DEFAULT=	CURL VULKAN
 OPTIONS_SUB=		yes
 
 CURL_DESCR=		Use libcurl to download model from an URL
 CURL_CMAKE_BOOL=	LLAMA_CURL
 CURL_USES=		localbase
 CURL_LIB_DEPENDS=	libcurl.so:ftp/curl
 
 EXAMPLES_CMAKE_BOOL=	LLAMA_BUILD_EXAMPLES
 
 VULKAN_DESC=		Vulkan GPU offload support
 VULKAN_CMAKE_BOOL=	GGML_VULKAN
 VULKAN_BUILD_DEPENDS=	glslc:graphics/shaderc \
 			vulkan-headers>0:graphics/vulkan-headers
 VULKAN_LIB_DEPENDS=	libvulkan.so:graphics/vulkan-loader
 
 BINARY_ALIAS=	git=false \
 		python=${PYTHON_CMD} # for tests
 
 do-test-ci: # build of tests fails, see https://github.com/ggerganov/llama.cpp/issues/10955
 	@cd ${WRKSRC} && \
 		${SETENV} ${MAKE_ENV} bash ci/run.sh ./tmp/results ./tmp/mnt
 
+.include <bsd.port.options.mk>
+
+.if ${PORT_OPTIONS:MEXAMPLES}
+USE_RC_SUBR=	llama-server
+.endif
+
 # tests as of 4458: 97% tests passed, 1 tests failed out of 31, see https://github.com/ggerganov/llama.cpp/issues/11036
 # tests as of 4649:
 # 88% tests passed, 4 tests failed out of 32
 # The following tests FAILED:
 #	 18 - test-chat (Subprocess aborted)	main	# see https://github.com/ggerganov/llama.cpp/issues/11705
 #	 24 - test-gguf (SEGFAULT)	main
 #	 25 - test-backend-ops (SEGFAULT)	main
 #	 32 - test-eval-callback (SEGFAULT)	curl	eval-callback
 
 .include <bsd.port.mk>
diff --git a/misc/llama-cpp/files/llama-server.in b/misc/llama-cpp/files/llama-server.in
new file mode 100644
index 000000000000..d3e564ee488c
--- /dev/null
+++ b/misc/llama-cpp/files/llama-server.in
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+#
+# PROVIDE: llama_server
+# REQUIRE: LOGIN
+# KEYWORD: shutdown
+
+# Add the following lines to /etc/rc.conf to enable llama_server
+# llama_server_enable="YES"
+#
+# llama_server_enable (bool):	Set to YES to enable llama_server
+#				Default: NO
+# llama_server_user (str):	llama_server daemon user
+#				Default: %%USER%%
+# llama_server_model (str):	AI model that llama-server will use
+#				Default: "" (required)
+# llama_server_args (str):	Additional arguments for llama-server
+#				Default: "" (optional)
+# llama_server_log (str):	Log file that llama-server will write log to
+#				Default: "/var/log/llama-server.log" (optional)
+# llama_server_pidfile (str):	Pidfile that llama-server's pid will be written to
+#				Default: "/var/run/llama_server.pid" (optional)
+
+. /etc/rc.subr
+
+name="llama_server"
+rcvar=llama_server_enable
+load_rc_config $name
+
+: ${llama_server_enable:="NO"}
+: ${llama_server_user:="%%USER%%"}
+: ${llama_server_model:=""}
+: ${llama_server_args:=""}
+: ${llama_server_log:="/var/log/llama-server.log"}
+: ${llama_server_pidfile:="/var/run/${name}.pid"}
+
+run_command="%%PREFIX%%/bin/llama-server"
+procname="${run_command}"
+pidfile=${llama_server_pidfile}
+command=/usr/sbin/daemon
+command_args="-f -t ${name} -p ${pidfile} -o ${llama_server_log} ${run_command} -m ${llama_server_model} ${llama_server_args} --keep -1"
+start_precmd="llama_server_precmd"
+llama_server_chdir=/tmp
+
+llama_server_precmd()
+{
+	# check model
+	if [ -z "${llama_server_model}" ]; then
+		echo "llama_server_model isn't set, it is required"
+		exit 1
+	fi
+	if [ ! -f "${llama_server_model}" ]; then
+		echo "llama_server_model isn't a file"
+		exit 1
+	fi
+
+	# initialize pidfile
+	#install -o ${llama_server_user} /dev/null ${llama_server_pidfile}
+
+	# ensure that the log file exists and has right permissions
+	touch ${llama_server_log}
+	chown ${llama_server_user} ${llama_server_log}
+	chmod 640 ${llama_server_log}
+}
+
+run_rc_command "$1"
diff --git a/misc/llama-cpp/pkg-message b/misc/llama-cpp/pkg-message
index 071e82665d9a..157a4db6ea78 100644
--- a/misc/llama-cpp/pkg-message
+++ b/misc/llama-cpp/pkg-message
@@ -1,17 +1,27 @@
 [
 {
 type: install
 message: <<EOM
 ...
+
+To run llama-server as a service add the following to /etc/rc.conf:
+> llama_server_enable=YES
+> llama_server_model=/path/to/models/llama-2-7b-chat.Q4_K_M.gguf
+> llama_server_args="--device Vulkan0 -ngl 27"
 EOM
 }
 ]