Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	README.md
#	common/common.cpp
#	examples/embedding/embedding.cpp
#	examples/json_schema_to_grammar.py
#	examples/llama.android/llama/src/main/cpp/llama-android.cpp
#	examples/llama.swiftui/README.md
#	examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
#	examples/lookahead/lookahead.cpp
#	examples/parallel/parallel.cpp
#	examples/passkey/passkey.cpp
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cpu/CMakeLists.txt
#	requirements.txt
#	requirements/requirements-all.txt
#	scripts/fetch_server_test_models.py
#	tests/test-chat.cpp
#	tests/test-json-schema-to-grammar.cpp
Concedo 2025-03-06 18:54:58 +08:00
commit ec43d2b147
52 changed files with 5600 additions and 946 deletions

build-xcframework.sh (new executable file, 519 lines)

@@ -0,0 +1,519 @@
#!/bin/bash
#
# Options
IOS_MIN_OS_VERSION=16.4
MACOS_MIN_OS_VERSION=13.3
VISIONOS_MIN_OS_VERSION=1.0
TVOS_MIN_OS_VERSION=16.4
BUILD_SHARED_LIBS=OFF
LLAMA_BUILD_EXAMPLES=OFF
LLAMA_BUILD_TESTS=OFF
LLAMA_BUILD_SERVER=OFF
GGML_METAL=ON
GGML_METAL_EMBED_LIBRARY=ON
GGML_BLAS_DEFAULT=ON
GGML_METAL_USE_BF16=ON
GGML_OPENMP=OFF
COMMON_C_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
COMMON_CXX_FLAGS="-Wno-macro-redefined -Wno-shorten-64-to-32 -Wno-unused-command-line-argument -g"
# Common options for all builds
COMMON_CMAKE_ARGS=(
    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED=NO
    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY=""
    -DCMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED=NO
    -DCMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT="dwarf-with-dsym"
    -DCMAKE_XCODE_ATTRIBUTE_GCC_GENERATE_DEBUGGING_SYMBOLS=YES
    -DCMAKE_XCODE_ATTRIBUTE_COPY_PHASE_STRIP=NO
    -DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO
    -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
    -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}
    -DLLAMA_BUILD_EXAMPLES=${LLAMA_BUILD_EXAMPLES}
    -DLLAMA_BUILD_TESTS=${LLAMA_BUILD_TESTS}
    -DLLAMA_BUILD_SERVER=${LLAMA_BUILD_SERVER}
    -DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY}
    -DGGML_BLAS_DEFAULT=${GGML_BLAS_DEFAULT}
    -DGGML_METAL=${GGML_METAL}
    -DGGML_METAL_USE_BF16=${GGML_METAL_USE_BF16}
    -DGGML_NATIVE=OFF
    -DGGML_OPENMP=${GGML_OPENMP}
)
check_required_tool() {
    local tool=$1
    local install_message=$2

    if ! command -v $tool &> /dev/null; then
        echo "Error: $tool is required but not found."
        echo "$install_message"
        exit 1
    fi
}
echo "Checking for required tools..."
check_required_tool "cmake" "Please install CMake 3.28.0 or later (brew install cmake)"
check_required_tool "xcodebuild" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
check_required_tool "libtool" "Please install libtool which should be available with Xcode Command Line Tools (CLT). Make sure Xcode CLT is installed (xcode-select --install)"
check_required_tool "dsymutil" "Please install Xcode and Xcode Command Line Tools (xcode-select --install)"
set -e
## Clean up previous builds
rm -rf build-apple
rm -rf build-ios-sim
rm -rf build-ios-device
rm -rf build-macos
rm -rf build-visionos
rm -rf build-visionos-sim
rm -rf build-tvos-sim
rm -rf build-tvos-device
# Setup the xcframework build directory structure
setup_framework_structure() {
    local build_dir=$1
    local min_os_version=$2
    local platform=$3  # "ios", "macos", "visionos", or "tvos"
    local framework_name="llama"

    echo "Creating ${platform}-style framework structure for ${build_dir}"

    if [[ "$platform" == "macos" ]]; then
        # macOS uses a versioned directory structure
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Headers
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Modules
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Versions/A/Resources

        # Create symbolic links
        ln -sf A ${build_dir}/framework/${framework_name}.framework/Versions/Current
        ln -sf Versions/Current/Headers ${build_dir}/framework/${framework_name}.framework/Headers
        ln -sf Versions/Current/Modules ${build_dir}/framework/${framework_name}.framework/Modules
        ln -sf Versions/Current/Resources ${build_dir}/framework/${framework_name}.framework/Resources
        ln -sf Versions/Current/${framework_name} ${build_dir}/framework/${framework_name}.framework/${framework_name}

        # Set header and module paths
        local header_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Headers/
        local module_path=${build_dir}/framework/${framework_name}.framework/Versions/A/Modules/
    else
        # iOS/visionOS/tvOS use a flat structure
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Headers
        mkdir -p ${build_dir}/framework/${framework_name}.framework/Modules

        # Remove any existing structure to ensure a clean build
        rm -rf ${build_dir}/framework/${framework_name}.framework/Versions

        # Set header and module paths
        local header_path=${build_dir}/framework/${framework_name}.framework/Headers/
        local module_path=${build_dir}/framework/${framework_name}.framework/Modules/
    fi

    # Copy all required headers (common for all platforms)
    cp include/llama.h ${header_path}
    cp ggml/include/ggml.h ${header_path}
    cp ggml/include/ggml-alloc.h ${header_path}
    cp ggml/include/ggml-backend.h ${header_path}
    cp ggml/include/ggml-metal.h ${header_path}
    cp ggml/include/ggml-cpu.h ${header_path}
    cp ggml/include/ggml-blas.h ${header_path}
    cp ggml/include/gguf.h ${header_path}

    # Create module map (common for all platforms)
    cat > ${module_path}module.modulemap << EOF
framework module llama {
    header "llama.h"
    header "ggml.h"
    header "ggml-alloc.h"
    header "ggml-backend.h"
    header "ggml-metal.h"
    header "ggml-cpu.h"
    header "ggml-blas.h"
    header "gguf.h"

    link "c++"
    link framework "Accelerate"
    link framework "Metal"
    link framework "Foundation"

    export *
}
EOF

    # Platform-specific settings for Info.plist
    local platform_name=""
    local sdk_name=""
    local supported_platform=""

    case "$platform" in
        "ios")
            platform_name="iphoneos"
            sdk_name="iphoneos${min_os_version}"
            supported_platform="iPhoneOS"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
            local device_family=' <key>UIDeviceFamily</key>
    <array>
        <integer>1</integer>
        <integer>2</integer>
    </array>'
            ;;
        "macos")
            platform_name="macosx"
            sdk_name="macosx${min_os_version}"
            supported_platform="MacOSX"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Versions/A/Resources/Info.plist"
            local device_family=""
            ;;
        "visionos")
            platform_name="xros"
            sdk_name="xros${min_os_version}"
            supported_platform="XRPlatform"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
            local device_family=""
            ;;
        "tvos")
            platform_name="appletvos"
            sdk_name="appletvos${min_os_version}"
            supported_platform="AppleTVOS"
            local plist_path="${build_dir}/framework/${framework_name}.framework/Info.plist"
            local device_family=' <key>UIDeviceFamily</key>
    <array>
        <integer>3</integer>
    </array>'
            ;;
    esac

    # Create Info.plist
    cat > ${plist_path} << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleDevelopmentRegion</key>
    <string>en</string>
    <key>CFBundleExecutable</key>
    <string>llama</string>
    <key>CFBundleIdentifier</key>
    <string>org.ggml.llama</string>
    <key>CFBundleInfoDictionaryVersion</key>
    <string>6.0</string>
    <key>CFBundleName</key>
    <string>llama</string>
    <key>CFBundlePackageType</key>
    <string>FMWK</string>
    <key>CFBundleShortVersionString</key>
    <string>1.0</string>
    <key>CFBundleVersion</key>
    <string>1</string>
    <key>MinimumOSVersion</key>
    <string>${min_os_version}</string>
    <key>CFBundleSupportedPlatforms</key>
    <array>
        <string>${supported_platform}</string>
    </array>${device_family}
    <key>DTPlatformName</key>
    <string>${platform_name}</string>
    <key>DTSDKName</key>
    <string>${sdk_name}</string>
</dict>
</plist>
EOF
}
# Create dynamic libraries from static libraries.
combine_static_libraries() {
    local build_dir="$1"
    local release_dir="$2"
    local platform="$3"  # "ios", "macos", "visionos", or "tvos"
    local is_simulator="$4"
    local base_dir="$(pwd)"
    local framework_name="llama"

    # Determine output path based on platform
    local output_lib=""
    if [[ "$platform" == "macos" ]]; then
        # macOS uses a versioned structure
        output_lib="${build_dir}/framework/${framework_name}.framework/Versions/A/${framework_name}"
    else
        # iOS, visionOS, and tvOS use a flat directory structure
        output_lib="${build_dir}/framework/${framework_name}.framework/${framework_name}"
    fi

    local libs=(
        "${base_dir}/${build_dir}/src/${release_dir}/libllama.a"
        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml.a"
        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-base.a"
        "${base_dir}/${build_dir}/ggml/src/${release_dir}/libggml-cpu.a"
        "${base_dir}/${build_dir}/ggml/src/ggml-metal/${release_dir}/libggml-metal.a"
        "${base_dir}/${build_dir}/ggml/src/ggml-blas/${release_dir}/libggml-blas.a"
    )

    # Create temporary directory for processing
    local temp_dir="${base_dir}/${build_dir}/temp"
    mkdir -p "${temp_dir}"

    # Since we have multiple architectures, libtool will find object files that do not
    # match the target architecture. We suppress these warnings.
    libtool -static -o "${temp_dir}/combined.a" "${libs[@]}" 2> /dev/null

    # Determine SDK, architectures, and install_name based on platform and simulator flag.
    local sdk=""
    local archs=""
    local min_version_flag=""
    local install_name=""

    case "$platform" in
        "ios")
            if [[ "$is_simulator" == "true" ]]; then
                sdk="iphonesimulator"
                archs="arm64 x86_64"
                min_version_flag="-mios-simulator-version-min=${IOS_MIN_OS_VERSION}"
            else
                sdk="iphoneos"
                archs="arm64"
                min_version_flag="-mios-version-min=${IOS_MIN_OS_VERSION}"
            fi
            install_name="@rpath/llama.framework/llama"
            ;;
        "macos")
            sdk="macosx"
            archs="arm64 x86_64"
            min_version_flag="-mmacosx-version-min=${MACOS_MIN_OS_VERSION}"
            install_name="@rpath/llama.framework/Versions/Current/llama"
            ;;
        "visionos")
            if [[ "$is_simulator" == "true" ]]; then
                sdk="xrsimulator"
                archs="arm64 x86_64"
                min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}-simulator"
            else
                sdk="xros"
                archs="arm64"
                min_version_flag="-mtargetos=xros${VISIONOS_MIN_OS_VERSION}"
            fi
            # Use a flat structure for visionOS, same as iOS
            install_name="@rpath/llama.framework/llama"
            ;;
        "tvos")
            if [[ "$is_simulator" == "true" ]]; then
                sdk="appletvsimulator"
                archs="arm64 x86_64"
                min_version_flag="-mtvos-simulator-version-min=${TVOS_MIN_OS_VERSION}"
            else
                sdk="appletvos"
                archs="arm64"
                min_version_flag="-mtvos-version-min=${TVOS_MIN_OS_VERSION}"
            fi
            install_name="@rpath/llama.framework/llama"
            ;;
    esac

    # Build architecture flags
    local arch_flags=""
    for arch in $archs; do
        arch_flags+=" -arch $arch"
    done

    # Create dynamic library
    echo "Creating dynamic library for ${platform}."
    xcrun -sdk $sdk clang++ -dynamiclib \
        -isysroot $(xcrun --sdk $sdk --show-sdk-path) \
        $arch_flags \
        $min_version_flag \
        -Wl,-force_load,"${temp_dir}/combined.a" \
        -framework Foundation -framework Metal -framework Accelerate \
        -install_name "$install_name" \
        -o "${base_dir}/${output_lib}"

    # Platform-specific post-processing for device builds
    if [[ "$is_simulator" == "false" ]]; then
        if command -v vtool &>/dev/null; then
            case "$platform" in
                "ios")
                    echo "Marking binary as a framework binary for iOS..."
                    vtool -set-build-version ios ${IOS_MIN_OS_VERSION} ${IOS_MIN_OS_VERSION} -replace \
                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                    ;;
                "visionos")
                    echo "Marking binary as a framework binary for visionOS..."
                    vtool -set-build-version xros ${VISIONOS_MIN_OS_VERSION} ${VISIONOS_MIN_OS_VERSION} -replace \
                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                    ;;
                "tvos")
                    echo "Marking binary as a framework binary for tvOS..."
                    vtool -set-build-version tvos ${TVOS_MIN_OS_VERSION} ${TVOS_MIN_OS_VERSION} -replace \
                        -output "${base_dir}/${output_lib}" "${base_dir}/${output_lib}"
                    ;;
            esac
        else
            echo "Warning: vtool not found. Binary may not pass App Store validation."
        fi
    fi

    echo "Creating properly formatted dSYM..."
    # Create a separate directory for dSYMs for all platforms
    mkdir -p "${base_dir}/${build_dir}/dSYMs"

    # iOS/visionOS/tvOS style dSYM (flat structure)
    if [[ "$platform" == "ios" || "$platform" == "visionos" || "$platform" == "tvos" ]]; then
        # Generate dSYM in the dSYMs directory
        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/llama.dSYM"

        # Create a copy of the binary that will be stripped
        cp "${base_dir}/${output_lib}" "${temp_dir}/binary_to_strip"

        # Strip debug symbols from the copy
        xcrun strip -S "${temp_dir}/binary_to_strip" -o "${temp_dir}/stripped_lib"

        # Replace the original with the stripped version
        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
    else
        # macOS style dSYM
        # First strip debug info to a separate file
        xcrun strip -S "${base_dir}/${output_lib}" -o "${temp_dir}/stripped_lib"

        # Generate dSYM in the dSYMs directory
        xcrun dsymutil "${base_dir}/${output_lib}" -o "${base_dir}/${build_dir}/dSYMs/llama.dSYM"

        # Replace original binary with stripped version
        mv "${temp_dir}/stripped_lib" "${base_dir}/${output_lib}"
    fi

    # Remove any automatically generated dSYM files in the framework structure, as they will
    # otherwise cause Invalid Bundle Structure validation errors.
    if [ -d "${base_dir}/${output_lib}.dSYM" ]; then
        echo "Removing generated dSYM file in framework structure: ${base_dir}/${output_lib}.dSYM"
        rm -rf "${base_dir}/${output_lib}.dSYM"
    fi

    # Clean up
    rm -rf "${temp_dir}"
}
echo "Building for iOS simulator..."
cmake -B build-ios-sim -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
    -DIOS=ON \
    -DCMAKE_SYSTEM_NAME=iOS \
    -DCMAKE_OSX_SYSROOT=iphonesimulator \
    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphonesimulator \
    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-ios-sim --config Release -- -quiet

echo "Building for iOS devices..."
cmake -B build-ios-device -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
    -DCMAKE_OSX_SYSROOT=iphoneos \
    -DCMAKE_OSX_ARCHITECTURES="arm64" \
    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \
    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-ios-device --config Release -- -quiet

echo "Building for macOS..."
cmake -B build-macos -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${MACOS_MIN_OS_VERSION} \
    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-macos --config Release -- -quiet

echo "Building for visionOS..."
cmake -B build-visionos -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
    -DCMAKE_OSX_ARCHITECTURES="arm64" \
    -DCMAKE_SYSTEM_NAME=visionOS \
    -DCMAKE_OSX_SYSROOT=xros \
    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xros \
    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-visionos --config Release -- -quiet

echo "Building for visionOS simulator..."
cmake -B build-visionos-sim -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${VISIONOS_MIN_OS_VERSION} \
    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
    -DCMAKE_SYSTEM_NAME=visionOS \
    -DCMAKE_OSX_SYSROOT=xrsimulator \
    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=xrsimulator \
    -DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 -Du_int=unsigned\ int -Du_char=unsigned\ char -Du_short=unsigned\ short ${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-visionos-sim --config Release -- -quiet

# Add tvOS builds (might need the same u_int definitions as watchOS and visionOS)
echo "Building for tvOS simulator..."
cmake -B build-tvos-sim -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
    -DCMAKE_SYSTEM_NAME=tvOS \
    -DCMAKE_OSX_SYSROOT=appletvsimulator \
    -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" \
    -DGGML_METAL=ON \
    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvsimulator \
    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-tvos-sim --config Release -- -quiet

echo "Building for tvOS devices..."
cmake -B build-tvos-device -G Xcode \
    "${COMMON_CMAKE_ARGS[@]}" \
    -DCMAKE_OSX_DEPLOYMENT_TARGET=${TVOS_MIN_OS_VERSION} \
    -DCMAKE_SYSTEM_NAME=tvOS \
    -DCMAKE_OSX_SYSROOT=appletvos \
    -DCMAKE_OSX_ARCHITECTURES="arm64" \
    -DGGML_METAL=ON \
    -DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=appletvos \
    -DCMAKE_C_FLAGS="${COMMON_C_FLAGS}" \
    -DCMAKE_CXX_FLAGS="${COMMON_CXX_FLAGS}" \
    -S .
cmake --build build-tvos-device --config Release -- -quiet
# Setup frameworks and copy binaries and headers
echo "Setting up framework structures..."
setup_framework_structure "build-ios-sim" ${IOS_MIN_OS_VERSION} "ios"
setup_framework_structure "build-ios-device" ${IOS_MIN_OS_VERSION} "ios"
setup_framework_structure "build-macos" ${MACOS_MIN_OS_VERSION} "macos"
setup_framework_structure "build-visionos" ${VISIONOS_MIN_OS_VERSION} "visionos"
setup_framework_structure "build-visionos-sim" ${VISIONOS_MIN_OS_VERSION} "visionos"
setup_framework_structure "build-tvos-sim" ${TVOS_MIN_OS_VERSION} "tvos"
setup_framework_structure "build-tvos-device" ${TVOS_MIN_OS_VERSION} "tvos"
# Create dynamic libraries from static libraries
echo "Creating dynamic libraries from static libraries..."
combine_static_libraries "build-ios-sim" "Release-iphonesimulator" "ios" "true"
combine_static_libraries "build-ios-device" "Release-iphoneos" "ios" "false"
combine_static_libraries "build-macos" "Release" "macos" "false"
combine_static_libraries "build-visionos" "Release-xros" "visionos" "false"
combine_static_libraries "build-visionos-sim" "Release-xrsimulator" "visionos" "true"
combine_static_libraries "build-tvos-sim" "Release-appletvsimulator" "tvos" "true"
combine_static_libraries "build-tvos-device" "Release-appletvos" "tvos" "false"
# Create XCFramework with correct debug symbols paths
echo "Creating XCFramework..."
xcodebuild -create-xcframework \
    -framework $(pwd)/build-ios-sim/framework/llama.framework \
    -debug-symbols $(pwd)/build-ios-sim/dSYMs/llama.dSYM \
    -framework $(pwd)/build-ios-device/framework/llama.framework \
    -debug-symbols $(pwd)/build-ios-device/dSYMs/llama.dSYM \
    -framework $(pwd)/build-macos/framework/llama.framework \
    -debug-symbols $(pwd)/build-macos/dSYMs/llama.dSYM \
    -framework $(pwd)/build-visionos/framework/llama.framework \
    -debug-symbols $(pwd)/build-visionos/dSYMs/llama.dSYM \
    -framework $(pwd)/build-visionos-sim/framework/llama.framework \
    -debug-symbols $(pwd)/build-visionos-sim/dSYMs/llama.dSYM \
    -framework $(pwd)/build-tvos-device/framework/llama.framework \
    -debug-symbols $(pwd)/build-tvos-device/dSYMs/llama.dSYM \
    -framework $(pwd)/build-tvos-sim/framework/llama.framework \
    -debug-symbols $(pwd)/build-tvos-sim/dSYMs/llama.dSYM \
    -output $(pwd)/build-apple/llama.xcframework
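For reference, a minimal sketch of how the finished script might be invoked and its output sanity-checked. The `lipo` and `plutil` checks are illustrative additions (not part of this commit), and the slice directory name is an assumption about the usual xcframework layout:

```bash
# Build all slices and assemble build-apple/llama.xcframework (requires macOS with Xcode).
./build-xcframework.sh

# Inspect the produced bundle: one directory per platform/simulator slice.
ls build-apple/llama.xcframework

# Verify the architectures baked into a slice (path depends on the layout above).
xcrun lipo -info build-apple/llama.xcframework/macos-arm64_x86_64/llama.framework/llama

# Validate the generated top-level Info.plist.
plutil -lint build-apple/llama.xcframework/Info.plist
```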


@@ -950,6 +950,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
}
).set_examples({LLAMA_EXAMPLE_MAIN}));
add_opt(common_arg(
{"-st", "--single-turn"},
"run conversation for a single turn only, then exit when done\n"
"will not be interactive if first turn is predefined with --prompt\n"
"(default: false)",
[](common_params & params) {
params.single_turn = true;
}
).set_examples({LLAMA_EXAMPLE_MAIN}));
add_opt(common_arg(
{"-i", "--interactive"},
string_format("run in interactive mode (default: %s)", params.interactive ? "true" : "false"),
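Taken together with the main.cpp changes further down, the new flag enables one-shot conversations. A hypothetical invocation (the model path and prompts are placeholders):

```bash
# Apply the chat template, answer one user prompt, then exit instead of looping.
llama-cli -m model.gguf -sys "You are a helpful assistant" -p "Summarize RAII in one line." -st
```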


@@ -449,12 +449,6 @@ std::string common_chat_format_name(common_chat_format format) {
}
}
const common_grammar_options grammar_options {
/* .dotall = */ false,
/* .compact_spaces = */ false,
// /* .compact_spaces = */ true,
};
static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) {
// // https://json.nlohmann.me/features/parsing/sax_interface/
struct json_error_locator : public nlohmann::json_sax<json> {
@@ -500,6 +494,34 @@ static bool parse_json(std::string::const_iterator & it, const std::string::cons
}
}
static bool parse_literal(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
auto expected_it = expected.begin();
auto tmp_it = it;
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
++tmp_it;
++expected_it;
}
if (expected_it == expected.end()) {
it = tmp_it;
return true;
}
return false;
}
static std::optional<std::smatch> parse_pattern(std::string::const_iterator & it, const std::string::const_iterator & end, const std::regex & expected) {
std::smatch match;
if (std::regex_match(it, end, match, expected)) {
it = match.suffix().first;
return match;
}
return std::nullopt;
}
static void consume_spaces(std::string::const_iterator & it, const std::string::const_iterator & end) {
while (it != end && std::isspace(*it)) {
++it;
}
}
/**
* Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
@@ -509,7 +531,8 @@ static common_chat_msg parse_json_tool_calls(
const std::string& input,
const std::optional<std::regex> & trigger_opt,
const std::regex & function_regex,
const std::regex & close_regex) {
const std::regex & close_regex,
bool allow_raw_python = false) {
std::smatch match;
common_chat_msg result;
@@ -540,14 +563,19 @@ static common_chat_msg parse_json_tool_calls(
it = rit->suffix().first;
json arguments;
if (!parse_json(it, end, arguments)) {
throw std::runtime_error("Failed to parse json tool call arguments: " + input);
}
if (parse_json(it, end, arguments)) {
if (!std::regex_search(it, end, match, close_regex)) {
throw std::runtime_error("Malformed input, missing closing pattern: " + input);
}
it = match.suffix().first;
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
} else {
if (allow_raw_python && name == "python") {
result.tool_calls.push_back({name, json({{"code", std::string(it, end)}}).dump(), /* id= */ ""});
break;
}
throw std::runtime_error("Failed to parse json tool call arguments: " + input);
}
}
if (!result.tool_calls.empty()) {
@@ -559,29 +587,29 @@ static common_chat_msg parse_json_tool_calls(
return result;
}
static common_chat_tool_call process_tool_call(const json & tool_call) {
const auto & arguments = tool_call.at("arguments");
return {
/* .name = */ tool_call.at("name"),
/* .arguments = */ arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
/* .id = */ tool_call.contains("id") ? tool_call.at("id") : "",
};
}
static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
auto content_end = input.find(prefix);
size_t tc_start = std::string::npos;
common_chat_msg result;
result.role = "assistant";
const auto process_tool_calls = [&](const json & tool_calls) {
for (const auto & tool_call : tool_calls) {
const auto & arguments = tool_call.at("arguments");
result.tool_calls.push_back({
tool_call.at("name"),
arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
tool_call.contains("id") ? tool_call.at("id") : "",
});
}
};
if (content_end == std::string::npos) {
result.content = input;
} else {
tc_start = content_end + prefix.size() - rstrip_prefix;
result.content = input.substr(0, content_end);
auto tool_calls = json::parse(input.substr(tc_start));
process_tool_calls(tool_calls);
for (const auto & tool_call : tool_calls) {
result.tool_calls.emplace_back(process_tool_call(tool_call));
}
}
return result;
}
@@ -700,7 +728,7 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp
data.grammar_lazy = false;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
builder.add_schema("root", schema);
}, grammar_options);
});
auto tweaked_messages = common_chat_template::add_system(
inputs.messages,
@@ -770,8 +798,11 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat
schema["maxItems"] = 1;
}
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
}, grammar_options);
data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true});
});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
data.preserved_tokens = {
"[TOOL_CALLS]",
};
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
return data;
@@ -813,14 +844,18 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
schema["maxItems"] = 1;
}
builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
}, grammar_options);
data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false});
});
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
"<|START_ACTION|>",
});
data.preserved_tokens = {
"<|START_ACTION|>",
"<|END_ACTION|>",
"<|START_RESPONSE|>",
"<|END_RESPONSE|>",
"<|START_THINKING|>",
"<|END_THINKING|>",
"<|END_ACTION|>",
};
auto adjusted_messages = json::array();
for (const auto & msg : inputs.messages) {
@@ -840,9 +875,9 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
return data;
}
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|>)([\\s\\S\\n\\r]*)");
static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
std::smatch match;
@@ -945,23 +980,23 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
builder.add_rule(
name + "-call",
"\"{\" space "
"( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? "
"\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
builder.add_schema(name + "-args", parameters) +
" \"}\""));
data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true});
"( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
" \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
" \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
"\"}\" space"));
});
// Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
"\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
});
data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true});
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true});
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
if (!builtin_tools.empty()) {
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
data.preserved_tokens.push_back("<|python_tag|>");
}
// Allow a few empty lines on top of the usual constrained json schema space rule.
builder.add_rule("root", string_join(tool_rules, " | "));
}, grammar_options);
});
data.additional_stops.push_back("<|eom_id|>");
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
{"tools_in_user_message", false},
@@ -974,25 +1009,22 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
}
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
// TODO: tighten & simplify the parser, don't accept leading text context.
static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": ");
static std::regex close_regex("\\}");
static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)");
static std::regex function_regex(
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
static std::regex close_regex("\\}\\s*");
static std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
if (with_builtin_tools) {
std::smatch match;
if (std::regex_match(input, match, builtin_call_regex)) {
try {
auto name = match[1].str();
auto raw_args = match[2].str();
// TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing.
auto it_eq = raw_args.find('=');
auto arg_name = raw_args.substr(0, it_eq);
auto arg_value_str = raw_args.substr(it_eq + 1);
auto arg_name = match[2].str();
auto arg_value_str = match[3].str();
auto arg_value = json::parse(arg_value_str);
common_chat_msg msg;
msg.role = "assistant";
msg.content = match.prefix().str();
msg.tool_calls.push_back({
/* .name = */ name,
/* .arguments = */ (json {
@@ -1001,6 +1033,9 @@ static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bo
/* .id = */ "",
});
return msg;
} catch (const std::exception & e) {
LOG_WRN("Failed to parse builtin tool call arguments (%s): %s", e.what(), input.c_str());
}
}
}
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
@@ -1017,10 +1052,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
std::string name = function.at("name");
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
auto args_rule = builder.add_schema(name + "-args", parameters);
tool_rules.push_back(builder.add_rule(name + "-call",
"\"<tool▁call▁begin>function<tool▁sep>" + name + "\\n"
"```json\\n\" " + args_rule + " \"```<tool▁call▁end>\""));
"```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
"\"```<tool▁call▁end>\""));
});
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
// so we accept common variants (then it's all constrained)
@@ -1029,18 +1064,20 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
"\"<tool▁calls▁end>\""
" space");
data.grammar_triggers.push_back({"<tool▁calls▁begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool_calls_begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool calls begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({"<tool\\_calls\\_begin>", /* .at_start = */ false});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool▁calls▁begin>"});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_calls_begin>"});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool calls begin>"});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool\\_calls\\_begin>"});
data.preserved_tokens = {
"<think>",
"</think>",
"<tool▁calls▁begin>",
"<tool▁call▁begin>",
"<tool▁sep>",
"<tool▁calls▁end",
"<tool▁call▁end>",
"<tool▁calls▁end",
};
}, grammar_options);
});
}
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
@@ -1129,8 +1166,11 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
schema["maxItems"] = 1;
}
builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
}, grammar_options);
data.grammar_triggers.push_back({" functools[", /* .at_start = */ false});
});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
data.preserved_tokens = {
" functools[",
};
data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
} else {
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
@@ -1158,11 +1198,28 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
auto parameters = function.at("parameters");
builder.resolve_refs(parameters);
auto args_rule = builder.add_schema(name + "-args", parameters);
first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
first_tool_rules.push_back(builder.add_rule(name + "-call", "( \"assistant<|end_header_id|>\\n\" )? \"" + name + "\\n\" " + args_rule));
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
data.grammar_triggers.push_back({name, /* .at_start = */ true});
data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false});
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
regex_escape(name + "\n"),
});
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
regex_escape("assistant<|end_header_id|>\n" + name + "\n"),
});
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
regex_escape(">>>" + name + "\n"),
});
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
">>>assistant<|end_header_id|>\n" + name,
});
});
data.preserved_tokens = {
"<|end_header_id|>",
};
auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
if (inputs.parallel_tool_calls) {
auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
@@ -1171,34 +1228,20 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
builder.add_rule("root", first_rule);
}
}, grammar_options);
});
}
return data;
}
static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
auto expected_it = expected.begin();
auto tmp_it = it;
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
++tmp_it;
++expected_it;
}
if (expected_it == expected.end()) {
it = tmp_it;
return true;
}
return false;
}
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
static std::regex function_regex(R"((?:>>>)?(\w+)\n)");
static std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
static std::regex close_regex(R"($|(?=>>>))");
std::string content;
auto it = input.begin();
const auto end = input.end();
if (consume(it, end, "all\n")) {
if (parse_literal(it, end, "all\n")) {
std::smatch match;
if (std::regex_search(it, end, match, function_regex)) {
auto fun_it = match.prefix().second;
@@ -1213,7 +1256,7 @@ static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & in
}
// TODO: tighten & simplify.
try {
auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex);
auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex, /* allow_raw_python= */ true);
res.content = content + res.content;
return res;
} catch (const std::exception & e) {
@@ -1266,12 +1309,13 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
});
if (has_raw_python) {
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
data.preserved_tokens.push_back("<|python_tag|>");
}
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
data.grammar_triggers.push_back({"<function=", /* .at_start = */ false});
}, grammar_options);
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
});
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
// TODO: if (has_raw_python)
@@ -1306,6 +1350,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
std::vector<std::string> tool_rules;
std::vector<std::string> tool_call_alts;
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
std::string name = function.at("name");
@@ -1319,57 +1364,173 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
}},
{"required", json::array({"name", "arguments"})},
}));
tool_call_alts.push_back(builder.add_rule(
name + "-function-tag",
"\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
builder.add_schema(name + "-args", parameters) + " "
"\"</function>\" space"));
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
"<function=" + name + ">",
});
auto tool_call = "\"<tool_call>\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"</tool_call>\" space";
auto escaped_name = regex_escape(name);
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
"<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
});
});
auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
std::vector<std::string> alt_tags {
any_tool_call,
"\"<tool_call>\" space " + any_tool_call + " \"</tool_call>\"",
// The rest is just to accommodate common "good bad" outputs.
"\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
"\"<response>\" space " + any_tool_call + " \"</response>\"",
"\"<tools>\" space " + any_tool_call + " \"</tools>\"",
"\"<json>\" space " + any_tool_call + " \"</json>\"",
"\"<xml>\" space " + any_tool_call + " \"</xml>\"",
"\"<JSON>\" space " + any_tool_call + " \"</JSON>\"",
};
auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
tool_call_alts.push_back(wrappable_tool_call);
tool_call_alts.push_back(
"( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});
data.preserved_tokens = { "</tool_call>" };
}, grammar_options);
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"});
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function"});
// Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
data.grammar_triggers.push_back({
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
});
data.preserved_tokens = {
"<tool_call>",
"</tool_call>",
"<function",
"<tools>",
"</tools>",
"<response>",
"</response>",
"<function_call>",
"</function_call>",
"<json>",
"</json>",
"<JSON>",
"</JSON>",
"```",
"```json",
"```xml",
};
});
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
return data;
}
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input) {
const static std::regex open_regex(
"(?:"
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
"(<tool_call>" // match 2 (open_tag)
"|<function_call>"
"|<tool>"
"|<tools>"
"|<response>"
"|<json>"
"|<xml>"
"|<JSON>"
")?"
"(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest)
")"
"|"
"(?:<function=([^>]+)>" // match 4 (function name)
"|<function name=\"([^\"]+)\">)" // match 5 (function name again)
"([\\s\\S]*)" // match 6 (function arguments + rest)})"
);
try {
std::regex start_pattern(R"([\n\s]*<tool_call>)");
std::regex middle_pattern(R"([\n\s]*</tool_call>[\n\s]*<tool_call>)");
std::regex end_pattern(R"([\n\s]*</tool_call>[\n\s]*$)");
common_chat_msg msg;
msg.role = "assistant";
auto end = input.end();
std::sregex_iterator rend;
std::sregex_iterator rit(input.begin(), end, start_pattern);
if (rit == rend) {
msg.content = input;
return msg;
}
std::string::const_iterator it = input.begin();
const std::string::const_iterator end = input.end();
std::smatch match;
msg.content = rit->prefix();
auto it = rit->suffix().first;
while (it != end) {
json call;
if (!parse_json(it, end, call)) {
throw std::runtime_error("Failed to parse json tool call");
if (std::regex_search(it, end, match, open_regex)) {
// Add content before the match
msg.content += std::string(it, match[0].first);
auto block_start = match[1].str();
std::string block_end = block_start.empty() ? "" : "```";
auto open_tag = match[2].str();
std::string close_tag;
if (match[3].matched) {
close_tag = open_tag.empty() ? "" : "</" + open_tag.substr(1);
auto json_it = match[3].first;
json tool_call;
if (parse_json(json_it, end, tool_call) && tool_call.contains("name") && tool_call.contains("arguments")) {
msg.tool_calls.emplace_back(process_tool_call(tool_call));
it = json_it; // Move iterator past parsed JSON
// Handle close tags
consume_spaces(it, end);
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
throw std::runtime_error("Failed to parse closing tag");
}
const auto & arguments = call.at("arguments");
msg.tool_calls.push_back({
call.at("name"),
arguments.dump(),
// arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
/* id= */ "",
});
rit = {it, end, middle_pattern};
if (rit != rend) {
it = rit->suffix().first;
consume_spaces(it, end);
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
throw std::runtime_error("Failed to parse block end");
}
consume_spaces(it, end);
} else {
rit = {it, end, end_pattern};
if (rit == rend) {
throw std::runtime_error("Malformed input, missing </tool_call>");
// Not a valid tool call, treat as content
msg.content += std::string(match[0].first, match[0].second);
it = match[0].second;
}
} else {
auto function_name = match[4].str();
if (function_name.empty()) {
function_name = match[5].str();
}
GGML_ASSERT(!function_name.empty());
close_tag = "</function>";
// Start parsing from after the opening tags
auto json_it = match[6].first;
json arguments;
if (parse_json(json_it, end, arguments)) {
msg.tool_calls.emplace_back(process_tool_call({
{"name", function_name},
{"arguments", arguments},
}));
it = json_it; // Move iterator past parsed JSON
// Handle close tags
consume_spaces(it, end);
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
throw std::runtime_error("Failed to parse closing tag");
}
consume_spaces(it, end);
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
throw std::runtime_error("Failed to parse block end");
}
consume_spaces(it, end);
} else {
// Not a valid tool call, treat as content
msg.content += std::string(match[0].first, match[0].second);
it = match[0].second;
}
}
} else {
// Add remaining content
msg.content += std::string(it, end);
break;
}
}


@@ -486,6 +486,11 @@ void string_replace_all(std::string & s, const std::string & search, const std::
s = std::move(builder);
}
std::string regex_escape(const std::string & s) {
static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
return std::regex_replace(s, special_chars, "\\$0");
}
std::string string_join(const std::vector<std::string> & values, const std::string & separator) {
std::ostringstream result;
for (size_t i = 0; i < values.size(); ++i) {
@@ -2029,3 +2034,25 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
return result;
}
template <>
json common_grammar_trigger::to_json() const {
json out {
{"type", (int) type},
{"value", value},
};
if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
out["token"] = (int) token;
}
return out;
}
template <>
common_grammar_trigger common_grammar_trigger::from_json(const json & in) {
common_grammar_trigger out;
out.type = (common_grammar_trigger_type) in.at("type").get<int>();
out.value = in.at("value").get<std::string>();
if (out.type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
out.token = (llama_token) in.at("token").get<int>();
}
return out;
}
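Below is a minimal sketch of how these two helpers round-trip a trigger through JSON. It assumes llama.cpp's `common.h` is on the include path; the sample value is taken from the chat.cpp hunks above:

```cpp
#include "common.h"
#include <nlohmann/json.hpp>

int main() {
    common_grammar_trigger trigger;
    trigger.type  = COMMON_GRAMMAR_TRIGGER_TYPE_WORD;
    trigger.value = "<tool_call>";

    // Serializes to {"type": 1, "value": "<tool_call>"}; "token" is only
    // emitted for COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN triggers.
    auto j = trigger.to_json<nlohmann::ordered_json>();

    // from_json restores type and value (and token, for TOKEN triggers).
    auto back = common_grammar_trigger::from_json(j);
    return back.value == trigger.value ? 0 : 1;
}
```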


@@ -106,9 +106,21 @@ enum common_conversation_mode {
COMMON_CONVERSATION_MODE_AUTO = 2,
};
enum common_grammar_trigger_type {
COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
};
struct common_grammar_trigger {
std::string word;
bool at_start;
common_grammar_trigger_type type;
std::string value;
llama_token token = LLAMA_TOKEN_NULL;
// T can only be nlohmann::ordered_json
template <class T> T to_json() const;
template <class T> static common_grammar_trigger from_json(const T & in);
};
// sampling parameters
@@ -159,8 +171,7 @@ struct common_params_sampling {
std::string grammar; // optional BNF-like grammar to constrain sampling
bool grammar_lazy = false;
std::vector<common_grammar_trigger> grammar_trigger_words; // optional trigger words to trigger lazy grammar
std::vector<llama_token> grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
std::vector<common_grammar_trigger> grammar_triggers; // optional triggers (for lazy grammars)
std::set<llama_token> preserved_tokens;
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
@@ -324,6 +335,8 @@ struct common_params {
bool warmup = true; // warmup run
bool check_tensors = false; // validate tensor data
bool single_turn = false; // single turn chat conversation
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
@@ -452,6 +465,8 @@ std::string string_repeat(const std::string & str, size_t n);
void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
std::string regex_escape(const std::string & s);
template<class T>
static std::vector<T> string_split(const std::string & str, char delim) {
static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
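The switch from `word`/`at_start` to a typed trigger changes every initializer site in chat.cpp; a before/after sketch, using values from the hunks above:

```cpp
// Before: positional {word, at_start} initializer.
data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});

// After: an explicit trigger type plus a value. PATTERN_START subsumes the
// old at_start flag, and WORD values are regex-escaped by the sampler.
data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"});
data.grammar_triggers.push_back({
    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
    "\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"",
});
```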


@@ -264,7 +264,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
throw std::runtime_error("At least one of min_value or max_value must be set");
}
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
const std::string SPACE_RULE = "| \" \" | \"\\n\"{1,2} [ \\t]{0,20}";
struct BuiltinRule {
std::string content;
@@ -764,11 +764,10 @@ private:
public:
SchemaConverter(
const std::function<json(const std::string &)> & fetch_json,
bool dotall,
bool compact_spaces)
bool dotall)
: _fetch_json(fetch_json), _dotall(dotall)
{
_rules["space"] = compact_spaces ? "\" \"?" : SPACE_RULE;
_rules["space"] = SPACE_RULE;
}
void resolve_refs(json & schema, const std::string & url) {
@@ -1007,7 +1006,7 @@ std::string json_schema_to_grammar(const json & schema, bool force_gbnf) {
}
std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options) {
SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces);
SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall);
common_grammar_builder builder {
/* .add_rule = */ [&](const std::string & name, const std::string & rule) {
return converter._add_rule(name, rule);
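The `space` rule change reads most clearly side by side in GBNF (the alternation's first branch is empty, so whitespace remains optional):

```
space ::= | " " | "\n" [ \t]{0,20}       # before: at most one newline
space ::= | " " | "\n"{1,2} [ \t]{0,20}  # after: one or two newlines allowed
```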


@@ -16,7 +16,6 @@ struct common_grammar_builder {
struct common_grammar_options {
bool dotall = false;
bool compact_spaces = false;
};
std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options = {});


@@ -7,6 +7,7 @@
#include <cstdio>
#include <fstream>
#include <thread>
#include <algorithm>
void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
std::vector<llama_token> & inp, int nnew, bool print_progress) {


@@ -4,6 +4,7 @@
#include <cmath>
#include <unordered_map>
#include <algorithm>
// the ring buffer works similarly to std::deque, but with a fixed capacity
// TODO: deduplicate with llama-impl.h
@@ -159,16 +160,53 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
#endif // LLAMA_USE_LLGUIDANCE
} else {
std::vector<const char *> trigger_words;
trigger_words.reserve(params.grammar_trigger_words.size());
for (const auto & str : params.grammar_trigger_words) {
trigger_words.push_back(str.word.c_str());
std::vector<std::string> patterns_at_start;
std::vector<std::string> patterns_anywhere;
std::vector<llama_token> trigger_tokens;
for (const auto & trigger : params.grammar_triggers) {
switch (trigger.type) {
case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
{
const auto & word = trigger.value;
patterns_anywhere.push_back(regex_escape(word));
break;
}
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START:
{
const auto & pattern = trigger.value;
(trigger.type == COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START ? patterns_at_start : patterns_anywhere).push_back(pattern);
break;
}
case COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN:
{
const auto token = trigger.token;
trigger_tokens.push_back(token);
break;
}
default:
GGML_ASSERT(false && "unknown trigger type");
}
}
std::vector<std::string> trigger_patterns;
if (!patterns_at_start.empty()) {
trigger_patterns.push_back("^(" + string_join(patterns_at_start, "|") + ")[\\s\\S]*");
}
if (!patterns_anywhere.empty()) {
trigger_patterns.push_back("^[\\s\\S]*?(" + string_join(patterns_anywhere, "|") + ")[\\s\\S]*");
}
std::vector<const char *> trigger_patterns_c;
trigger_patterns_c.reserve(trigger_patterns.size());
for (const auto & regex : trigger_patterns) {
trigger_patterns_c.push_back(regex.c_str());
}
grmr = params.grammar_lazy
? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root",
trigger_words.data(), trigger_words.size(),
params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size())
? llama_sampler_init_grammar_lazy_patterns(vocab, params.grammar.c_str(), "root",
trigger_patterns_c.data(), trigger_patterns_c.size(),
trigger_tokens.data(), trigger_tokens.size())
: llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
}
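As a worked example of the compilation above: one WORD trigger `<tool_call>` and one PATTERN_START trigger produce the two anchored patterns handed to `llama_sampler_init_grammar_lazy_patterns`. A hypothetical standalone reduction of that logic:

```cpp
#include <iostream>
#include <string>

// Stand-in for common's regex_escape (same backslash-prefixing idea).
static std::string escape(const std::string & s) {
    std::string out;
    for (char c : s) {
        if (std::string(".^$|()*+?[]{}\\").find(c) != std::string::npos) {
            out += '\\';
        }
        out += c;
    }
    return out;
}

int main() {
    const std::string at_start = "\\{\\s*\"name\"\\s*:\\s*\"";  // PATTERN_START, used verbatim
    const std::string anywhere = escape("<tool_call>");          // WORD, escaped first

    // PATTERN_START triggers must match at the very start of the output...
    std::cout << "^(" << at_start << ")[\\s\\S]*\n";
    // ...while WORD (and PATTERN) triggers may fire anywhere in it.
    std::cout << "^[\\s\\S]*?(" << anywhere << ")[\\s\\S]*\n";
    return 0;
}
```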


@@ -5,6 +5,7 @@
#include "sampling.h"
#include <cstring>
#include <algorithm>
#define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 128
#define SPEC_VOCAB_CHECK_START_TOKEN_ID 5


@@ -287,10 +287,9 @@ Here are some models known to work (w/ chat template override when needed):
llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
# Native support for DeepSeek R1 works best w/ our own template (official template buggy)
# Native support for DeepSeek R1 works best w/ our template override (official template is buggy, although we do work around it)
llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L \
--chat-template-file models/templates/llama-cpp-deepseek-r1.jinja
@@ -300,17 +299,20 @@ llama-server --jinja -fa -hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M
# Native support requires the right template for these GGUFs:
llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M \
--chat-template-file models/templates/meetkai-functionary-medium-v3.2.jinja
llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \
--chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use )
--chat-template-file models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
llama-server --jinja -fa -hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M \
--chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )
--chat-template-file models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
llama-server --jinja -fa -hf bartowski/firefunction-v2-GGUF -hff firefunction-v2-IQ1_M.gguf \
--chat-template-file <( python scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 tool_use )
--chat-template-file models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
llama-server --jinja -fa -hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L \
--chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use )
--chat-template-file models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
# Generic format support
llama-server --jinja -fa -hf bartowski/phi-4-GGUF:Q4_0
@@ -318,6 +320,8 @@ llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q8_0
llama-server --jinja -fa -hf bartowski/c4ai-command-r-v01-GGUF:Q2_K
```
To get the official template from the original HuggingFace repos, you can use [scripts/get_chat_template.py](../scripts/get_chat_template.py) (see example invocations in [models/templates/README.md](../models/templates/README.md))
> [!TIP]
> If there is no official `tool_use` Jinja template, you may want to set `--chat-template chatml` to use a default that works with many models (YMMV!), or write your own (e.g. we provide a custom [llama-cpp-deepseek-r1.jinja](../models/templates/llama-cpp-deepseek-r1.jinja) for DeepSeek R1 distills)


@@ -89,6 +89,7 @@ def bytes_to_unicode():
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True)
ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16")
ap.add_argument('--bigendian', action="store_true", default=False, help="Model is executed on big-endian machine")
ap.add_argument("--text-only", action="store_true", required=False,
help="Save a text-only model. It can't be used to encode images")
ap.add_argument("--vision-only", action="store_true", required=False,
@@ -191,7 +192,7 @@ output_dir = args.output_dir if args.output_dir is not None else dir_model
os.makedirs(output_dir, exist_ok=True)
output_prefix = os.path.basename(output_dir).replace("ggml_", "")
fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf")
fout = GGUFWriter(path=fname_out, arch="clip")
fout = GGUFWriter(path=fname_out, arch="clip", endianess=GGUFEndian.LITTLE if not args.bigendian else GGUFEndian.BIG)
fout.add_bool("clip.has_text_encoder", has_text_encoder)
fout.add_bool("clip.has_vision_encoder", has_vision_encoder)


@@ -46,8 +46,8 @@ static void print_usage(int argc, char ** argv) {
(void) argc;
LOG("\nexample usage:\n");
LOG("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128\n", argv[0]);
LOG("\n chat (conversation): %s -m your_model.gguf -p \"You are a helpful assistant\" -cnv\n", argv[0]);
LOG("\n text generation: %s -m your_model.gguf -p \"I believe the meaning of life is\" -n 128 -no-cnv\n", argv[0]);
LOG("\n chat (conversation): %s -m your_model.gguf -sys \"You are a helpful assistant\"\n", argv[0]);
LOG("\n");
}
@@ -218,8 +218,8 @@ int main(int argc, char ** argv) {
// print chat template example in conversation mode
if (params.conversation_mode) {
if (params.enable_chat_template) {
if (!params.prompt.empty()) {
LOG_WRN("*** User-specified prompt in conversation mode will be ignored, did you mean to set --system-prompt (-sys) instead?\n");
if (!params.prompt.empty() && params.system_prompt.empty()) {
LOG_WRN("*** User-specified prompt will pre-start conversation, did you mean to set --system-prompt (-sys) instead?\n");
}
LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.get(), params.use_jinja).c_str());
@ -266,7 +266,7 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd_inp;
bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
bool waiting_for_first_input = false;
auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
common_chat_msg new_msg;
new_msg.role = role;
@ -277,22 +277,34 @@ int main(int argc, char ** argv) {
return formatted;
};
{
std::string prompt;
{
if (params.conversation_mode && params.enable_chat_template) {
// format the system prompt in conversation mode (will use template default if empty)
prompt = params.system_prompt;
if (!params.system_prompt.empty()) {
// format the system prompt (will use template default if empty)
chat_add_and_format("system", params.system_prompt);
}
if (!prompt.empty()) {
prompt = chat_add_and_format("system", prompt);
if (!params.prompt.empty()) {
// format and append the user prompt
chat_add_and_format("user", params.prompt);
} else {
waiting_for_first_input = true;
}
if (!params.system_prompt.empty() || !params.prompt.empty()) {
common_chat_templates_inputs inputs;
inputs.messages = chat_msgs;
inputs.add_generation_prompt = !params.prompt.empty();
prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
}
} else {
// otherwise use the prompt as is
prompt = params.prompt;
}
if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
if (params.interactive_first || !prompt.empty() || session_tokens.empty()) {
LOG_DBG("tokenize the prompt\n");
embd_inp = common_tokenize(ctx, prompt, true, true);
} else {
@ -305,7 +317,7 @@ int main(int argc, char ** argv) {
}
// Should not run without any tokens
if (!params.conversation_mode && embd_inp.empty()) {
if (!waiting_for_first_input && embd_inp.empty()) {
if (add_bos) {
embd_inp.push_back(llama_vocab_bos(vocab));
LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@ -365,8 +377,13 @@ int main(int argc, char ** argv) {
}
if (params.conversation_mode) {
if (params.single_turn && !params.prompt.empty()) {
params.interactive = false;
params.interactive_first = false;
} else {
params.interactive_first = true;
}
}
// enable interactive mode if interactive start is specified
if (params.interactive_first) {
@ -809,6 +826,11 @@ int main(int argc, char ** argv) {
if (params.conversation_mode && !waiting_for_first_input) {
const auto id = common_sampler_last(smpl);
assistant_ss << common_token_to_piece(ctx, id, false);
if (!prompt.empty()) {
prompt.clear();
is_interacting = false;
}
}
if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
@ -906,6 +928,11 @@ int main(int argc, char ** argv) {
common_sampler_reset(smpl);
}
is_interacting = false;
if (waiting_for_first_input && params.single_turn) {
params.interactive = false;
params.interactive_first = false;
}
waiting_for_first_input = false;
}
}

View file

@ -9,6 +9,7 @@
#include <unordered_map>
#include <fstream>
#include <cmath>
#include <cctype>
struct quant_option {
std::string name;

View file

@ -1,5 +1,5 @@
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}';
const SPACE_RULE = '| " " | "\\n"{1,2} [ \\t]{0,20}';
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
if (minItems === 0 && maxItems === 1) {

View file

@ -131,9 +131,9 @@ struct slot_params {
lora.push_back({{"id", i}, {"scale", this->lora[i].scale}});
}
std::vector<std::string> grammar_trigger_words;
for (const auto & trigger : sampling.grammar_trigger_words) {
grammar_trigger_words.push_back(trigger.word);
auto grammar_triggers = json::array();
for (const auto & trigger : sampling.grammar_triggers) {
grammar_triggers.push_back(trigger.to_json<json>());
}
return json {
@ -170,8 +170,8 @@ struct slot_params {
{"n_probs", sampling.n_probs},
{"min_keep", sampling.min_keep},
{"grammar", sampling.grammar},
{"grammar_trigger_words", grammar_trigger_words},
{"grammar_trigger_tokens", sampling.grammar_trigger_tokens},
{"grammar_lazy", sampling.grammar_lazy},
{"grammar_triggers", grammar_triggers},
{"preserved_tokens", sampling.preserved_tokens},
{"chat_format", common_chat_format_name(oaicompat_chat_format)},
{"samplers", samplers},
@ -356,24 +356,6 @@ struct server_task {
}
{
const auto grammar_triggers = data.find("grammar_triggers");
if (grammar_triggers != data.end()) {
for (const auto & t : *grammar_triggers) {
common_grammar_trigger trigger;
trigger.word = t.at("word");
trigger.at_start = t.at("at_start");
auto ids = common_tokenize(vocab, trigger.word, /* add_special= */ false, /* parse_special= */ true);
if (ids.size() == 1) {
SRV_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
params.sampling.grammar_trigger_tokens.push_back(ids[0]);
params.sampling.preserved_tokens.insert(ids[0]);
continue;
}
SRV_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
params.sampling.grammar_trigger_words.push_back(trigger);
}
}
const auto preserved_tokens = data.find("preserved_tokens");
if (preserved_tokens != data.end()) {
for (const auto & t : *preserved_tokens) {
@ -383,12 +365,38 @@ struct server_task {
params.sampling.preserved_tokens.insert(ids[0]);
} else {
// This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens.
SRV_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get<std::string>().c_str());
SRV_DBG("Not preserved because more than 1 token: %s\n", t.get<std::string>().c_str());
}
}
}
if (params.sampling.grammar_lazy) {
GGML_ASSERT(params.sampling.grammar_trigger_tokens.size() > 0 || params.sampling.grammar_trigger_words.size() > 0);
const auto grammar_triggers = data.find("grammar_triggers");
if (grammar_triggers != data.end()) {
for (const auto & t : *grammar_triggers) {
auto ct = common_grammar_trigger::from_json(t);
if (ct.type == COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
const auto & word = ct.value;
auto ids = common_tokenize(vocab, word, /* add_special= */ false, /* parse_special= */ true);
if (ids.size() == 1) {
auto token = ids[0];
if (std::find(params.sampling.preserved_tokens.begin(), params.sampling.preserved_tokens.end(), (llama_token) token) == params.sampling.preserved_tokens.end()) {
throw std::runtime_error("Grammar trigger word should be marked as preserved token: " + word);
}
SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
common_grammar_trigger trigger;
trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
trigger.value = (llama_token) token;
params.sampling.grammar_triggers.push_back(trigger);
} else {
SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
}
} else {
params.sampling.grammar_triggers.push_back(ct);
}
}
}
if (params.sampling.grammar_lazy && params.sampling.grammar_triggers.empty()) {
throw std::runtime_error("Error: no triggers set for lazy grammar!");
}
}
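To make the reworked request shape concrete, here is a hedged sketch of a client payload (the `type`/`value` keys of a trigger are an assumption inferred from `common_grammar_trigger::from_json` above, and the numeric value of the word trigger type is a placeholder):

```python
# Hypothetical request body exercising lazy grammar triggers.
# Note the new invariant enforced above: a "word" trigger that tokenizes to a
# single token must also be listed in preserved_tokens, or the request fails.
request_body = {
    "prompt": "...",
    "grammar_lazy": True,
    "grammar": 'root ::= "<tool_call>" [^<]*',  # placeholder grammar
    "grammar_triggers": [
        {"type": 0, "value": "<tool_call>"},    # assumed trigger JSON shape
    ],
    "preserved_tokens": ["<tool_call>"],
}
```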
@ -2045,7 +2053,7 @@ struct server_context {
if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
// Might be better to reject the request with a 400 ?
SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.params.n_predict, slot.n_predict);
SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d\n", slot.params.n_predict, slot.n_predict);
slot.params.n_predict = slot.n_predict;
}
@ -3003,7 +3011,7 @@ struct server_context {
const int64_t kv_shift = (int64_t) head_p - (int64_t) head_c;
llama_kv_cache_seq_rm (ctx, slot.id, head_p, head_c);
llama_kv_cache_seq_add(ctx, slot.id, head_c, -1, kv_shift);
llama_kv_cache_seq_add(ctx, slot.id, head_c, head_c + n_match, kv_shift);
for (size_t i = 0; i < n_match; i++) {
slot.cache_tokens[head_p + i] = slot.cache_tokens[head_c + i];
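The one-line fix above narrows the shift to the matched window; an illustrative bit of index arithmetic (made-up positions) shows the difference:

```python
# Illustrative index math for the context-shift fix above.
head_p, head_c, n_match = 5, 9, 3
kv_shift = head_p - head_c            # -4
# old (buggy): shifted everything from head_c to the end of the sequence,
#              moving unrelated tokens along with the reused ones
# new (fixed): shift only the matched window [head_c, head_c + n_match)
moved = [(p, p + kv_shift) for p in range(head_c, head_c + n_match)]
print(moved)  # [(9, 5), (10, 6), (11, 7)]
```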

examples/server/tests/unit/test_tool_call.py (228 changed lines) Normal file → Executable file
View file

@ -1,4 +1,12 @@
#!/usr/bin/env python
import pytest
# ensure grandparent path is in sys.path
from pathlib import Path
import sys
path = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(path))
from utils import *
server: ServerProcess
@ -66,15 +74,8 @@ WEATHER_TOOL = {
}
def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, argument_key: str | None):
global server
n_predict = 512
# server = ServerPreset.stories15m_moe()
server.jinja = True
server.n_predict = n_predict
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
def do_test_completion_with_required_tool_tiny(server: ServerProcess, tool: dict, argument_key: str | None, n_predict, **kwargs):
res = server.make_request("POST", "/v1/chat/completions", data={
"max_tokens": n_predict,
"messages": [
{"role": "system", "content": "You are a coding assistant."},
@ -83,16 +84,14 @@ def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, a
"tool_choice": "required",
"tools": [tool],
"parallel_tool_calls": False,
"temperature": 0.0,
"top_k": 1,
"top_p": 1.0,
**kwargs,
})
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
choice = res.body["choices"][0]
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
assert expected_function_name == tool_call["function"]["name"]
actual_arguments = tool_call["function"]["arguments"]
@ -108,7 +107,14 @@ def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, a
("meta-llama-Llama-3.3-70B-Instruct", PYTHON_TOOL, "code"),
])
def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict, argument_key: str | None):
do_test_completion_with_required_tool_tiny(template_name, tool, argument_key)
global server
n_predict = 512
# server = ServerPreset.stories15m_moe()
server.jinja = True
server.n_predict = n_predict
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)
do_test_completion_with_required_tool_tiny(server, tool, argument_key, n_predict, temperature=0.0, top_k=1, top_p=1.0)
@pytest.mark.slow
@ -130,10 +136,17 @@ def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict,
("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", TEST_TOOL, "success"),
("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"),
("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"),
("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"),
# ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"),
])
def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None):
do_test_completion_with_required_tool_tiny(template_name, tool, argument_key)
global server
n_predict = 512
# server = ServerPreset.stories15m_moe()
server.jinja = True
server.n_predict = n_predict
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)
do_test_completion_with_required_tool_tiny(server, tool, argument_key, n_predict)
@pytest.mark.slow
@ -142,25 +155,33 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
(PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
(TEST_TOOL, "success", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
# (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
# (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
@ -176,10 +197,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
(TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
# (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
# TODO: fix these
# (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
# (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
(TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
@ -197,7 +218,7 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
res = server.make_request("POST", "/v1/chat/completions", data={
"max_tokens": n_predict,
"messages": [
{"role": "system", "content": "You are a coding assistant."},
@ -215,7 +236,7 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
# assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
assert expected_function_name == tool_call["function"]["name"]
actual_arguments = tool_call["function"]["arguments"]
@ -225,13 +246,8 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}"
def do_test_completion_without_tool_call(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
global server
server.jinja = True
server.n_predict = n_predict
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
def do_test_completion_without_tool_call(server: ServerProcess, n_predict: int, tools: list[dict], tool_choice: str | None, **kwargs):
res = server.make_request("POST", "/v1/chat/completions", data={
"max_tokens": n_predict,
"messages": [
{"role": "system", "content": "You are a coding assistant."},
@ -239,9 +255,7 @@ def do_test_completion_without_tool_call(template_name: str, n_predict: int, too
],
"tools": tools if tools else None,
"tool_choice": tool_choice,
"temperature": 0.0,
"top_k": 1,
"top_p": 1.0,
**kwargs,
}, timeout=TIMEOUT_HTTP_REQUEST)
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
choice = res.body["choices"][0]
@ -254,7 +268,12 @@ def do_test_completion_without_tool_call(template_name: str, n_predict: int, too
("meta-llama-Llama-3.3-70B-Instruct", 128, [PYTHON_TOOL], 'none'),
])
def test_completion_without_tool_call_fast(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
do_test_completion_without_tool_call(template_name, n_predict, tools, tool_choice)
global server
server.jinja = True
server.n_predict = n_predict
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)
do_test_completion_without_tool_call(server, n_predict, tools, tool_choice)
@pytest.mark.slow
@ -270,7 +289,12 @@ def test_completion_without_tool_call_fast(template_name: str, n_predict: int, t
("meta-llama-Llama-3.2-3B-Instruct", 256, [PYTHON_TOOL], 'none'),
])
def test_completion_without_tool_call_slow(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
do_test_completion_without_tool_call(template_name, n_predict, tools, tool_choice)
global server
server.jinja = True
server.n_predict = n_predict
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
server.start(timeout_seconds=TIMEOUT_SERVER_START)
do_test_completion_without_tool_call(server, n_predict, tools, tool_choice)
@pytest.mark.slow
@ -281,6 +305,12 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
@ -324,48 +354,52 @@ def test_weather(hf_repo: str, template_override: str | Tuple[str, str | None] |
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
"max_tokens": n_predict,
do_test_weather(server, max_tokens=n_predict)
def do_test_weather(server: ServerProcess, **kwargs):
res = server.make_request("POST", "/v1/chat/completions", data={
"messages": [
{"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things."},
{"role": "user", "content": "What is the weather in Istanbul?"},
],
"tools": [WEATHER_TOOL],
**kwargs,
}, timeout=TIMEOUT_HTTP_REQUEST)
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
choice = res.body["choices"][0]
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"]
# assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"], f'Expected weather tool call, got {tool_call["function"]["name"]}'
actual_arguments = json.loads(tool_call["function"]["arguments"])
assert 'location' in actual_arguments, f"location not found in {json.dumps(actual_arguments)}"
location = actual_arguments["location"]
assert isinstance(location, str), f"Expected location to be a string, got {type(location)}: {json.dumps(location)}"
assert re.match('^Istanbul(, (TR|Turkey|Türkiye))?$', location), f'Expected Istanbul for location, got {location}'
assert re.match('^Istanbul(( |, ?)(TR|Turkey|Türkiye))?$', location), f'Expected Istanbul for location, got {location}'
@pytest.mark.slow
@pytest.mark.parametrize("result_override,n_predict,hf_repo,template_override", [
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", None),
(None, 128, "bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(None, 128, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
(None, 128, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
(None, 128, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(None, 128, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
(None, 128, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
# TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value)
("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
# ("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
# (None, 128, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
# ("[\\s\\S]*?\\*\\*\\s*0.5($|\\*\\*)", 8192, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
])
def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
# n_predict = 512
server.n_slots = 1
server.jinja = True
server.n_ctx = 8192 * 2
@ -379,10 +413,14 @@ def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str,
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
do_test_calc_result(server, result_override, n_predict)
def do_test_calc_result(server: ServerProcess, result_override: str | None, n_predict: int, **kwargs):
res = server.make_request("POST", "/v1/chat/completions", data={
"max_tokens": n_predict,
"messages": [
{"role": "system", "content": "You are a chatbot that uses tools/functions. Dont overthink things, and provide very concise answers. Do not explain your reasoning to the user. Provide any numerical values back to the user with at most two decimals."},
{"role": "system", "content": "You are a tools-calling assistant. You express numerical values with at most two decimals."},
{"role": "user", "content": "What's the y coordinate of a point on the unit sphere at angle 30 degrees?"},
{
"role": "assistant",
@ -423,7 +461,8 @@ def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str,
}
}
}
]
],
**kwargs,
}, timeout=TIMEOUT_HTTP_REQUEST)
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
choice = res.body["choices"][0]
@ -434,19 +473,19 @@ def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str,
if result_override is not None:
assert re.match(result_override, content), f'Expected {result_override}, got {content}'
else:
assert re.match('^[\\s\\S]*?The (y[ -])?coordinate [\\s\\S]*?is (approximately )?0\\.56\\b|^0\\.56$', content), \
assert re.match('^[\\s\\S]*?((That\'s|\\bis) (approximately )?)?\\b0\\.(5\\b|56\\b|556)', content), \
f'Expected something like "The y coordinate is 0.56.", got {content}'
@pytest.mark.slow
@pytest.mark.parametrize("n_predict,reasoning_format,expect_content,expect_reasoning_content,hf_repo,template_override", [
(128, 'deepseek', "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(128, None, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(128, 'deepseek', "^The sum of 102 and 7 is 109[\\s\\S]*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(128, None, "^The sum of 102 and 7 is 109[\\s\\S]*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(1024, 'deepseek', "To find the sum of.*", "I need to calculate the sum of 102 and 7.*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(1024, 'none', "^I need[\\s\\S]*?</think>\n?To find.*", None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(1024, 'deepseek', "To find the sum of[\\s\\S]*", "I need to calculate the sum of 102 and 7[\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(1024, 'none', "^(<think>\\s*)?I need[\\s\\S]*?</think>\\s*To find[\\s\\S]*", None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(1024, 'deepseek', "To find the sum of.*", "First, I [\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
(1024, 'deepseek', "To find the sum of[\\s\\S]*", "First, I [\\s\\S]*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
])
def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none'] | None, expect_content: str | None, expect_reasoning_content: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
@ -464,7 +503,7 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
res = server.make_request("POST", "/v1/chat/completions", data={
"max_tokens": n_predict,
"messages": [
{"role": "user", "content": "What's the sum of 102 and 7?"},
@ -476,7 +515,7 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
content = choice["message"].get("content")
if expect_content is None:
assert content is None, f'Expected no content in {choice["message"]}'
assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
else:
assert re.match(expect_content, content), f'Expected {expect_content}, got {content}'
@ -488,46 +527,46 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
@pytest.mark.slow
@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [
(None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
# (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", "chatml"),
@pytest.mark.parametrize("hf_repo,template_override", [
("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
(None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
# ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", None),
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
(None, "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
(None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
(None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
(None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
(None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
# Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
(None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", "chatml"),
])
def test_hello_world(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
def test_hello_world(hf_repo: str, template_override: str | Tuple[str, str | None] | None):
global server
n_predict = 512 # High because of DeepSeek R1
server.n_slots = 1
server.jinja = True
server.n_ctx = 8192
server.n_predict = 512 # High because of DeepSeek R1
server.n_predict = n_predict
server.model_hf_repo = hf_repo
server.model_hf_file = None
if isinstance(template_override, tuple):
@ -537,30 +576,27 @@ def test_hello_world(expected_arguments_override: str | None, hf_repo: str, temp
elif isinstance(template_override, str):
server.chat_template = template_override
server.start(timeout_seconds=TIMEOUT_SERVER_START)
res = server.make_request("POST", "/chat/completions", data={
"max_tokens": 256,
do_test_hello_world(server, max_tokens=n_predict)
def do_test_hello_world(server: ServerProcess, **kwargs):
res = server.make_request("POST", "/v1/chat/completions", data={
"messages": [
{"role": "system", "content": "You are a coding assistant."},
{"role": "system", "content": "You are a tool-calling agent."},
{"role": "user", "content": "say hello world with python"},
],
"tools": [PYTHON_TOOL],
# Note: without these greedy params, Functionary v3.2 writes `def hello_world():\n print("Hello, World!")\nhello_world()` which is correct but a pain to test.
"temperature": 0.0,
"top_k": 1,
"top_p": 1.0,
**kwargs,
}, timeout=TIMEOUT_HTTP_REQUEST)
assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
choice = res.body["choices"][0]
tool_calls = choice["message"].get("tool_calls")
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
tool_call = tool_calls[0]
assert choice["message"].get("content") is None, f'Expected no content in {choice["message"]}'
# assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
assert tool_call["function"]["name"] == PYTHON_TOOL["function"]["name"]
actual_arguments = tool_call["function"]["arguments"]
if expected_arguments_override is not None:
assert actual_arguments == expected_arguments_override
else:
actual_arguments = json.loads(actual_arguments)
actual_arguments = json.loads(tool_call["function"]["arguments"])
assert 'code' in actual_arguments, f"code not found in {json.dumps(actual_arguments)}"
code = actual_arguments["code"]
assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}"

View file

@ -67,6 +67,9 @@ class ServerProcess:
id_slot: int | None = None
cache_prompt: bool | None = None
n_slots: int | None = None
ctk: str | None = None
ctv: str | None = None
fa: bool | None = None
server_continuous_batching: bool | None = False
server_embeddings: bool | None = False
server_reranking: bool | None = False
@ -84,6 +87,7 @@ class ServerProcess:
reasoning_format: Literal['deepseek', 'none'] | None = None
chat_template: str | None = None
chat_template_file: str | None = None
server_path: str | None = None
# session variables
process: subprocess.Popen | None = None
@ -97,7 +101,9 @@ class ServerProcess:
self.server_port = int(os.environ["PORT"])
def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None:
if "LLAMA_SERVER_BIN_PATH" in os.environ:
if self.server_path is not None:
server_path = self.server_path
elif "LLAMA_SERVER_BIN_PATH" in os.environ:
server_path = os.environ["LLAMA_SERVER_BIN_PATH"]
elif os.name == "nt":
server_path = "../../../build/bin/Release/llama-server.exe"
@ -151,6 +157,12 @@ class ServerProcess:
server_args.extend(["--ctx-size", self.n_ctx])
if self.n_slots:
server_args.extend(["--parallel", self.n_slots])
if self.ctk:
server_args.extend(["-ctk", self.ctk])
if self.ctv:
server_args.extend(["-ctv", self.ctv])
if self.fa is not None:
server_args.append("-fa")
if self.n_predict:
server_args.extend(["--n-predict", self.n_predict])
if self.slot_save_path:
@ -184,7 +196,7 @@ class ServerProcess:
server_args.extend(["--chat-template-file", self.chat_template_file])
args = [str(arg) for arg in [server_path, *server_args]]
print(f"bench: starting server with: {' '.join(args)}")
print(f"tests: starting server with: {' '.join(args)}")
flags = 0
if "nt" == os.name:
@ -215,6 +227,10 @@ class ServerProcess:
return # server is ready
except Exception as e:
pass
# Check if process died
if self.process.poll() is not None:
raise RuntimeError(f"Server process died with return code {self.process.returncode}")
print(f"Waiting for server to start...")
time.sleep(0.5)
raise TimeoutError(f"Server did not start within {timeout_seconds} seconds")
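A hedged usage sketch for the new harness knobs (this assumes the test module's existing `ServerProcess` and `TIMEOUT_SERVER_START`; the flag values and binary path are placeholders):

```python
# Hypothetical test setup exercising the new ServerProcess fields.
server = ServerProcess()
server.server_path = "../../../build/bin/llama-server"  # takes precedence over LLAMA_SERVER_BIN_PATH
server.ctk = "q8_0"   # emitted as `-ctk q8_0`
server.ctv = "q8_0"   # emitted as `-ctv q8_0`
server.fa = True      # appends `-fa`
server.start(timeout_seconds=TIMEOUT_SERVER_START)
```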

View file

@ -607,6 +607,7 @@ static json oaicompat_completion_params_parse(
inputs.use_jinja = use_jinja;
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
inputs.extract_reasoning = reasoning_format != COMMON_REASONING_FORMAT_NONE;
inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && body.contains("grammar")) {
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
}
@ -620,10 +621,7 @@ static json oaicompat_completion_params_parse(
llama_params["grammar_lazy"] = chat_params.grammar_lazy;
auto grammar_triggers = json::array();
for (const auto & trigger : chat_params.grammar_triggers) {
grammar_triggers.push_back({
{"word", trigger.word},
{"at_start", trigger.at_start},
});
grammar_triggers.push_back(trigger.to_json<json>());
}
llama_params["grammar_triggers"] = grammar_triggers;
llama_params["preserved_tokens"] = chat_params.preserved_tokens;

View file

@ -1,3 +1,5 @@
#define _USE_MATH_DEFINES // For M_PI on MSVC
#include "arg.h"
#include "common.h"
#include "sampling.h"
@ -5,8 +7,6 @@
#include "llama.h"
#include "json.hpp"
#define _USE_MATH_DEFINES // For M_PI on MSVC
#include <algorithm>
#include <cmath>
#include <cstdio>

View file

@ -80,6 +80,7 @@ extern "C" {
GGML_BACKEND_API int ggml_cpu_has_avx (void);
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
GGML_BACKEND_API int ggml_cpu_has_fma (void);
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);

View file

@ -2153,7 +2153,11 @@ extern "C" {
# define GGML_RESTRICT
# endif
#else
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
# define GGML_RESTRICT __restrict
# else
# define GGML_RESTRICT restrict
# endif
#endif
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);

View file

@ -8,6 +8,7 @@
#include <string>
#include <type_traits>
#include <vector>
#include <cctype>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN

View file

@ -21,6 +21,7 @@
#include <string.h>
#include <string>
#include <vector>
#include <algorithm>
#ifdef __APPLE__
#include <sys/types.h>

View file

@ -278,6 +278,10 @@ static int ggml_backend_cpu_x86_score() {
if (!is.SSE42()) { return 0; }
score += 1<<2;
#endif
#ifdef GGML_BMI2
if (!is.BMI2()) { return 0; }
score += 1<<3;
#endif
#ifdef GGML_AVX
if (!is.AVX()) { return 0; }
score += 1<<4;

File diff suppressed because it is too large

View file

@ -252,9 +252,9 @@ typedef pthread_t ggml_thread_t;
static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);
static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
[GGML_TYPE_F32] = {
@ -1456,7 +1456,7 @@ inline static void ggml_vec_div_f16 (const int n, ggml_fp16_t * z, const ggml_fp
}
}
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc) {
static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc) {
assert(nrc == 1);
UNUSED(nrc);
UNUSED(bx);
@ -1499,7 +1499,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
*s = sumf;
}
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc) {
static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc) {
assert(nrc == 1);
UNUSED(nrc);
UNUSED(bx);
@ -1567,7 +1567,7 @@ static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t
*s = sumf;
}
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc) {
static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc) {
assert(nrc == 1);
UNUSED(nrc);
UNUSED(bx);
@ -1611,10 +1611,10 @@ static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t *
// compute GGML_VEC_DOT_UNROLL dot products at once
// xs - x row stride in bytes
inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * restrict s, void * restrict xv, ggml_fp16_t * restrict y) {
inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * GGML_RESTRICT s, void * GGML_RESTRICT xv, ggml_fp16_t * GGML_RESTRICT y) {
ggml_float sumf[GGML_VEC_DOT_UNROLL] = { 0.0 };
ggml_fp16_t * restrict x[GGML_VEC_DOT_UNROLL];
ggml_fp16_t * GGML_RESTRICT x[GGML_VEC_DOT_UNROLL];
for (int i = 0; i < GGML_VEC_DOT_UNROLL; ++i) {
x[i] = (ggml_fp16_t *) ((char *) xv + i*xs);
@ -1664,7 +1664,7 @@ inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * re
}
}
inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
inline static void ggml_vec_mad_f32(const int n, float * GGML_RESTRICT y, const float * GGML_RESTRICT x, const float v) {
#if defined(GGML_SIMD)
const int np = (n & ~(GGML_F32_STEP - 1));
@ -1695,7 +1695,7 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
#endif
}
inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, const ggml_fp16_t * restrict x, const float v) {
inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * GGML_RESTRICT y, const ggml_fp16_t * GGML_RESTRICT x, const float v) {
#if defined(GGML_SIMD)
const int np = (n & ~(GGML_F16_STEP - 1));
@ -1727,10 +1727,10 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * restrict y, const
}
// xs and vs are byte strides of x and v
inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * restrict y, const float * restrict xv, const float * restrict vv) {
inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int vs, float * GGML_RESTRICT y, const float * GGML_RESTRICT xv, const float * GGML_RESTRICT vv) {
const float * restrict x[GGML_VEC_MAD_UNROLL];
const float * restrict v[GGML_VEC_MAD_UNROLL];
const float * GGML_RESTRICT x[GGML_VEC_MAD_UNROLL];
const float * GGML_RESTRICT v[GGML_VEC_MAD_UNROLL];
for (int i = 0; i < GGML_VEC_MAD_UNROLL; ++i) {
x[i] = (const float *) ((const char *) xv + i*xs);
@ -15485,6 +15485,14 @@ int ggml_cpu_has_amx_int8(void) {
#endif
}
int ggml_cpu_has_bmi2(void) {
#if defined(__BMI2__)
return 1;
#else
return 0;
#endif
}
int ggml_cpu_has_fma(void) {
#if defined(__FMA__)
return 1;

View file

@ -511,6 +511,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
if (ggml_cpu_has_fma()) {
features.push_back({ "FMA", "1" });
}
if (ggml_cpu_has_bmi2()) {
features.push_back({ "BMI2", "1" });
}
if (ggml_cpu_has_avx512()) {
features.push_back({ "AVX512", "1" });
}

View file

@ -2576,7 +2576,7 @@ static void maintain_cuda_graph(ggml_backend_cuda_context * cuda_ctx, std::vecto
for (size_t i = 0; i < cuda_ctx->cuda_graph->num_nodes; i++) {
if(count(ggml_cuda_cpy_fn_ptrs.begin(), ggml_cuda_cpy_fn_ptrs.end(), cuda_ctx->cuda_graph->params[i].func) > 0) {
char ** updated_kernel_arg_ptr = cuda_ctx->cuda_graph->updated_kernel_arg.at(k++);
cuda_ctx->cuda_graph->params[i].kernelParams[1] = updated_kernel_arg_ptr;
*(void**)cuda_ctx->cuda_graph->params[i].kernelParams[1] = *(void**)updated_kernel_arg_ptr;
CUDA_CHECK(cudaGraphKernelNodeSetParams(cuda_ctx->cuda_graph->nodes[i], &cuda_ctx->cuda_graph->params[i]));
}
}

View file

@ -467,11 +467,13 @@ struct ggml_backend_metal_context {
// for now it is easier to work in a separate file
// static NSString * const msl_library_source = @"see metal.metal";
#if !GGML_METAL_EMBED_LIBRARY
// Here to assist with NSBundle Path Hack
@interface GGMLMetalClass : NSObject
@end
@implementation GGMLMetalClass
@end
#endif
static void * ggml_metal_host_malloc(size_t n) {
void * data = NULL;
@ -520,7 +522,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
id<MTLLibrary> metal_library;
id<MTLLibrary> metal_library = nil;
// load library
//
@ -529,19 +531,23 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
// - if not found, load the source and compile it
// - if that fails, return NULL
{
NSBundle * bundle = nil;
#ifdef SWIFT_PACKAGE
bundle = SWIFTPM_MODULE_BUNDLE;
#else
bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
#endif
NSError * error = nil;
NSString * src = nil;
#if GGML_METAL_EMBED_LIBRARY
const bool try_metallib = false;
GGML_LOG_INFO("%s: using embedded metal library\n", __func__);
extern const char ggml_metallib_start[];
extern const char ggml_metallib_end[];
src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
#else
const bool try_metallib = true;
#ifdef SWIFT_PACKAGE
NSBundle * bundle = SWIFTPM_MODULE_BUNDLE;
#else
NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
#endif
NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
@ -574,7 +580,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
path_lib = default_metallib_path;
}
if (try_metallib && path_lib != nil) {
if (path_lib != nil) {
// pre-compiled library found
NSURL * libURL = [NSURL fileURLWithPath:path_lib];
GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
@ -585,14 +591,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
return NULL;
}
} else {
#if GGML_METAL_EMBED_LIBRARY
GGML_LOG_INFO("%s: using embedded metal library\n", __func__);
extern const char ggml_metallib_start[];
extern const char ggml_metallib_end[];
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
#else
GGML_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
NSString * path_source;
@ -613,13 +611,15 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
GGML_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
if (error) {
GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL;
}
#endif // GGML_METAL_EMBED_LIBRARY
}
#endif
if (!metal_library) {
@autoreleasepool {
// dictionary of preprocessor macros
NSMutableDictionary * prep = [NSMutableDictionary dictionary];
@ -647,11 +647,12 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
[options release];
#endif
}
}
#if GGML_METAL_EMBED_LIBRARY
[src release];
#endif // GGML_METAL_EMBED_LIBRARY
}
}
// print MTL GPU family:
GGML_LOG_INFO("%s: GPU name: %s\n", __func__, [[device name] UTF8String]);

View file

@ -278,7 +278,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
cl_int err;
#ifdef GGML_PROFILE_OPENCL
#ifdef GGML_OPENCL_PROFILING
GGML_LOG_INFO("ggml_opencl: OpenCL profiling enabled\n");
#endif
@ -524,7 +524,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
return backend_ctx;
}
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &backend_ctx->alignment, NULL));
cl_uint base_align_in_bits;
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &base_align_in_bits, NULL));
GGML_ASSERT(base_align_in_bits % 8u == 0);
backend_ctx->alignment = base_align_in_bits / 8u;
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n", backend_ctx->alignment);
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL);
@ -1198,17 +1201,14 @@ struct ggml_backend_opencl_buffer_context {
std::string name;
};
static void * const cl_ptr_base = (void *)(uintptr_t) 0x1000;
static void ggml_backend_opencl_buffer_free_buffer(ggml_backend_buffer_t buffer) {
ggml_backend_opencl_buffer_context * ctx = (ggml_backend_opencl_buffer_context *) buffer->context;
delete ctx;
}
static void * ggml_backend_opencl_buffer_get_base(ggml_backend_buffer_t buffer) {
return cl_ptr_base;
GGML_UNUSED(buffer);
ggml_backend_opencl_context * backend_ctx = ggml_cl2_init(buffer->buft->device);
return (void *) (uintptr_t) backend_ctx->alignment;
}
static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
@ -1241,7 +1241,7 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
tensor->extra = view_extra;
} else {
{
size_t offset = (char *)tensor->data - (char *)cl_ptr_base;
size_t offset = (char *) tensor->data - (char *) ggml_backend_opencl_buffer_get_base(buffer);
ggml_tensor_extra_cl * extra = ctx->ggml_opencl_alloc_temp_tensor_extra();
extra->offset = offset;
@ -3023,6 +3023,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
// enqueue kernel with profiling
// <--------------------------------------------> //
#ifdef GGML_OPENCL_PROFILING
cl_event evt;
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt));
g_profiling_info.emplace_back();
@ -3764,10 +3765,10 @@ static void ggml_cl_rope(ggml_backend_t backend, const ggml_tensor * src0, const
const int ne02 = src0 ? src0->ne[2] : 0;
const int ne03 = src0 ? src0->ne[3] : 0;
const int nb00 = src0 ? src0->nb[0] : 0;
const int nb01 = src0 ? src0->nb[1] : 0;
const int nb02 = src0 ? src0->nb[2] : 0;
const int nb03 = src0 ? src0->nb[3] : 0;
const cl_ulong nb00 = src0 ? src0->nb[0] : 0;
const cl_ulong nb01 = src0 ? src0->nb[1] : 0;
const cl_ulong nb02 = src0 ? src0->nb[2] : 0;
const cl_ulong nb03 = src0 ? src0->nb[3] : 0;
const int ne10 = src1 ? src1->ne[0] : 0;
const int ne11 = src1 ? src1->ne[1] : 0; UNUSED(ne11);
@ -3779,10 +3780,10 @@ static void ggml_cl_rope(ggml_backend_t backend, const ggml_tensor * src0, const
const int ne2 = dst ? dst->ne[2] : 0;
const int ne3 = dst ? dst->ne[3] : 0;
const int nb0 = dst ? dst->nb[0] : 0;
const int nb1 = dst ? dst->nb[1] : 0;
const int nb2 = dst ? dst->nb[2] : 0;
const int nb3 = dst ? dst->nb[3] : 0;
const cl_ulong nb0 = dst ? dst->nb[0] : 0;
const cl_ulong nb1 = dst ? dst->nb[1] : 0;
const cl_ulong nb2 = dst ? dst->nb[2] : 0;
const cl_ulong nb3 = dst ? dst->nb[3] : 0;
GGML_ASSERT(ne10 % ne02 == 0);
GGML_ASSERT(ne10 >= ne02);

View file

@ -28,7 +28,7 @@
#define UNUSED GGML_UNUSED
// reference implementation for deterministic creation of model files
void quantize_row_q4_0_ref(const float * restrict x, block_q4_0 * restrict y, int64_t k) {
void quantize_row_q4_0_ref(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k) {
static const int qk = QK4_0;
assert(k % qk == 0);
@ -65,7 +65,7 @@ void quantize_row_q4_0_ref(const float * restrict x, block_q4_0 * restrict y, in
}
}
void quantize_row_q4_1_ref(const float * restrict x, block_q4_1 * restrict y, int64_t k) {
void quantize_row_q4_1_ref(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k) {
const int qk = QK4_1;
assert(k % qk == 0);
@ -102,7 +102,7 @@ void quantize_row_q4_1_ref(const float * restrict x, block_q4_1 * restrict y, in
}
}
void quantize_row_q5_0_ref(const float * restrict x, block_q5_0 * restrict y, int64_t k) {
void quantize_row_q5_0_ref(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k) {
static const int qk = QK5_0;
assert(k % qk == 0);
@ -146,7 +146,7 @@ void quantize_row_q5_0_ref(const float * restrict x, block_q5_0 * restrict y, in
}
}
void quantize_row_q5_1_ref(const float * restrict x, block_q5_1 * restrict y, int64_t k) {
void quantize_row_q5_1_ref(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k) {
const int qk = QK5_1;
assert(k % qk == 0);
@ -191,7 +191,7 @@ void quantize_row_q5_1_ref(const float * restrict x, block_q5_1 * restrict y, in
}
// reference implementation for deterministic creation of model files
void quantize_row_q8_0_ref(const float * restrict x, block_q8_0 * restrict y, int64_t k) {
void quantize_row_q8_0_ref(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k) {
assert(k % QK8_0 == 0);
const int nb = k / QK8_0;
@ -217,7 +217,7 @@ void quantize_row_q8_0_ref(const float * restrict x, block_q8_0 * restrict y, in
}
// reference implementation for deterministic creation of model files
void quantize_row_q8_1_ref(const float * restrict x, block_q8_1 * restrict y, int64_t k) {
void quantize_row_q8_1_ref(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k) {
assert(QK8_1 == 32);
assert(k % QK8_1 == 0);
const int nb = k / QK8_1;
@ -252,7 +252,7 @@ void quantize_row_q8_1_ref(const float * restrict x, block_q8_1 * restrict y, in
}
}
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
static const int qk = QK4_0;
assert(k % qk == 0);
@ -272,7 +272,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int6
}
}
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
static const int qk = QK4_1;
assert(k % qk == 0);
@ -293,7 +293,7 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int6
}
}
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
static const int qk = QK5_0;
assert(k % qk == 0);
@ -319,7 +319,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int6
}
}
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
static const int qk = QK5_1;
assert(k % qk == 0);
@ -346,7 +346,7 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int6
}
}
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
static const int qk = QK8_0;
assert(k % qk == 0);
@ -376,8 +376,8 @@ static inline int nearest_int(float fval) {
return (i & 0x007fffff) - 0x00400000;
}
static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t * restrict L, int rmse_type,
const float * restrict qw) {
static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
const float * GGML_RESTRICT qw) {
float max = 0;
float amax = 0;
for (int i = 0; i < n; ++i) {
@ -445,7 +445,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
return scale;
}
static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t * restrict L, bool do_rmse) {
static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
float max = 0;
float amax = 0;
for (int i = 0; i < n; ++i) {
@ -504,7 +504,7 @@ static float make_q3_quants(int n, int nmax, const float * restrict x, int8_t *
return 1/iscale;
}
static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, float * restrict the_min,
static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
int ntry, float alpha) {
float min = x[0];
float max = x[0];
@ -547,8 +547,8 @@ static float make_qkx1_quants(int n, int nmax, const float * restrict x, uint8_t
return scale;
}
static float make_qkx2_quants(int n, int nmax, const float * restrict x, const float * restrict weights,
uint8_t * restrict L, float * restrict the_min, uint8_t * restrict Laux,
static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
float rmin, float rdelta, int nstep, bool use_mad) {
float min = x[0];
float max = x[0];
@ -628,7 +628,7 @@ static float make_qkx2_quants(int n, int nmax, const float * restrict x, const f
return scale;
}
static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t * restrict d, uint8_t * restrict m) {
static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
if (j < 4) {
*d = q[j] & 63; *m = q[j + 4] & 63;
} else {
@ -639,7 +639,7 @@ static inline void get_scale_min_k4(int j, const uint8_t * restrict q, uint8_t *
//========================- 2-bit (de)-quantization
void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, int64_t k) {
void quantize_row_q2_K_ref(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -709,7 +709,7 @@ void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, in
}
}
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -741,8 +741,8 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int6
}
}
static float make_qkx3_quants(int n, int nmax, const float * restrict x, const float * restrict weights,
uint8_t * restrict L, float * restrict the_min, uint8_t * restrict Laux,
static float make_qkx3_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
float rmin, float rdelta, int nstep, bool use_mad) {
float min = x[0];
float max = x[0];
@ -824,7 +824,7 @@ static float make_qkx3_quants(int n, int nmax, const float * restrict x, const f
return scale;
}
static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t * restrict L, const float * quant_weights) {
static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, const float * quant_weights) {
float max = 0;
for (int i = 0; i < n; ++i) {
max = MAX(max, x[i]);
@ -897,7 +897,7 @@ static float make_qp_quants(int n, int nmax, const float * restrict x, uint8_t *
return sumlx/suml2;
}
static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restrict y, int k, const float * restrict quant_weights) {
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
GGML_ASSERT(quant_weights);
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -917,7 +917,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
float sigma2 = sumx2/QK_K;
for (int j = 0; j < QK_K/16; ++j) {
const float * restrict qw = quant_weights + QK_K * i + 16*j;
const float * GGML_RESTRICT qw = quant_weights + QK_K * i + 16*j;
for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
for (int l = 0; l < QK_K/16; ++l) sw[j] += weight[l];
scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
@ -959,7 +959,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
}
}
size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
if (!quant_weights) {
quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);
@ -977,7 +977,7 @@ size_t quantize_q2_K(const float * restrict src, void * restrict dst, int64_t nr
//========================= 3-bit (de)-quantization
void quantize_row_q3_K_ref(const float * restrict x, block_q3_K * restrict y, int64_t k) {
void quantize_row_q3_K_ref(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -1053,7 +1053,7 @@ void quantize_row_q3_K_ref(const float * restrict x, block_q3_K * restrict y, in
}
}
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -1067,8 +1067,8 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
const float d_all = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q = x[i].qs;
const uint8_t * restrict hm = x[i].hmask;
const uint8_t * GGML_RESTRICT q = x[i].qs;
const uint8_t * GGML_RESTRICT hm = x[i].hmask;
uint8_t m = 1;
memcpy(aux, x[i].scales, 12);
@ -1103,7 +1103,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int6
}
}
static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restrict y, int64_t n_per_row, const float * restrict quant_weights) {
static void quantize_row_q3_K_impl(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t n_per_row, const float * GGML_RESTRICT quant_weights) {
assert(n_per_row % QK_K == 0);
const int nb = n_per_row / QK_K;
@ -1187,7 +1187,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
}
}
size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
if (!quant_weights) {
quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);
@ -1205,7 +1205,7 @@ size_t quantize_q3_K(const float * restrict src, void * restrict dst, int64_t nr
// ====================== 4-bit (de)-quantization
void quantize_row_q4_K_ref(const float * restrict x, block_q4_K * restrict y, int64_t k) {
void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -1277,7 +1277,7 @@ void quantize_row_q4_K_ref(const float * restrict x, block_q4_K * restrict y, in
}
}
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int nb = k / QK_K;
@ -1301,7 +1301,7 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int6
}
}
static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q4_K_impl(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
assert(n_per_row % QK_K == 0);
const int64_t nb = n_per_row / QK_K;
@ -1374,7 +1374,7 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
}
}
size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
if (!quant_weights) {
quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);
@ -1392,7 +1392,7 @@ size_t quantize_q4_K(const float * restrict src, void * restrict dst, int64_t nr
// ====================== 5-bit (de)-quantization
void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, int64_t k) {
void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -1454,8 +1454,8 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
}
}
uint8_t * restrict qh = y[i].qh;
uint8_t * restrict ql = y[i].qs;
uint8_t * GGML_RESTRICT qh = y[i].qh;
uint8_t * GGML_RESTRICT ql = y[i].qs;
memset(qh, 0, QK_K/8);
uint8_t m1 = 1, m2 = 2;
@ -1479,7 +1479,7 @@ void quantize_row_q5_K_ref(const float * restrict x, block_q5_K * restrict y, in
}
}
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -1506,7 +1506,7 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int6
}
}
static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q5_K_impl(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
assert(n_per_row % QK_K == 0);
const int64_t nb = n_per_row / QK_K;
@ -1573,8 +1573,8 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
}
}
uint8_t * restrict qh = y[i].qh;
uint8_t * restrict ql = y[i].qs;
uint8_t * GGML_RESTRICT qh = y[i].qh;
uint8_t * GGML_RESTRICT ql = y[i].qs;
memset(qh, 0, QK_K/8);
uint8_t m1 = 1, m2 = 2;
@ -1599,7 +1599,7 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
}
}
size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
if (!quant_weights) {
quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);
@ -1617,7 +1617,7 @@ size_t quantize_q5_K(const float * restrict src, void * restrict dst, int64_t nr
// ====================== 6-bit (de)-quantization
void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, int64_t k) {
void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -1667,8 +1667,8 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
}
}
uint8_t * restrict ql = y[i].ql;
uint8_t * restrict qh = y[i].qh;
uint8_t * GGML_RESTRICT ql = y[i].ql;
uint8_t * GGML_RESTRICT qh = y[i].qh;
for (int j = 0; j < QK_K; j += 128) {
for (int l = 0; l < 32; ++l) {
const uint8_t q1 = L[j + l + 0] & 0xF;
@ -1687,16 +1687,16 @@ void quantize_row_q6_K_ref(const float * restrict x, block_q6_K * restrict y, in
}
}
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
for (int i = 0; i < nb; i++) {
const float d = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict ql = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
const int8_t * restrict sc = x[i].scales;
const uint8_t * GGML_RESTRICT ql = x[i].ql;
const uint8_t * GGML_RESTRICT qh = x[i].qh;
const int8_t * GGML_RESTRICT sc = x[i].scales;
for (int n = 0; n < QK_K; n += 128) {
for (int l = 0; l < 32; ++l) {
@ -1718,7 +1718,7 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int6
}
}
static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q6_K_impl(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
assert(n_per_row % QK_K == 0);
const int64_t nb = n_per_row / QK_K;
@ -1781,8 +1781,8 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
}
}
uint8_t * restrict ql = y[i].ql;
uint8_t * restrict qh = y[i].qh;
uint8_t * GGML_RESTRICT ql = y[i].ql;
uint8_t * GGML_RESTRICT qh = y[i].qh;
for (int j = 0; j < QK_K; j += 128) {
for (int l = 0; l < 32; ++l) {
const uint8_t q1 = L[j + l + 0] & 0xF;
@ -1802,7 +1802,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
}
}
size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
if (!quant_weights) {
quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);
@ -1818,7 +1818,7 @@ size_t quantize_q6_K(const float * restrict src, void * restrict dst, int64_t nr
return nrow * row_size;
}
static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q4_0_impl(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
static_assert(QK4_0 == 32, "QK4_0 must be 32");
if (!quant_weights) {
@ -1846,7 +1846,7 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
}
}
size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
if (!quant_weights) {
quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
@ -1861,7 +1861,7 @@ size_t quantize_q4_0(const float * restrict src, void * restrict dst, int64_t nr
return nrow * row_size;
}
static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q4_1_impl(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
static_assert(QK4_1 == 32, "QK4_1 must be 32");
if (!quant_weights) {
@ -1891,7 +1891,7 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
}
}
size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
if (!quant_weights) {
quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
@ -1906,7 +1906,7 @@ size_t quantize_q4_1(const float * restrict src, void * restrict dst, int64_t nr
return nrow * row_size;
}
static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q5_0_impl(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
static_assert(QK5_0 == 32, "QK5_0 must be 32");
if (!quant_weights) {
@ -1945,7 +1945,7 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
}
}
size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
if (!quant_weights) {
quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
@ -1960,7 +1960,7 @@ size_t quantize_q5_0(const float * restrict src, void * restrict dst, int64_t nr
return nrow * row_size;
}
static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restrict y, int64_t n_per_row, const float * quant_weights) {
static void quantize_row_q5_1_impl(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
static_assert(QK5_1 == 32, "QK5_1 must be 32");
if (!quant_weights) {
@ -1998,7 +1998,7 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
}
}
size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
if (!quant_weights) {
quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
@ -2013,7 +2013,7 @@ size_t quantize_q5_1(const float * restrict src, void * restrict dst, int64_t nr
return nrow * row_size;
}
size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
(void)quant_weights; // not used
const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
quantize_row_q8_0_ref(src, dst, (int64_t)nrow*n_per_row);
@ -2022,7 +2022,7 @@ size_t quantize_q8_0(const float * restrict src, void * restrict dst, int64_t nr
// ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y, int64_t k) {
void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2088,7 +2088,7 @@ void quantize_row_tq1_0_ref(const float * restrict x, block_tq1_0 * restrict y,
}
}
void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y, int64_t k) {
void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2120,21 +2120,21 @@ void quantize_row_tq2_0_ref(const float * restrict x, block_tq2_0 * restrict y,
}
}
size_t quantize_tq1_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_tq1_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
(void)quant_weights; // not used
const size_t row_size = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
quantize_row_tq1_0_ref(src, dst, (int64_t)nrow*n_per_row);
return nrow * row_size;
}
size_t quantize_tq2_0(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_tq2_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
(void)quant_weights; // not used
const size_t row_size = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
quantize_row_tq2_0_ref(src, dst, (int64_t)nrow*n_per_row);
return nrow * row_size;
}
void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_tq1_0(const block_tq1_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2173,7 +2173,7 @@ void dequantize_row_tq1_0(const block_tq1_0 * restrict x, float * restrict y, in
}
}
void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, int64_t k) {
void dequantize_row_tq2_0(const block_tq2_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2194,7 +2194,7 @@ void dequantize_row_tq2_0(const block_tq2_0 * restrict x, float * restrict y, in
// ====================== "True" 2-bit (de)-quantization
void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2222,7 +2222,7 @@ void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y
// ====================== 2.3125 bpw (de)-quantization
void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq2_xs(const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2249,7 +2249,7 @@ void dequantize_row_iq2_xs(const block_iq2_xs * restrict x, float * restrict y,
// ====================== 2.5625 bpw (de)-quantization
void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq2_s(const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2281,7 +2281,7 @@ void dequantize_row_iq2_s(const block_iq2_s * restrict x, float * restrict y, in
// ====================== 3.0625 bpw (de)-quantization
void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2313,7 +2313,7 @@ void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y
// ====================== 3.3125 bpw (de)-quantization
void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq3_s(const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2356,7 +2356,7 @@ void dequantize_row_iq3_s(const block_iq3_s * restrict x, float * restrict y, in
// ====================== 1.5625 bpw (de)-quantization
void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq1_s(const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2381,7 +2381,7 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
}
}
void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2433,7 +2433,7 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in
static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK4_NL == 0);
const int64_t nb = k / QK4_NL;
@ -2451,7 +2451,7 @@ void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
}
}
void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y, int64_t k) {
void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2476,7 +2476,7 @@ void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,
//===================================== Q8_K ==============================================
void quantize_row_q8_K_ref(const float * restrict x, block_q8_K * restrict y, int64_t k) {
void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2515,7 +2515,7 @@ void quantize_row_q8_K_ref(const float * restrict x, block_q8_K * restrict y, in
}
}
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int64_t k) {
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
const int64_t nb = k / QK_K;
@ -2927,8 +2927,8 @@ void iq2xs_free_impl(enum ggml_type type) {
}
}
static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L) {
static int iq2_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
int num_neighbors = neighbours[0];
GGML_ASSERT(num_neighbors > 0);
float best_d2 = FLT_MAX;
@ -2951,7 +2951,7 @@ static int iq2_find_best_neighbour(const uint16_t * restrict neighbours, const u
return grid_index;
}
static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);
@ -3124,7 +3124,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
}
}
static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);
@ -3304,7 +3304,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
}
}
size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
int64_t nblock = n_per_row/QK_K;
char * qrow = (char *)dst;
@ -3316,7 +3316,7 @@ size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int64_t
return nrow * nblock * sizeof(block_iq2_xxs);
}
size_t quantize_iq2_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq2_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
int64_t nblock = n_per_row/QK_K;
char * qrow = (char *)dst;
@ -3521,8 +3521,8 @@ void iq3xs_free_impl(int grid_size) {
}
}
static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const uint32_t * restrict grid,
const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L) {
static int iq3_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint32_t * GGML_RESTRICT grid,
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
int num_neighbors = neighbours[0];
GGML_ASSERT(num_neighbors > 0);
float best_d2 = FLT_MAX;
@ -3545,8 +3545,8 @@ static int iq3_find_best_neighbour(const uint16_t * restrict neighbours, const u
return grid_index;
}
static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, void * restrict vy, int64_t n,
const float * restrict quant_weights) {
static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
const float * GGML_RESTRICT quant_weights) {
const int gindex = iq3_data_index(grid_size);
@ -3758,7 +3758,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
}
}
size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
int64_t nblock = n_per_row/QK_K;
char * qrow = (char *)dst;
@ -3770,13 +3770,13 @@ size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int64_t
return nrow * nblock * sizeof(block_iq3_xxs);
}
void quantize_row_iq3_xxs_ref(const float * restrict x, block_iq3_xxs * restrict y, int64_t k) {
void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
quantize_row_iq3_xxs_impl(256, x, y, k, NULL);
}
static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, void * restrict vy, int n,
const float * restrict quant_weights,
static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int n,
const float * GGML_RESTRICT quant_weights,
float * scales,
float * weight,
float * xval,
@ -3958,7 +3958,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
}
#define IQ3S_BLOCK_SIZE 32
size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq3_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
int64_t nblock = n_per_row/QK_K;
float scales[QK_K/IQ3S_BLOCK_SIZE];
@ -3980,7 +3980,7 @@ size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int64_t n
return nrow * nblock * sizeof(block_iq3_s);
}
void quantize_row_iq3_s_ref(const float * restrict x, block_iq3_s * restrict y, int64_t k) {
void quantize_row_iq3_s_ref(const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
quantize_iq3_s(x, y, 1, k, NULL);
}
@ -3988,8 +3988,8 @@ void quantize_row_iq3_s_ref(const float * restrict x, block_iq3_s * restrict y,
// =================================== 1.5 bpw ===================================================
static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
const float * restrict xval, const float * restrict weight, float * scale, int8_t * restrict L, int ngrid) {
static int iq1_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float * scale, int8_t * GGML_RESTRICT L, int ngrid) {
int num_neighbors = neighbours[0];
GGML_ASSERT(num_neighbors > 0);
float best_score = -FLT_MAX;
@ -4048,8 +4048,8 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u
return grid_index;
}
static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
const float * restrict xval, const float * restrict weight, float scale, const float * restrict xg, int8_t * restrict L, int ngrid) {
static int iq1_find_best_neighbour2(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, const float * GGML_RESTRICT xg, int8_t * GGML_RESTRICT L, int ngrid) {
int num_neighbors = neighbours[0];
GGML_ASSERT(num_neighbors > 0);
float best_score = FLT_MAX;
@ -4113,7 +4113,7 @@ static int iq1_sort_helper(const void * left, const void * right) {
#define IQ1S_BLOCK_SIZE 32
#define IQ1M_BLOCK_SIZE 16
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
float * scales,
float * weight,
float * sumx,
@ -4271,7 +4271,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
}
}
size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq1_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
float scales[QK_K/IQ1S_BLOCK_SIZE];
float weight[IQ1S_BLOCK_SIZE];
@ -4291,7 +4291,7 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t n
return nrow * nblock * sizeof(block_iq1_s);
}
static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
float * scales,
float * weight,
float * pairs,
@ -4539,7 +4539,7 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
}
}
size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
float scales[QK_K/IQ1M_BLOCK_SIZE];
float weight[IQ1M_BLOCK_SIZE];
@ -4570,7 +4570,7 @@ static inline int best_index_int8(int n, const int8_t * val, float x) {
return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
}
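best_index_int8 does a binary search over a sorted codebook and returns the index of the nearest entry; with the kvalues_iq4nl table above it maps a scaled float onto one of the 16 non-linear 4-bit levels. A minimal sketch with a made-up input:
// 27.0f falls between 25 (index 10) and 38 (index 11); the closer entry wins
const int idx = best_index_int8(16, kvalues_iq4nl, 27.0f);   // -> 10, i.e. value 25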
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * GGML_RESTRICT x,
ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
float * scales, float * weight, uint8_t * L,
const int8_t * values,
@ -4681,7 +4681,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
}
}
size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq4_nl(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK4_NL == 0);
int64_t nblock = n_per_row/QK4_NL;
char * qrow = (char *)dst;
@ -4703,8 +4703,8 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int64_t
return nrow * nblock * sizeof(block_iq4_nl);
}
//void quantize_row_iq4_nl_ref(const float * restrict x, void * restrict vy, int64_t k) {
void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y, int64_t k) {
//void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k) {
GGML_ASSERT(k%QK4_NL == 0);
int64_t nblock = k/QK4_NL;
uint8_t L[QK4_NL];
@ -4719,7 +4719,7 @@ void quantize_row_iq4_nl_ref(const float * restrict x, block_iq4_nl * restrict y
}
}
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq4_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
int64_t nblock = n_per_row/QK_K;
char * qrow = (char *)dst;
@ -4739,14 +4739,14 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int64_t
return nrow * nblock * sizeof(block_iq4_xs);
}
void quantize_row_iq4_xs_ref(const float * restrict x, block_iq4_xs * restrict y, int64_t k) {
void quantize_row_iq4_xs_ref(const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
quantize_iq4_xs(x, y, 1, k, NULL);
}
// =============================== 2.5625 bpw
static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights) {
static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {
const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);
@ -4914,7 +4914,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
}
}
size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
size_t quantize_iq2_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
GGML_ASSERT(n_per_row%QK_K == 0);
int64_t nblock = n_per_row/QK_K;
char * qrow = (char *)dst;
@ -4926,7 +4926,7 @@ size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int64_t n
return nrow * nblock * sizeof(block_iq2_s);
}
void quantize_row_iq2_s_ref(const float * restrict x, block_iq2_s * restrict y, int64_t k) {
void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k) {
assert(k % QK_K == 0);
quantize_iq2_s(x, y, 1, k, NULL);
}
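Each quantization type above ships a matching quantize/dequantize reference pair, which allows a simple lossy round-trip check. A minimal sketch for Q8_0 (assuming block_q8_0 and QK8_0 from ggml's quantization headers; k must be a multiple of QK8_0, as the asserts enforce):
// inside some test function
float src[QK8_0] = { /* ... input values ... */ };
float out[QK8_0];
block_q8_0 blk;
quantize_row_q8_0_ref(src, &blk, QK8_0);   // float -> 8-bit block with fp16 scale
dequantize_row_q8_0(&blk, out, QK8_0);     // back to float (lossy)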

View file

@ -41,6 +41,7 @@
#include "ggml-sycl/gemm.hpp"
#include "ggml-sycl/sycl_hw.hpp"
#include "ggml-sycl/getrows.hpp"
#include "ggml.h"
static bool g_sycl_loaded = false;
int g_ggml_sycl_debug = 0;
@ -3864,7 +3865,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_UNARY_OP_GELU_QUICK:
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_EXP:
return ggml_is_contiguous(op->src[0]);
return ggml_is_contiguous(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32);
default:
return false;
}
@ -3981,23 +3982,24 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_VIEW:
case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE:
return true;
case GGML_OP_ADD:
case GGML_OP_ADD1:
case GGML_OP_LOG:
case GGML_OP_SUB:
case GGML_OP_MUL:
case GGML_OP_DIV:
return true;
case GGML_OP_NORM:
case GGML_OP_RMS_NORM:
case GGML_OP_GROUP_NORM:
return ggml_is_contiguous(op->src[0]);
case GGML_OP_SCALE:
case GGML_OP_SQR:
case GGML_OP_SQRT:
case GGML_OP_SIN:
case GGML_OP_COS:
case GGML_OP_CLAMP:
case GGML_OP_LOG:
return (op->src[0]->type == GGML_TYPE_F32);
case GGML_OP_NORM:
case GGML_OP_RMS_NORM:
case GGML_OP_GROUP_NORM:
return ggml_is_contiguous(op->src[0]);
case GGML_OP_SCALE:
return true;
case GGML_OP_CONT:
return op->src[0]->type != GGML_TYPE_BF16;

View file

@ -566,9 +566,9 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
#endif
}
static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float * restrict x, size_t bx, const float * restrict y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
static void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * GGML_RESTRICT x, size_t bx, const float * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * GGML_RESTRICT x, size_t bx, ggml_fp16_t * GGML_RESTRICT y, size_t by, int nrc);
static void ggml_vec_dot_bf16(int n, float * GGML_RESTRICT s, size_t bs, ggml_bf16_t * GGML_RESTRICT x, size_t bx, ggml_bf16_t * GGML_RESTRICT y, size_t by, int nrc);
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
[GGML_TYPE_I8] = {

View file

@ -1207,18 +1207,30 @@ extern "C" {
const char * grammar_str,
const char * grammar_root);
/// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
/// @param trigger_words A list of words that will trigger the grammar sampler. This may be updated to a loose regex syntax (w/ ^) in the near future.
/// @param trigger_tokens A list of tokens that will trigger the grammar sampler.
LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy(
DEPRECATED(LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy(
const struct llama_vocab * vocab,
const char * grammar_str,
const char * grammar_root,
const char ** trigger_words,
size_t num_trigger_words,
const llama_token * trigger_tokens,
size_t num_trigger_tokens),
"use llama_sampler_init_grammar_lazy_patterns instead");
/// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
/// @param trigger_patterns A list of patterns that will trigger the grammar sampler. Patterns are matched from the start of the generation output, and the grammar sampler is fed content starting from the first match group.
/// @param trigger_tokens A list of tokens that will trigger the grammar sampler. The grammar sampler is fed content starting from (and including) the trigger token.
LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy_patterns(
const struct llama_vocab * vocab,
const char * grammar_str,
const char * grammar_root,
const char ** trigger_patterns,
size_t num_trigger_patterns,
const llama_token * trigger_tokens,
size_t num_trigger_tokens);
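For context, a minimal hypothetical use of the new pattern-based variant — the grammar stays dormant until a trigger pattern or token appears in the output (all values below are placeholders; pattern and capture-group semantics follow the doc comment above):
const char * patterns[] = { "(<tool_call>[\\s\\S]*)" };   // hypothetical trigger pattern
struct llama_sampler * smpl = llama_sampler_init_grammar_lazy_patterns(
    vocab, grammar_str, "root",
    patterns, 1,
    /*trigger_tokens=*/NULL, 0);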
/// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size)

View file

@ -49,7 +49,7 @@ logit_bias_max = 512
dry_seq_break_max = 128
# global vars
KcppVersion = "1.85.1"
KcppVersion = "1.86"
showdebug = True
kcpp_instance = None #global running instance
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}

View file

@ -9,7 +9,7 @@ These templates can be updated with the following commands:
./scripts/get_chat_template.py deepseek-ai/DeepSeek-R1-Distill-Qwen-32B > models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
./scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 > models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
./scripts/get_chat_template.py google/gemma-2-2b-it > models/templates/google-gemma-2-2b-it.jinja
./scripts/get_chat_template.py meetkai/functionary-medium-v3. > models/templates/meetkai-functionary-medium-v3.jinja
./scripts/get_chat_template.py meetkai/functionary-medium-v3.1 > models/templates/meetkai-functionary-medium-v3.1.jinja
./scripts/get_chat_template.py meetkai/functionary-medium-v3.2 > models/templates/meetkai-functionary-medium-v3.2.jinja
./scripts/get_chat_template.py meta-llama/Llama-3.1-8B-Instruct > models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja
./scripts/get_chat_template.py meta-llama/Llama-3.2-3B-Instruct > models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja

View file

@ -0,0 +1,12 @@
aiohttp~=3.9.3
pytest~=8.3.3
huggingface_hub~=0.23.2
matplotlib~=3.10.0
numpy~=1.26.4
openai~=1.55.3
pandas~=2.2.3
prometheus-client~=0.20.0
requests~=2.32.3
wget~=3.2
typer~=0.15.1
seaborn~=0.13.2
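This new file pins the test/tooling dependencies; assuming it lands as requirements.txt in its target directory, installation is the usual:
pip install -r requirements.txt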

5
scripts/apple/validate-apps.sh Executable file
View file

@ -0,0 +1,5 @@
#!/bin/bash
./scripts/apple/validate-ios.sh
./scripts/apple/validate-macos.sh
./scripts/apple/validate-visionos.sh
./scripts/apple/validate-tvos.sh
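A typical flow (assumption: build-xcframework.sh from this commit has already produced build-apple/llama.xcframework, which the per-platform scripts expect):
./build-xcframework.sh
./scripts/apple/validate-apps.sh   # chains the ios/macos/visionos/tvos validators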

820
scripts/apple/validate-ios.sh Executable file
View file

@ -0,0 +1,820 @@
#!/bin/bash
# validate-ios.sh - Validate iOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional; can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-ios.sh
APPLE_ID=${APPLE_ID:-""}
APPLE_PASSWORD=${APPLE_PASSWORD:-""}
# Ensure the script exits on error
set -e
# Function to print usage instructions
print_usage() {
echo "Usage: ./validate-ios.sh [OPTIONS]"
echo ""
echo "Options:"
echo " --help Show this help message"
echo " --apple-id EMAIL Apple ID email for validation"
echo " --apple-password PWD App-specific password for Apple ID"
echo ""
echo "Environment variables:"
echo " APPLE_ID Apple ID email for validation"
echo " APPLE_PASSWORD App-specific password for Apple ID"
echo ""
echo "Notes:"
echo " - Command line options take precedence over environment variables"
echo " - Authentication is optional. If not provided, alternative validation will be performed"
echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
}
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--help)
print_usage
exit 0
;;
--apple-id)
APPLE_ID="$2"
shift 2
;;
--apple-password)
APPLE_PASSWORD="$2"
shift 2
;;
*)
echo "Unknown option: $1"
print_usage
exit 1
;;
esac
done
# Function to clean up in case of error
cleanup() {
# Don't clean up temp files on error to help with debugging
echo "===== iOS Validation Process Failed ====="
exit 1
}
# Set up trap to call cleanup function on error
trap cleanup ERR
set -e # Exit on any error
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
BUILD_DIR="${ROOT_DIR}/validation-builds/ios"
# Configuration
APP_NAME="iOSLlamaTest"
BUNDLE_ID="org.ggml.iOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories
mkdir -p "${BUILD_DIR}"
mkdir -p "${TEMP_DIR}"
mkdir -p "${VALIDATION_DIR}"
echo "===== iOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test iOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>UILaunchScreen</key>
<dict/>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>armv7</string>
</array>
<key>UISupportedInterfaceOrientations</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
</array>
</dict>
</plist>
EOF
# Create SwiftUI app files
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
# Create App.swift
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
EOF
# Create ContentView.swift
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 20) {
Text("Llama Framework Test")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.body)
Text("n_batch: \(params.n_batch)")
.font(.body)
Spacer()
}
.padding()
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
EOF
# Create project.pbxproj, fixing the framework search path issues
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
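Note the heredoc quoting convention used throughout this script: a quoted delimiter (<< 'EOF') writes the pbxproj fragment verbatim, so the fixed object IDs and $(VAR) build settings survive untouched, while an unquoted delimiter (<< EOF) lets the shell expand ${APP_NAME} first. A two-line illustration:
cat << 'EOF'    # literal: prints ${APP_NAME} as-is
${APP_NAME}
EOF
cat << EOF      # expanded: prints the variable's value
${APP_NAME}
EOF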
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Add the rest of the project file with fixed framework search paths
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* iOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "iOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1240;
LastUpgradeCheck = 1240;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 12.4;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.4;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.4;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = iphoneos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "iOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "iOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.iOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
echo "Copying XCFramework to test project..."
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Now use xcodebuild with an explicitly defined product name
xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk iphoneos -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
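# Note: the CODE_SIGN_* overrides above produce an unsigned (ad-hoc) archive so
# the build succeeds without a signing identity; a store submission would need
# real signing. SWIFT_OPTIMIZATION_LEVEL="-Onone" presumably trades runtime
# speed for a faster build in this smoke test.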
# 4. Create IPA from archive
echo "Creating IPA from archive..."
mkdir -p "${TEMP_DIR}/Payload"
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
# Check and log app structure before zipping
echo "App structure:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
echo "Frameworks:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
cd "${TEMP_DIR}"
zip -r "${IPA_PATH}" Payload
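# An .ipa is just a zip archive with the app under Payload/; a quick optional
# sanity check of the archive contents:
#   unzip -l "${IPA_PATH}" | head -n 10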
# Check embedded provisioning profile
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
if [ -n "$PROVISIONING_PROFILE" ]; then
echo "Found embedded provisioning profile:"
security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
else
echo "No embedded provisioning profile found (expected for ad-hoc builds)"
fi
# 5. Validate the IPA
echo "Validating IPA..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Check if authentication credentials are provided
AUTH_ARGS=""
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
echo "Using Apple ID authentication for validation..."
AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
else
echo "No authentication credentials provided. Will perform basic validation."
echo "To use your personal developer account, you can run the script with:"
echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-ios.sh"
echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# Run validation with detailed output
echo "Running validation with altool..."
if [ -n "$AUTH_ARGS" ]; then
# Use eval to properly handle the quoted arguments
eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type ios --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}"
else
xcrun altool --validate-app -f "${IPA_PATH}" --type ios --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
fi
VALIDATION_RESULT=$?
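# Note: because of the pipe into tee, $? above is tee's exit status, not
# altool's. A bash-specific sketch that captures altool's status instead:
#   xcrun altool --validate-app -f "${IPA_PATH}" --type ios --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
#   VALIDATION_RESULT=${PIPESTATUS[0]}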
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if validation failed because the app isn't in App Store Connect
if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
echo "This is expected for apps that haven't been registered in App Store Connect yet."
echo "This doesn't indicate a problem with the build or framework."
# Perform alternative validation
echo "Performing alternative validation checks..."
# Check if IPA was created successfully
if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
echo "✅ IPA file created successfully"
else
echo "❌ IPA file not created or empty"
FINAL_VALIDATION_RESULT=1
fi
# Check if app binary exists and is executable
if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
echo "✅ App binary exists and is executable"
else
echo "❌ App binary missing or not executable"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework was properly embedded
if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
echo "✅ llama.framework properly embedded"
else
echo "❌ llama.framework not properly embedded"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework binary exists
if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
echo "✅ Framework binary exists"
# Further validate framework by checking architecture
ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|armv7\\|x86_64" | tr '\n' ' ')
if [ -n "$ARCHS" ]; then
echo "✅ Framework architecture(s): $ARCHS"
else
echo "⚠️ Could not determine framework architecture"
fi
else
echo "❌ Framework binary missing"
FINAL_VALIDATION_RESULT=1
fi
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
else
echo "❌ Alternative validation FAILED: Issues found with the app or framework"
fi
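# If altool only complained about missing credentials, its local package checks
# still ran; the branch below treats that as a basic-validation pass.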
elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then
echo "✅ iOS Validation PASSED: IPA successfully validated"
echo "Results saved to ${VALIDATION_OUTPUT}"
else
echo "❌ iOS Validation FAILED: IPA validation found issues"
echo "See validation output at ${VALIDATION_OUTPUT}"
echo ""
echo "==== VALIDATION ERRORS ===="
# Try to extract specific errors from the output
if grep -q "Error" "${VALIDATION_OUTPUT}"; then
grep -A 5 "Error" "${VALIDATION_OUTPUT}"
else
# If no specific error found, show the whole log
cat "${VALIDATION_OUTPUT}"
fi
# Additional debugging: check IPA contents
echo ""
echo "==== IPA CONTENTS ===="
mkdir -p "${TEMP_DIR}/ipa_contents"
unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
# Check for code signing issues
echo ""
echo "==== CODE SIGNING INFO ===="
codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
# Check embedded frameworks
echo ""
echo "==== FRAMEWORK INFO ===="
ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
echo ""
echo "Temporary files kept for inspection at: ${TEMP_DIR}"
echo "===== iOS Validation Process Failed ====="
exit 1
fi
# Clean up temporary files but keep build artifacts
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "Cleaning up temporary files..."
#rm -rf "${TEMP_DIR}"
fi
echo "===== iOS Validation Process Completed ====="
exit $FINAL_VALIDATION_RESULT

781
scripts/apple/validate-macos.sh Executable file

@ -0,0 +1,781 @@
#!/bin/bash
# validate-macos.sh - Validate macOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional; can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-macos.sh
APPLE_ID=${APPLE_ID:-""}
APPLE_PASSWORD=${APPLE_PASSWORD:-""}
# Ensure the script exits on error
set -e
# Function to print usage instructions
print_usage() {
echo "Usage: ./validate-macos.sh [OPTIONS]"
echo ""
echo "Options:"
echo " --help Show this help message"
echo " --apple-id EMAIL Apple ID email for validation"
echo " --apple-password PWD App-specific password for Apple ID"
echo ""
echo "Environment variables:"
echo " APPLE_ID Apple ID email for validation"
echo " APPLE_PASSWORD App-specific password for Apple ID"
echo ""
echo "Notes:"
echo " - Command line options take precedence over environment variables"
echo " - Authentication is optional. If not provided, alternative validation will be performed"
echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
}
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--help)
print_usage
exit 0
;;
--apple-id)
APPLE_ID="$2"
shift 2
;;
--apple-password)
APPLE_PASSWORD="$2"
shift 2
;;
*)
echo "Unknown option: $1"
print_usage
exit 1
;;
esac
done
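# Example invocation with flags (placeholder credentials):
#   ./validate-macos.sh --apple-id you@example.com --apple-password abcd-efgh-ijkl-mnop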
# Function to clean up in case of error
cleanup() {
# Don't clean up temp files on error to help with debugging
echo "===== macOS Validation Process Failed ====="
exit 1
}
# Set up trap to call cleanup function on error
trap cleanup ERR
set -e # Exit on any error
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
BUILD_DIR="${ROOT_DIR}/validation-builds/macos"
# Configuration
APP_NAME="MacOSLlamaTest"
BUNDLE_ID="org.ggml.MacOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
APP_PATH="${BUILD_DIR}/${APP_NAME}.app"
ZIP_PATH="${BUILD_DIR}/${APP_NAME}.zip"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories
mkdir -p "${BUILD_DIR}"
mkdir -p "${TEMP_DIR}"
mkdir -p "${VALIDATION_DIR}"
echo "===== macOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test macOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSMinimumSystemVersion</key>
<string>12.0</string>
<key>NSHumanReadableCopyright</key>
<string>Copyright © 2025 GGML. All rights reserved.</string>
<key>NSPrincipalClass</key>
<string>NSApplication</string>
</dict>
</plist>
EOF
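# Note: LSMinimumSystemVersion above should stay in sync with
# MACOSX_DEPLOYMENT_TARGET in the generated project (both 12.0 here).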
# Create SwiftUI app files
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
# Create App.swift
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
EOF
# Create ContentView.swift with macOS specific elements
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 20) {
Text("Llama Framework Test on macOS")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.body)
Text("n_batch: \(params.n_batch)")
.font(.body)
Spacer()
}
.padding()
.frame(width: 600, height: 400)
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
EOF
# Create project.pbxproj, fixing the framework search paths issues
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
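# The repeated-digit/letter tokens above are hand-written placeholder object
# IDs; Xcode normally generates 24-character hex IDs, but any unique token
# works as long as the cross-references match.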
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Add the rest of the project file with fixed framework search paths
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* MacOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "MacOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1240;
LastUpgradeCheck = 1240;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 12.4;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and macOS settings
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 12.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MACOSX_DEPLOYMENT_TARGET = 12.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = macosx;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
COMBINE_HIDPI_IMAGES = YES;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "MacOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
COMBINE_HIDPI_IMAGES = YES;
DEVELOPMENT_TEAM = "";
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "MacOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.MacOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
echo "Copying XCFramework to test project..."
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Now use xcodebuild with an explicitly defined product name for macOS
xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk macosx -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
# 4. Create a package for distribution
echo "Creating distributable package from archive..."
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${APP_PATH}"
# Check and log app structure
echo "App structure:"
ls -la "${APP_PATH}"
echo "Frameworks:"
ls -la "${APP_PATH}/Contents/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
# Create a zip file for potential distribution
cd "${BUILD_DIR}"
zip -r "${ZIP_PATH}" "${APP_NAME}.app"
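# Note: .app bundles contain symlinks (e.g. framework Versions/), which zip -r
# stores as regular files unless -y is passed. For a notarization-ready
# archive, something like ditto preserves the bundle structure:
#   ditto -c -k --keepParent "${APP_NAME}.app" "${ZIP_PATH}"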
# Check embedded provisioning profile
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${APP_PATH}/Contents" -name "embedded.provisionprofile" 2>/dev/null)
if [ -n "$PROVISIONING_PROFILE" ]; then
echo "Found embedded provisioning profile:"
security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
else
echo "No embedded provisioning profile found (expected for ad-hoc builds)"
fi
# 5. Validate the app
echo "Validating macOS app..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Check if authentication credentials are provided
AUTH_ARGS=""
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
echo "Using Apple ID authentication for validation..."
AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
else
echo "No authentication credentials provided. Will perform basic validation."
echo "To use your personal developer account, you can run the script with:"
echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-macos.sh"
echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# For macOS, use notarytool or alternative checks, since altool does not
# support validating macOS apps the way it does iOS apps
echo "Note: For macOS, the formal notarization process requires Apple Developer credentials."
echo "Performing alternative validation checks..."
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if app was created successfully
if [ -d "${APP_PATH}" ] && [ -s "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]; then
echo "✅ App package created successfully"
else
echo "❌ App package not created or binary missing"
FINAL_VALIDATION_RESULT=1
fi
# Check if app binary exists and is executable
if [ -f "${APP_PATH}/Contents/MacOS/${APP_NAME}" ] && [ -x "${APP_PATH}/Contents/MacOS/${APP_NAME}" ]; then
echo "✅ App binary exists and is executable"
else
echo "❌ App binary missing or not executable"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework was properly embedded
if [ -d "${APP_PATH}/Contents/Frameworks/llama.framework" ]; then
echo "✅ llama.framework properly embedded"
else
echo "❌ llama.framework not properly embedded"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework binary exists
if [ -f "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" ]; then
echo "✅ Framework binary exists"
# Further validate framework by checking architecture
ARCHS=$(lipo -info "${APP_PATH}/Contents/Frameworks/llama.framework/Versions/A/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
if [ -n "$ARCHS" ]; then
echo "✅ Framework architecture(s): $ARCHS"
else
echo "⚠️ Could not determine framework architecture"
fi
else
echo "❌ Framework binary missing"
FINAL_VALIDATION_RESULT=1
fi
# Check code signing
echo ""
echo "==== CODE SIGNING INFO ===="
codesign -vv -d "${APP_PATH}" 2>&1 || echo "Code signing verification not available (expected for ad-hoc builds)"
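# Once the app is properly signed, a stricter verification could be run
# (sketch):
#   codesign --verify --deep --strict "${APP_PATH}"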
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
if [ -n "$AUTH_ARGS" ]; then
echo ""
echo "To notarize this app with Apple (requires Apple Developer account):"
echo "xcrun notarytool submit \"${ZIP_PATH}\" --apple-id \"your-apple-id\" --password \"your-app-specific-password\" --team-id \"your-team-id\" --wait"
echo ""
fi
echo "✅ Validation PASSED: macOS app built successfully with embedded framework"
else
echo "❌ Validation FAILED: Issues found with the app or framework"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
echo ""
echo "Temporary files kept for inspection at: ${TEMP_DIR}"
echo "===== macOS Validation Process Failed ====="
exit 1
fi
# Clean up temporary files but keep build artifacts
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "Cleaning up temporary files..."
#rm -rf "${TEMP_DIR}"
fi
echo "===== macOS Validation Process Completed ====="
echo "App package available at: ${APP_PATH}"
echo "Zipped app available at: ${ZIP_PATH}"
exit $FINAL_VALIDATION_RESULT

813
scripts/apple/validate-tvos.sh Executable file

@ -0,0 +1,813 @@
#!/bin/bash
# validate-tvos.sh - Validate tvOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional; can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-tvos.sh
APPLE_ID=${APPLE_ID:-""}
APPLE_PASSWORD=${APPLE_PASSWORD:-""}
# Ensure the script exits on error
set -e
# Function to print usage instructions
print_usage() {
echo "Usage: ./validate-tvos.sh [OPTIONS]"
echo ""
echo "Options:"
echo " --help Show this help message"
echo " --apple-id EMAIL Apple ID email for validation"
echo " --apple-password PWD App-specific password for Apple ID"
echo ""
echo "Environment variables:"
echo " APPLE_ID Apple ID email for validation"
echo " APPLE_PASSWORD App-specific password for Apple ID"
echo ""
echo "Notes:"
echo " - Command line options take precedence over environment variables"
echo " - Authentication is optional. If not provided, alternative validation will be performed"
echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
}
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--help)
print_usage
exit 0
;;
--apple-id)
APPLE_ID="$2"
shift 2
;;
--apple-password)
APPLE_PASSWORD="$2"
shift 2
;;
*)
echo "Unknown option: $1"
print_usage
exit 1
;;
esac
done
# Function to clean up in case of error
cleanup() {
# Don't clean up temp files on error to help with debugging
echo "===== tvOS Validation Process Failed ====="
exit 1
}
# Set up trap to call cleanup function on error
trap cleanup ERR
set -e # Exit on any error
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
BUILD_DIR="${ROOT_DIR}/validation-builds/tvos"
# Configuration
APP_NAME="TVOSLlamaTest"
BUNDLE_ID="org.ggml.TVOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories
mkdir -p "${BUILD_DIR}"
mkdir -p "${TEMP_DIR}"
mkdir -p "${VALIDATION_DIR}"
echo "===== tvOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test tvOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>arm64</string>
</array>
</dict>
</plist>
EOF
# Create SwiftUI app files
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
# Create App.swift
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
EOF
# Create ContentView.swift with tvOS specific elements
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 40) {
Text("Llama Framework Test on tvOS")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.title2)
Text("n_batch: \(params.n_batch)")
.font(.title2)
Spacer()
}
.padding(50)
// Larger size suitable for TV display
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
EOF
# Create project.pbxproj, fixing the framework search paths issues
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Add the rest of the project file with fixed framework search paths
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* TVOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "TVOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1240;
LastUpgradeCheck = 1240;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 12.4;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 12.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS and tvOS settings
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
TVOS_DEPLOYMENT_TARGET = 15.0;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = appletvos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
TVOS_DEPLOYMENT_TARGET = 15.0;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = appletvos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "TVOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = 3;
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "TVOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.TVOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = 3;
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
echo "Copying XCFramework to test project..."
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1240"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Now use xcodebuild with an explicitly defined product name for tvOS
xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk appletvos -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
# 4. Create IPA from archive
echo "Creating IPA from archive..."
mkdir -p "${TEMP_DIR}/Payload"
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
# Check and log app structure before zipping
echo "App structure:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
echo "Frameworks:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
cd "${TEMP_DIR}"
zip -r "${IPA_PATH}" Payload
# Check embedded provisioning profile
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
if [ -n "$PROVISIONING_PROFILE" ]; then
echo "Found embedded provisioning profile:"
security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
else
echo "No embedded provisioning profile found (expected for ad-hoc builds)"
fi
# 5. Validate the IPA
echo "Validating IPA..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Check if authentication credentials are provided
AUTH_ARGS=""
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
echo "Using Apple ID authentication for validation..."
AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
else
echo "No authentication credentials provided. Will perform basic validation."
echo "To use your personal developer account, you can run the script with:"
echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-tvos.sh"
echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# Run validation with detailed output
echo "Running validation with altool..."
if [ -n "$AUTH_ARGS" ]; then
# Use eval to properly handle the quoted arguments
eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type tvos --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}"
else
xcrun altool --validate-app -f "${IPA_PATH}" --type tvos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
fi
VALIDATION_RESULT=$?
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if validation failed because the app isn't in App Store Connect
if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
echo "This is expected for apps that haven't been registered in App Store Connect yet."
echo "This doesn't indicate a problem with the build or framework."
# Perform alternative validation
echo "Performing alternative validation checks..."
# Check if IPA was created successfully
if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
echo "✅ IPA file created successfully"
else
echo "❌ IPA file not created or empty"
FINAL_VALIDATION_RESULT=1
fi
# Check if app binary exists and is executable
if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
echo "✅ App binary exists and is executable"
else
echo "❌ App binary missing or not executable"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework was properly embedded
if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
echo "✅ llama.framework properly embedded"
else
echo "❌ llama.framework not properly embedded"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework binary exists
if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
echo "✅ Framework binary exists"
# Further validate framework by checking architecture
ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
if [ -n "$ARCHS" ]; then
echo "✅ Framework architecture(s): $ARCHS"
else
echo "⚠️ Could not determine framework architecture"
fi
else
echo "❌ Framework binary missing"
FINAL_VALIDATION_RESULT=1
fi
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
else
echo "❌ Alternative validation FAILED: Issues found with the app or framework"
fi
elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then
echo "✅ tvOS Validation PASSED: IPA successfully validated"
echo "Results saved to ${VALIDATION_OUTPUT}"
else
FINAL_VALIDATION_RESULT=1 # mark the failure so the script exits non-zero below
echo "❌ tvOS Validation FAILED: IPA validation found issues"
echo "See validation output at ${VALIDATION_OUTPUT}"
echo ""
echo "==== VALIDATION ERRORS ===="
# Try to extract specific errors from the output
if grep -q "Error" "${VALIDATION_OUTPUT}"; then
grep -A 5 "Error" "${VALIDATION_OUTPUT}"
else
# If no specific error found, show the whole log
cat "${VALIDATION_OUTPUT}"
fi
# Additional debugging: check IPA contents
echo ""
echo "==== IPA CONTENTS ===="
mkdir -p "${TEMP_DIR}/ipa_contents"
unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
# Check for code signing issues
echo ""
echo "==== CODE SIGNING INFO ===="
codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
# Check embedded frameworks
echo ""
echo "==== FRAMEWORK INFO ===="
ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
echo ""
echo "Temporary files kept for inspection at: ${TEMP_DIR}"
echo "===== tvOS Validation Process Failed ====="
exit 1
fi
# Temporary-file cleanup is currently disabled (rm commented out) so artifacts stay inspectable
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "Keeping temporary files at: ${TEMP_DIR} (cleanup disabled)"
#rm -rf "${TEMP_DIR}"
fi
echo "===== tvOS Validation Process Completed ====="
exit $FINAL_VALIDATION_RESULT
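For reference, the IPA produced above is a plain zip archive whose only required top-level entry is Payload/<App>.app, so the structural checks this script performs can be reproduced offline. A minimal Python sketch (the check_ipa helper and its default app name are illustrative, mirroring this script's values):

import zipfile

def check_ipa(ipa_path: str, app_name: str = "TVOSLlamaTest") -> bool:
    # An IPA is just a zip: verify the app binary and the embedded llama.framework
    prefix = f"Payload/{app_name}.app/"
    with zipfile.ZipFile(ipa_path) as z:
        names = set(z.namelist())
    has_binary = f"{prefix}{app_name}" in names
    has_framework = any(n.startswith(f"{prefix}Frameworks/llama.framework/") for n in names)
    return has_binary and has_framework

# check_ipa("validation-builds/tvos/TVOSLlamaTest.ipa")  # path illustrative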

View file

@ -0,0 +1,811 @@
#!/bin/bash
# validate-visionos.sh - Validate visionOS Application with embedded llama.xcframework using SwiftUI
# Authentication options (optional; can be set via environment variables)
# To use: export APPLE_ID=your.email@example.com
# export APPLE_PASSWORD=your-app-specific-password
# ./validate-visionos.sh
APPLE_ID=${APPLE_ID:-""}
APPLE_PASSWORD=${APPLE_PASSWORD:-""}
# Ensure the script exits on error
set -e
# Function to print usage instructions
print_usage() {
echo "Usage: ./validate-visionos.sh [OPTIONS]"
echo ""
echo "Options:"
echo " --help Show this help message"
echo " --apple-id EMAIL Apple ID email for validation"
echo " --apple-password PWD App-specific password for Apple ID"
echo ""
echo "Environment variables:"
echo " APPLE_ID Apple ID email for validation"
echo " APPLE_PASSWORD App-specific password for Apple ID"
echo ""
echo "Notes:"
echo " - Command line options take precedence over environment variables"
echo " - Authentication is optional. If not provided, alternative validation will be performed"
echo " - For APPLE_PASSWORD, use an app-specific password generated at https://appleid.apple.com/account/manage"
}
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
--help)
print_usage
exit 0
;;
--apple-id)
APPLE_ID="$2"
shift 2
;;
--apple-password)
APPLE_PASSWORD="$2"
shift 2
;;
*)
echo "Unknown option: $1"
print_usage
exit 1
;;
esac
done
# Function to clean up in case of error
cleanup() {
# Don't clean up temp files on error to help with debugging
echo "===== visionOS Validation Process Failed ====="
exit 1
}
# Set up trap to call cleanup function on error
trap cleanup ERR
set -e # Exit on any error
ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../.." && pwd )"
BUILD_DIR="${ROOT_DIR}/validation-builds/visionos"
# Configuration
APP_NAME="VisionOSLlamaTest"
BUNDLE_ID="org.ggml.VisionOSLlamaTest"
XCFRAMEWORK_PATH="${ROOT_DIR}/build-apple/llama.xcframework"
TEMP_DIR="${BUILD_DIR}/temp"
ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive"
IPA_PATH="${BUILD_DIR}/${APP_NAME}.ipa"
VALIDATION_DIR="${BUILD_DIR}/validation"
# Create necessary directories
mkdir -p "${BUILD_DIR}"
mkdir -p "${TEMP_DIR}"
mkdir -p "${VALIDATION_DIR}"
echo "===== visionOS Validation Process Started ====="
# 1. Create a simple test app project
echo "Creating test visionOS app project..."
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Info.plist" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>${APP_NAME}</string>
<key>CFBundleIdentifier</key>
<string>${BUNDLE_ID}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>${APP_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
</dict>
</plist>
EOF
# Create SwiftUI app files
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources"
# Create App.swift
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/App.swift" << EOF
import SwiftUI
import llama
@main
struct LlamaTestApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
EOF
# Create ContentView.swift with visionOS specific elements
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}/Sources/ContentView.swift" << EOF
import SwiftUI
import llama
struct ContentView: View {
// Test that we can initialize a llama context params struct
let params = llama_context_default_params()
var body: some View {
VStack(spacing: 20) {
Text("Llama Framework Test on visionOS")
.font(.largeTitle)
.padding()
Text("llama_context_default_params() created successfully")
.font(.headline)
.multilineTextAlignment(.center)
.padding()
// Display some param values to confirm the framework is working
Text("n_ctx: \(params.n_ctx)")
.font(.body)
Text("n_batch: \(params.n_batch)")
.font(.body)
Spacer()
}
.padding()
.frame(width: 500, height: 400)
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
EOF
# Create project.pbxproj, fixing the framework search paths issues
mkdir -p "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj"
cat > "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 54;
objects = {
/* Begin PBXBuildFile section */
11111111111111111111111 /* App.swift in Sources */ = {isa = PBXBuildFile; fileRef = 22222222222222222222222; };
33333333333333333333333 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 44444444444444444444444; };
55555555555555555555555 /* llama.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
77777777777777777777777 /* llama.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 66666666666666666666666; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
88888888888888888888888 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
77777777777777777777777 /* llama.xcframework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
99999999999999999999999 /* ${APP_NAME}.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "${APP_NAME}.app"; sourceTree = BUILT_PRODUCTS_DIR; };
22222222222222222222222 /* App.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = App.swift; sourceTree = "<group>"; };
44444444444444444444444 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
66666666666666666666666 /* llama.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; path = llama.xcframework; sourceTree = "<group>"; };
/* End PBXFileReference section */
EOF
# Add the rest of the project file with fixed framework search paths
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXFrameworksBuildPhase section */
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
55555555555555555555555 /* llama.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */ = {
isa = PBXGroup;
children = (
99999999999999999999999 /* ${APP_NAME}.app */,
);
name = Products;
sourceTree = "<group>";
};
EOF
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */ = {
isa = PBXGroup;
children = (
66666666666666666666666 /* llama.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
EEEEEEEEEEEEEEEEEEEEEEEE = {
isa = PBXGroup;
children = (
FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */,
CCCCCCCCCCCCCCCCCCCCCCCC /* Products */,
DDDDDDDDDDDDDDDDDDDDDDDD /* Frameworks */,
);
sourceTree = "<group>";
};
FFFFFFFFFFFFFFFFFFFFFFFF /* VisionOSLlamaTest */ = {
isa = PBXGroup;
children = (
1111111111111111111111AA /* Sources */,
AAAAAAAAAAAAAAAAAAAAAAA /* Info.plist */,
);
path = "VisionOSLlamaTest";
sourceTree = "<group>";
};
1111111111111111111111AA /* Sources */ = {
isa = PBXGroup;
children = (
22222222222222222222222 /* App.swift */,
44444444444444444444444 /* ContentView.swift */,
);
path = Sources;
sourceTree = "<group>";
};
/* End PBXGroup section */
EOF
# Continue with the project.pbxproj file, using the APP_NAME variable appropriately
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin PBXNativeTarget section */
3333333333333333333333AA /* ${APP_NAME} */ = {
isa = PBXNativeTarget;
buildConfigurationList = 4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */;
buildPhases = (
5555555555555555555555AA /* Sources */,
BBBBBBBBBBBBBBBBBBBBBBBB /* Frameworks */,
6666666666666666666666AA /* Resources */,
88888888888888888888888 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = "${APP_NAME}";
productName = "${APP_NAME}";
productReference = 99999999999999999999999 /* ${APP_NAME}.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
7777777777777777777777AA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1510;
LastUpgradeCheck = 1510;
TargetAttributes = {
3333333333333333333333AA = {
CreatedOnToolsVersion = 15.1;
};
};
};
buildConfigurationList = 8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */;
compatibilityVersion = "Xcode 15.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = EEEEEEEEEEEEEEEEEEEEEEEE;
productRefGroup = CCCCCCCCCCCCCCCCCCCCCCCC /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
3333333333333333333333AA /* ${APP_NAME} */,
);
};
/* End PBXProject section */
EOF
# Add the rest of the file with correct FRAMEWORK_SEARCH_PATHS
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << 'EOF'
/* Begin PBXResourcesBuildPhase section */
6666666666666666666666AA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
5555555555555555555555AA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
33333333333333333333333 /* ContentView.swift in Sources */,
11111111111111111111111 /* App.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
9999999999999999999999AA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = xros;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
XROS_DEPLOYMENT_TARGET = 1.0;
};
name = Debug;
};
AAAAAAAAAAAAAAAAAAAAABBB /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = xros;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
XROS_DEPLOYMENT_TARGET = 1.0;
};
name = Release;
};
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = "$(PROJECT_DIR)";
INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SUPPORTED_PLATFORMS = "xros xrsimulator";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2,7";
};
name = Debug;
};
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
FRAMEWORK_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)",
);
INFOPLIST_FILE = "VisionOSLlamaTest/Info.plist";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = "org.ggml.VisionOSLlamaTest";
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = "";
SUPPORTED_PLATFORMS = "xros xrsimulator";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2,7";
};
name = Release;
};
/* End XCBuildConfiguration section */
EOF
# Finish the project.pbxproj file
cat >> "${TEMP_DIR}/${APP_NAME}/${APP_NAME}.xcodeproj/project.pbxproj" << EOF
/* Begin XCConfigurationList section */
8888888888888888888888AA /* Build configuration list for PBXProject "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
9999999999999999999999AA /* Debug */,
AAAAAAAAAAAAAAAAAAAAABBB /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
4444444444444444444444AA /* Build configuration list for PBXNativeTarget "${APP_NAME}" */ = {
isa = XCConfigurationList;
buildConfigurations = (
BBBBBBBBBBBBBBBBBBBBBBCCC /* Debug */,
CCCCCCCCCCCCCCCCCCCCCCDDD /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 7777777777777777777777AA /* Project object */;
}
EOF
# 2. Copy XCFramework to test project
echo "Copying XCFramework to test project..."
cp -R "${XCFRAMEWORK_PATH}" "${TEMP_DIR}/${APP_NAME}/"
# 3. Build and archive the app
echo "Building and archiving test app..."
cd "${TEMP_DIR}/${APP_NAME}"
# Create a simple xcscheme file to avoid xcodebuild scheme issues
mkdir -p "${APP_NAME}.xcodeproj/xcshareddata/xcschemes"
cat > "${APP_NAME}.xcodeproj/xcshareddata/xcschemes/${APP_NAME}.xcscheme" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1510"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "3333333333333333333333AA"
BuildableName = "${APP_NAME}.app"
BlueprintName = "${APP_NAME}"
ReferencedContainer = "container:${APP_NAME}.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
EOF
# Now use xcodebuild with an explicitly defined product name for visionOS
xcodebuild -project "${APP_NAME}.xcodeproj" -scheme "${APP_NAME}" -sdk xros -configuration Release archive -archivePath "${ARCHIVE_PATH}" CODE_SIGN_IDENTITY="-" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO PRODUCT_NAME="${APP_NAME}" SWIFT_OPTIMIZATION_LEVEL="-Onone" -quiet
# 4. Create IPA from archive
echo "Creating IPA from archive..."
mkdir -p "${TEMP_DIR}/Payload"
cp -R "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" "${TEMP_DIR}/Payload/"
# Check and log app structure before zipping
echo "App structure:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/"
echo "Frameworks:"
ls -la "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
cd "${TEMP_DIR}"
zip -r "${IPA_PATH}" Payload
# Check embedded provisioning profile
echo "Checking provisioning profile (if any)..."
PROVISIONING_PROFILE=$(find "${ARCHIVE_PATH}/Products/Applications/${APP_NAME}.app" -name "embedded.mobileprovision" 2>/dev/null)
if [ -n "$PROVISIONING_PROFILE" ]; then
echo "Found embedded provisioning profile:"
security cms -D -i "$PROVISIONING_PROFILE" || echo "Unable to decode provisioning profile"
else
echo "No embedded provisioning profile found (expected for ad-hoc builds)"
fi
# 5. Validate the IPA
echo "Validating IPA..."
VALIDATION_OUTPUT="${VALIDATION_DIR}/validation_output.txt"
# Check if authentication credentials are provided
AUTH_ARGS=""
if [ -n "$APPLE_ID" ] && [ -n "$APPLE_PASSWORD" ]; then
echo "Using Apple ID authentication for validation..."
AUTH_ARGS="--username \"$APPLE_ID\" --password \"$APPLE_PASSWORD\""
else
echo "No authentication credentials provided. Will perform basic validation."
echo "To use your personal developer account, you can run the script with:"
echo " APPLE_ID='your.email@example.com' APPLE_PASSWORD='your-app-specific-password' ./validate-visionos.sh"
echo "Note: You need to create an app-specific password at https://appleid.apple.com/account/manage"
fi
# Run validation with detailed output
echo "Running validation with altool..."
if [ -n "$AUTH_ARGS" ]; then
# Use eval to properly handle the quoted arguments
eval "xcrun altool --validate-app -f \"${IPA_PATH}\" --type visionos --output-format xml $AUTH_ARGS" 2>&1 | tee "${VALIDATION_OUTPUT}"
else
xcrun altool --validate-app -f "${IPA_PATH}" --type visionos --output-format xml 2>&1 | tee "${VALIDATION_OUTPUT}"
fi
VALIDATION_RESULT=$?
# Final validation result
FINAL_VALIDATION_RESULT=0
# Check if validation failed because the app isn't in App Store Connect
if grep -q "No suitable application records were found" "${VALIDATION_OUTPUT}"; then
echo "⚠️ App Store Connect Warning: The app bundle identifier is not found in App Store Connect"
echo "This is expected for apps that haven't been registered in App Store Connect yet."
echo "This doesn't indicate a problem with the build or framework."
# Perform alternative validation
echo "Performing alternative validation checks..."
# Check if IPA was created successfully
if [ -f "${IPA_PATH}" ] && [ -s "${IPA_PATH}" ]; then
echo "✅ IPA file created successfully"
else
echo "❌ IPA file not created or empty"
FINAL_VALIDATION_RESULT=1
fi
# Check if app binary exists and is executable
if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ] && [ -x "${TEMP_DIR}/Payload/${APP_NAME}.app/${APP_NAME}" ]; then
echo "✅ App binary exists and is executable"
else
echo "❌ App binary missing or not executable"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework was properly embedded
if [ -d "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework" ]; then
echo "✅ llama.framework properly embedded"
else
echo "❌ llama.framework not properly embedded"
FINAL_VALIDATION_RESULT=1
fi
# Check if framework binary exists
if [ -f "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" ]; then
echo "✅ Framework binary exists"
# Further validate framework by checking architecture
ARCHS=$(lipo -info "${TEMP_DIR}/Payload/${APP_NAME}.app/Frameworks/llama.framework/llama" 2>/dev/null | grep -o "arm64\\|x86_64" | tr '\n' ' ')
if [ -n "$ARCHS" ]; then
echo "✅ Framework architecture(s): $ARCHS"
else
echo "⚠️ Could not determine framework architecture"
fi
else
echo "❌ Framework binary missing"
FINAL_VALIDATION_RESULT=1
fi
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "✅ Alternative validation PASSED: App built successfully with embedded framework"
else
echo "❌ Alternative validation FAILED: Issues found with the app or framework"
fi
elif grep -q "You must specify authentication credentials" "${VALIDATION_OUTPUT}" && [ -z "$AUTH_ARGS" ]; then
echo "✅ visionOS Validation PASSED: IPA successfully validated"
echo "Results saved to ${VALIDATION_OUTPUT}"
else
FINAL_VALIDATION_RESULT=1 # mark the failure so the script exits non-zero below
echo "❌ visionOS Validation FAILED: IPA validation found issues"
echo "See validation output at ${VALIDATION_OUTPUT}"
echo ""
echo "==== VALIDATION ERRORS ===="
# Try to extract specific errors from the output
if grep -q "Error" "${VALIDATION_OUTPUT}"; then
grep -A 5 "Error" "${VALIDATION_OUTPUT}"
else
# If no specific error found, show the whole log
cat "${VALIDATION_OUTPUT}"
fi
# Additional debugging: check IPA contents
echo ""
echo "==== IPA CONTENTS ===="
mkdir -p "${TEMP_DIR}/ipa_contents"
unzip -q "${IPA_PATH}" -d "${TEMP_DIR}/ipa_contents"
ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/"
# Check for code signing issues
echo ""
echo "==== CODE SIGNING INFO ===="
codesign -vv -d "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app" 2>&1 || echo "Code signing verification failed"
# Check embedded frameworks
echo ""
echo "==== FRAMEWORK INFO ===="
ls -la "${TEMP_DIR}/ipa_contents/Payload/${APP_NAME}.app/Frameworks/" 2>/dev/null || echo "No Frameworks directory found"
fi
# Don't clean up on error to allow inspection
if [ $FINAL_VALIDATION_RESULT -ne 0 ]; then
echo ""
echo "Temporary files kept for inspection at: ${TEMP_DIR}"
echo "===== visionOS Validation Process Failed ====="
exit 1
fi
# Temporary-file cleanup is currently disabled (rm commented out) so artifacts stay inspectable
if [ $FINAL_VALIDATION_RESULT -eq 0 ]; then
echo "Keeping temporary files at: ${TEMP_DIR} (cleanup disabled)"
#rm -rf "${TEMP_DIR}"
fi
echo "===== visionOS Validation Process Completed ====="
exit $FINAL_VALIDATION_RESULT
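The lipo probe above recovers the framework's architectures by grepping `lipo -info` output; the same check is easy to script outside bash. A hedged Python sketch (the framework_archs helper and the example path are illustrative):

import re
import subprocess

def framework_archs(binary_path: str) -> list[str]:
    # `lipo -info` ends with e.g. "... are: arm64 x86_64" or "... architecture: arm64"
    out = subprocess.run(["lipo", "-info", binary_path],
                         capture_output=True, text=True, check=False).stdout
    return re.findall(r"\b(arm64|x86_64)\b", out)

# framework_archs("Payload/VisionOSLlamaTest.app/Frameworks/llama.framework/llama")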

scripts/tool_bench.py Executable file

@ -0,0 +1,368 @@
#!/usr/bin/env uv run
'''
Simplistic tool call benchmarks for llama-server and ollama.
Essentially runs the tests at examples/server/tests/unit/test_tool_call.py N times, at different temperatures and on different backends (current llama-server, baseline llama-server, and ollama),
and plots the results of multiple runs (from the same .jsonl file or several files) as a success-rate heatmap.
Simple usage example:
cmake -B build -DLLAMA_CURL=1 && cmake --build build --config Release -j -t llama-server
export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 1.5B Q4_K_M" --output qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF --ollama qwen2.5:1.5b-instruct-q4_K_M
./scripts/tool_bench.py run --n 30 --temp -1 --temp 0 --temp 1 --model "Qwen 2.5 Coder 7B Q4_K_M" --output qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF --ollama qwen2.5-coder:7b
./scripts/tool_bench.py plot *.jsonl # Opens window w/ heatmap
./scripts/tool_bench.py plot qwen*.jsonl --output qwen.png # Saves heatmap to qwen.png
(please see ./scripts/tool_bench.sh for a more complete example)
'''
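# Note (illustrative): each line that `run` appends to the output .jsonl is a
# flat JSON record which `plot` later pivots into the heatmap. An abridged,
# hypothetical record looks like:
#   {"model": "Qwen 2.5 1.5B Q4_K_M", "server_name": "llama-server",
#    "model_id": "bartowski/Qwen2.5-1.5B-Instruct-GGUF", "test": "weather",
#    "temp": 0.5, "success_ratio": 0.9, "success_count": 27,
#    "failure_count": 3, "avg_time": 3.2, "median_time": 2.9, "failures": []}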
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "pytest",
# "pandas",
# "matplotlib",
# "seaborn",
# "requests",
# "wget",
# "typer",
# ]
# ///
from contextlib import contextmanager
from pathlib import Path
import re
from statistics import mean, median
from typing import Annotated, Dict, List, Optional, Tuple
import atexit
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import subprocess
import sys
import time
import typer
sys.path.insert(0, Path(__file__).parent.parent.as_posix())
if True:  # dummy block keeps these imports below the sys.path tweak (prevents tools from hoisting them)
from examples.server.tests.utils import ServerProcess
from examples.server.tests.unit.test_tool_call import TIMEOUT_SERVER_START, do_test_calc_result, do_test_hello_world, do_test_weather
@contextmanager
def scoped_server(sp: ServerProcess):
def stop():
nonlocal sp
if sp is not None:
sp.stop()
sp = None # type: ignore
atexit.register(stop)
yield sp
stop()
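# Design note: stop() is registered with atexit *and* called on normal context
# exit; the `sp = None` reset makes whichever call runs second a no-op, so the
# server is torn down exactly once even if a benchmark run crashes midway.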
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
app = typer.Typer()
@app.command()
def plot(files: List[Path], output: Optional[Path] = None, test_regex: Optional[str] = None, server_regex: Optional[str] = None):
lines: List[Dict] = []
for file in files:
if not file.exists():
logger.error(f"File not found: {file}")
continue
try:
with file.open() as f:
raw_data = f.read()
logger.info(f"Reading {file} ({len(raw_data)} bytes)")
for line_num, line in enumerate(raw_data.split('\n'), 1):
line = line.strip()
if not line:
continue
try:
record = json.loads(line)
lines.append(record)
except json.JSONDecodeError as e:
logger.warning(f"Invalid JSON at {file}:{line_num} - {e}")
except Exception as e:
logger.error(f"Error processing {file}: {e}")
if not lines:
raise Exception("No valid data was loaded")
data_dict: Dict[Tuple, float] = {}
models: List[str] = []
temps = set()
tests = set()
server_names = set()
total_counts = set()
for rec in lines:
try:
model = rec["model"]
temp = rec["temp"]
server_name = rec["server_name"]
test = rec["test"]
success = rec["success_ratio"]
success_count = rec["success_count"]
failure_count = rec["failure_count"]
total_count = success_count + failure_count
total_counts.add(total_count)
if test_regex and not re.search(test_regex, test):
continue
if server_regex and not re.search(server_regex, server_name):
continue
data_dict[(model, temp, server_name, test)] = success
if model not in models:
models.append(model)
temps.add(temp)
tests.add(test)
server_names.add(server_name)
except KeyError as e:
logger.warning(f"Missing required field in record: {e}")
if len(total_counts) > 1:
logger.warning(f"Total counts are not consistent: {total_counts}")
# Sort the collected values
temps = sorted(temps, key=lambda x: x if x is not None else -1)
tests = sorted(tests)
server_names = sorted(server_names)
logger.info(f"Processed {len(lines)} lines")
logger.info(f"Found {len(data_dict)} valid data points")
logger.info(f"Models: {models}")
logger.info(f"Temperatures: {temps}")
logger.info(f"Tests: {tests}")
logger.info(f"Servers: {server_names}")
matrix: list[list[float]] = []
index: list[str] = []
all_cols = [
(server_name, test)
for server_name in server_names
for test in tests
]
for model in models:
for temp in temps:
index.append(f"{model} @ {temp}")
row_vals = [
data_dict.get((model, temp, server_name, test), np.nan)
for server_name, test in all_cols
]
matrix.append(row_vals)
columns: list[str] = [f"{server_name}\n{test}" for server_name, test in all_cols]
df = pd.DataFrame(matrix, index=np.array(index), columns=np.array(columns))
plt.figure(figsize=(12, 6))
sns.heatmap(
df, annot=True, cmap="RdYlGn", vmin=0.0, vmax=1.0, cbar=True, fmt=".2f", center=0.5, square=True, linewidths=0.5,
cbar_kws={"label": "Success Ratio"},
)
plt.title(f"Tool Call Bench (n = {str(min(total_counts)) if len(total_counts) == 1 else f'{min(total_counts)}-{max(total_counts)}'})\nSuccess Ratios by Server & Test", pad=20)
plt.xlabel("Server & Test", labelpad=10)
plt.ylabel("Model @ Temperature", labelpad=10)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
if output:
plt.savefig(output, dpi=300, bbox_inches='tight')
logger.info(f"Plot saved to {output}")
else:
plt.show()
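# Shape of the pivot rendered above (values hypothetical): one row per
# "<model> @ <temp>", one column per "<server>\n<test>", cells holding success
# ratios in [0, 1]. Combinations that never ran stay NaN (see data_dict.get
# above) and typically render as blank cells in the heatmap.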
@app.command()
def run(
output: Annotated[Path, typer.Option(help="Output JSON file")],
model: Annotated[Optional[str], typer.Option(help="Name of the model to test (server agnostic)")] = None,
hf: Annotated[Optional[str], typer.Option(help="GGUF huggingface model repo id (+ optional quant) to test w/ llama-server")] = None,
chat_template: Annotated[Optional[str], typer.Option(help="Chat template override for llama-server")] = None,
ollama: Annotated[Optional[str], typer.Option(help="Ollama model tag to test")] = None,
llama_baseline: Annotated[Optional[str], typer.Option(help="llama-server baseline binary path to use as baseline")] = None,
n: Annotated[int, typer.Option(help="Number of times to run each test")] = 10,
temp: Annotated[Optional[List[float]], typer.Option(help="Set of temperatures to test")] = None,
top_p: Annotated[Optional[float], typer.Option(help="top_p")] = None,
top_k: Annotated[Optional[int], typer.Option(help="top_k")] = None,
ctk: Annotated[Optional[str], typer.Option(help="ctk")] = None,
ctv: Annotated[Optional[str], typer.Option(help="ctv")] = None,
fa: Annotated[Optional[bool], typer.Option(help="fa")] = None,
seed: Annotated[Optional[int], typer.Option(help="Random seed")] = None,
port: Annotated[int, typer.Option(help="llama-server port")] = 8084,
force: Annotated[bool, typer.Option(help="Force overwrite of output file")] = False,
append: Annotated[bool, typer.Option(help="Append to output file")] = False,
test_hello_world: Annotated[bool, typer.Option(help="Whether to run the hello world test")] = True,
test_weather: Annotated[bool, typer.Option(help="Whether to run the weather test")] = True,
test_calc_result: Annotated[bool, typer.Option(help="Whether to run the calc result test")] = False,
):
n_predict = 512 # High because of DeepSeek R1
# n_ctx = 8192
n_ctx = 2048
# Refuse to overwrite an existing output file unless --force or --append is given
assert force or append or not output.exists(), f"Output file already exists: {output}; use --force to overwrite"
with output.open('a' if append else 'w') as output_file:
def run(server: ServerProcess, *, server_name: str, model_id: str, temp: Optional[float] = None, output_kwargs={}, request_kwargs={}):
request_kwargs = {**request_kwargs}
if temp is not None:
request_kwargs['temperature'] = temp
if top_p is not None:
request_kwargs['top_p'] = top_p
if top_k is not None:
request_kwargs['top_k'] = top_k
if seed is not None:
request_kwargs['seed'] = seed
request_kwargs['cache_prompt'] = False
tests = {}
if test_hello_world:
tests["hello world"] = lambda server: do_test_hello_world(server, **request_kwargs)
if test_weather:
tests["weather"] = lambda server: do_test_weather(server, **request_kwargs)
if test_calc_result:
tests["calc result"] = lambda server: do_test_calc_result(server, None, 512, **request_kwargs)
for test_name, test in tests.items():
success_count = 0
failure_count = 0
failures = []
success_times = []
failure_times = []
logger.info(f"Running {test_name} ({server_name}, {model}): ")
for i in range(n):
start_time = time.time()
def elapsed():
return time.time() - start_time
try:
test(server)
success_times.append(elapsed())
success_count += 1
logger.info('success')
except Exception as e:
logger.error(f'failure: {e}')
failure_count += 1
failure_times.append(elapsed())
failures.append(str(e))
# import traceback
# traceback.print_exc()
output_file.write(json.dumps({**output_kwargs, **dict(
model=model,
server_name=server_name,
model_id=model_id,
test=test_name,
temp=t,
top_p=top_p,
top_k=top_k,
ctk=ctk,
ctv=ctv,
seed=seed,
success_ratio=float(success_count) / n,
avg_time=mean(success_times + failure_times),
median_time=median(success_times + failure_times),
success_count=success_count,
success_times=success_times,
failure_count=failure_count,
failure_times=failure_times,
failures=list(set(failures)),
)}) + '\n')
output_file.flush()
for t in [None] if temp is None else [t if t >= 0 else None for t in temp]:
if hf is not None:
servers: list[Tuple[str, Optional[str]]] = [('llama-server', None)]
if llama_baseline is not None:
servers.append(('llama-server (baseline)', llama_baseline))
for server_name, server_path in servers:
server = ServerProcess()
server.n_ctx = n_ctx
server.n_slots = 1
server.jinja = True
server.ctk = ctk
server.ctv = ctv
server.fa = fa
server.n_predict = n_predict
server.model_hf_repo = hf
server.model_hf_file = None
server.chat_template = chat_template
server.server_path = server_path
if port is not None:
server.server_port = port
# server.debug = True
with scoped_server(server):
server.start(timeout_seconds=TIMEOUT_SERVER_START)
for ignore_chat_grammar in [False]:
run(
server,
server_name=server_name,
model_id=hf,
temp=t,
output_kwargs=dict(
chat_template=chat_template,
),
request_kwargs=dict(
ignore_chat_grammar=ignore_chat_grammar,
),
)
if ollama is not None:
server = ServerProcess()
server.server_port = 11434
server.server_host = "localhost"
subprocess.check_call(["ollama", "pull", ollama])
with scoped_server(server):
run(
server,
server_name="ollama",
model_id=ollama,
temp=t,
output_kwargs=dict(
chat_template=None,
),
request_kwargs=dict(
model=ollama,
max_tokens=n_predict,
num_ctx=n_ctx,
),
)
if __name__ == "__main__":
app()

scripts/tool_bench.sh Executable file

@ -0,0 +1,66 @@
#!/bin/bash
set -euo pipefail
cmake --build build -j
export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}
export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
if [ ! -x "$LLAMA_SERVER_BIN_PATH" ]; then
echo "Could not find llama-server binary at $LLAMA_SERVER_BIN_PATH"
exit 1
fi
if [ ! -d "$LLAMA_CACHE" ]; then
echo "Could not find llama cache at $LLAMA_CACHE, please set LLAMA_CACHE explicitly."
exit 1
fi
ARGS=( # plain assignment: bash cannot export arrays to child processes anyway
--llama-baseline="$(which llama-server)"
--n 30
--temp -1 # Leaves temperature parameter unset (use the server's default, e.g. 0.6 for ollama)
--temp 0
--temp 0.5
--temp 0.75
--temp 1
--temp 1.5
--temp 2
--temp 5
"$@"
)
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 0.5B Q4_K_M" --output ../qwenc0.5b.jsonl --hf bartowski/Qwen2.5-Coder-0.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:0.5b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 1.5B Q4_K_M" --output ../qwenc1.5b.jsonl --hf bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:1.5b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 3B Q4_K_M" --output ../qwenc3b.jsonl --hf bartowski/Qwen2.5-Coder-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:3b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 7B Q4_K_M" --output ../qwenc7b.jsonl --hf bartowski/Qwen2.5-Coder-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:7b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 Coder 32B Q4_K_M" --output ../qwenc32b.jsonl --hf bartowski/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M --ollama qwen2.5-coder:32B-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 1.5B Q4_K_M" --output ../qwen1.5b.jsonl --hf bartowski/Qwen2.5-1.5B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:1.5b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 3B Q4_K_M" --output ../qwen3b.jsonl --hf bartowski/Qwen2.5-3B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:3b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Qwen 2.5 7B Q4_K_M" --output ../qwen7b.jsonl --hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M --ollama qwen2.5:7b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.2 Instruct 1B Q4_K_M" --output ../llama1b.jsonl --hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M --ollama llama3.2:1b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.2 Instruct 3B Q4_K_M" --output ../llama3b.jsonl --hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M --ollama llama3.2:3b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.1 Instruct 8B Q4_K_M" --output ../llama8b.jsonl --hf bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M --ollama llama3.1:8b-instruct-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Llama 3.3 70B Q4_K_M" --output ../llama70b.jsonl --hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Mistral Nemo Q4_K_M" --output ../nemo.jsonl --hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M --ollama mistral-nemo:12b-instruct-2407-q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "Hermes 3 Llama 3.1 8B Q4_K_M" --output ../hermes3.jsonl --hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M --ollama hermes3:8b-llama3.1-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )
./scripts/tool_bench.py run ${ARGS[@]} --model "Hermes 2 Pro Llama 3 8B Q4_K_M" --output ../hermes2.jsonl --hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M --ollama hermes2:8b-llama3-q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use )
./scripts/tool_bench.py run ${ARGS[@]} --model "Functionary Small V3.2 Q4_K_M" --output ../funct3.2.jsonl --hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M
./scripts/tool_bench.py run ${ARGS[@]} --model "FireFunction V2 IQ1_M" --output ../firef2.jsonl --hf bartowski/firefunction-v2-GGUF:IQ1_M --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 tool_use )
./scripts/tool_bench.py run ${ARGS[@]} --model "Command R7B 12-2024 Q6_K_L" --output ../c4ai.jsonl --hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use )
./scripts/tool_bench.py run ${ARGS[@]} --model "Gemma 2 2B Q8_0" --output ../gemma2.jsonl --hf bartowski/gemma-2-2b-it-GGUF:Q8_0
./scripts/tool_bench.py run ${ARGS[@]} --model "Phi 4 Instruct Q4_K_M" --output ../phi4.jsonl --hf bartowski/phi-4-GGUF:Q4_K_M # --ollama phi4
./scripts/tool_bench.py run ${ARGS[@]} --model "Phi 3.5 Mini Instruct Q4_K_M" --output ../phi3.5.jsonl --hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M # --ollama phi3.5:3.8b-mini-instruct-q4_K_M
# ./scripts/tool_bench.py run ${ARGS[@]} --model "DeepSeek R1 Distill Qwen 7B Q6_K_L" --output ../dsqw7.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K_L --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-7B tool_use )
# ./scripts/tool_bench.py run ${ARGS[@]} --model "DeepSeek R1 Distill Qwen 32B Q4_K_M" --output ../dsqw32.jsonl --hf bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M --chat-template-file <( python scripts/get_chat_template.py NousResearch/DeepSeek-R1-Distill-Qwen-32B tool_use )
for f in ../*.jsonl; do
./scripts/tool_bench.py plot "$f" --output ${f%.jsonl}.png || true
done
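The `--temp -1` convention above is decoded inside tool_bench.py's run command: negative temperatures are mapped to None, so the temperature field is omitted from the request entirely and the server or ollama default applies. The relevant line:

for t in [None] if temp is None else [t if t >= 0 else None for t in temp]: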

View file

@ -4,6 +4,7 @@
#include <map>
#include <sstream>
#include <algorithm>
#if __cplusplus >= 202000L
#define LU8(x) (const char*)(u8##x)

View file

@ -969,7 +969,7 @@ struct llama_grammar * llama_grammar_init_impl(
/* .awaiting_trigger = */ false,
/* .trigger_buffer = */ "",
/* .trigger_tokens = */ {},
/* .trigger_words = */ {},
/* .trigger_patterns = */ {},
};
}
@ -978,19 +978,15 @@ struct llama_grammar * llama_grammar_init_impl(
const char * grammar_str,
const char * grammar_root,
bool lazy,
const char ** trigger_words,
size_t num_trigger_words,
const char ** trigger_patterns,
size_t num_trigger_patterns,
const llama_token * trigger_tokens,
size_t num_trigger_tokens) {
llama_grammar_parser parser;
// if there is a grammar, parse it
if (!parser.parse(grammar_str)) {
return nullptr;
}
// will be empty (default) if there are parse errors
if (parser.rules.empty()) {
// rules will be empty (default) if there are parse errors
if (!parser.parse(grammar_str) || parser.rules.empty()) {
fprintf(stderr, "%s: failed to parse grammar\n", __func__);
return nullptr;
}
@ -1054,14 +1050,16 @@ struct llama_grammar * llama_grammar_init_impl(
} while (true);
std::vector<llama_token> vec_trigger_tokens;
std::vector<std::string> vec_trigger_words;
std::vector<llama_grammar_trigger_pattern> vec_trigger_patterns;
for (size_t i = 0; i < num_trigger_tokens; i++) {
GGML_ASSERT(trigger_tokens != nullptr);
vec_trigger_tokens.push_back(trigger_tokens[i]);
}
for (size_t i = 0; i < num_trigger_words; i++) {
GGML_ASSERT(trigger_words != nullptr);
vec_trigger_words.push_back(trigger_words[i]);
for (size_t i = 0; i < num_trigger_patterns; i++) {
GGML_ASSERT(trigger_patterns != nullptr);
auto & trigger = vec_trigger_patterns.emplace_back();
trigger.pattern = trigger_patterns[i];
trigger.regex = std::regex(trigger.pattern);
}
// Important: vec_rules has to be moved here, not copied, because stacks contains
@ -1076,7 +1074,7 @@ struct llama_grammar * llama_grammar_init_impl(
/* .awaiting_trigger = */ lazy,
/* .trigger_buffer = */ "",
std::move(vec_trigger_tokens),
std::move(vec_trigger_words),
std::move(vec_trigger_patterns),
};
}
@ -1089,7 +1087,7 @@ void llama_grammar_free_impl(struct llama_grammar * grammar) {
}
struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
llama_grammar * result = new llama_grammar {
auto * result = new llama_grammar {
grammar.vocab,
grammar.rules,
grammar.stacks,
@ -1098,7 +1096,7 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & gra
grammar.awaiting_trigger,
grammar.trigger_buffer,
grammar.trigger_tokens,
grammar.trigger_words,
grammar.trigger_patterns,
};
// redirect elements in stacks to point to new rules
@ -1173,16 +1171,18 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
return;
} else {
// TODO: consider a smarter incremental substring search algorithm (store last position to search from).
grammar.trigger_buffer += piece;
for (const auto & word : grammar.trigger_words) {
auto pos = grammar.trigger_buffer.find(word);
if (pos != std::string::npos) {
std::smatch match;
for (const auto & trigger_pattern : grammar.trigger_patterns) {
if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) {
grammar.awaiting_trigger = false;
auto constrained_str = grammar.trigger_buffer.substr(pos);
// keep everything from the start of the first capture group to the end of the string
auto constrained_str = grammar.trigger_buffer.substr(match.position(1));
// std::string constrained_str(match[1].first, grammar.trigger_buffer.end());
grammar.trigger_buffer.clear();
llama_grammar_accept_str(grammar, constrained_str);
LLAMA_LOG_DEBUG("Grammar triggered on word `%s`", word.c_str());
LLAMA_LOG_DEBUG("Grammar triggered on regex: '%s'\n", constrained_str.c_str());
return;
}
}

View file

@ -3,6 +3,7 @@
#include "llama.h"
#include <map>
#include <regex>
#include <string>
#include <vector>
@ -105,6 +106,11 @@ struct llama_grammar_parser {
void print(FILE * file);
};
struct llama_grammar_trigger_pattern {
std::string pattern;
std::regex regex;
};
struct llama_grammar {
// note: allow null vocab for testing (not great)
const llama_vocab * vocab;
@ -122,7 +128,10 @@ struct llama_grammar {
bool awaiting_trigger = false; // Initialized to true for lazy grammars only
std::string trigger_buffer; // Output buffered by lazy grammar. Will be cleared once trigger is found.
std::vector<llama_token> trigger_tokens; // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
std::vector<std::string> trigger_words;
std::vector<llama_grammar_trigger_pattern>
trigger_patterns; // Regular expressions that trigger a lazy grammar. Must be a full match of the entire generated
// string, and the grammar will be given the string from the first match group onwards.
};
//
@ -141,8 +150,8 @@ struct llama_grammar * llama_grammar_init_impl(
const char * grammar_str,
const char * grammar_root,
bool lazy,
const char ** trigger_words,
size_t num_trigger_words,
const char ** trigger_patterns,
size_t num_trigger_patterns,
const llama_token * trigger_tokens,
size_t num_trigger_tokens);
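The trigger_patterns comment above specifies full-match semantics: the regex must match the entire generated string so far, and the grammar is then handed everything from the first capture group onward. A rough Python analogue of what llama_grammar_accept_impl does with the trigger buffer (pattern and buffer values illustrative):

import re

pattern = re.compile(r"[\s\S]*?(<tool_call>)[\s\S]*")
buffer = 'free-form text before the trigger... <tool_call>{"name": "get_weather"}'
m = pattern.fullmatch(buffer)  # std::regex_match == full match, not search
if m:
    constrained = buffer[m.start(1):]  # from the first capture group to the end
    # constrained == '<tool_call>{"name": "get_weather"}'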

View file

@ -6,6 +6,7 @@
#include <set>
#include <vector>
#include <algorithm>
struct llama_kv_cell {
llama_pos pos = -1;

View file

@ -8,6 +8,7 @@
#include <climits>
#include <stdexcept>
#include <cerrno>
#include <algorithm>
#ifdef __has_include
#if __has_include(<unistd.h>)
@ -34,6 +35,10 @@
#include <io.h>
#endif
#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif
// TODO: consider moving to llama-impl.h if needed in more places
#if defined(_WIN32)
static std::string llama_format_win_err(DWORD err) {
@ -475,7 +480,11 @@ struct llama_mlock::impl {
char* errmsg = std::strerror(errno);
bool suggest = (errno == ENOMEM);
#if defined(TARGET_OS_VISION) || defined(TARGET_OS_TV)
// visionOS/tvOS don't support RLIMIT_MEMLOCK
// Skip resource limit checks on visionOS/tvOS
suggest = false;
#else
struct rlimit lock_limit;
if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
suggest = false;
@ -483,6 +492,7 @@ struct llama_mlock::impl {
if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
suggest = false;
}
#endif
LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");

View file

@ -1449,7 +1449,9 @@ static struct llama_sampler * llama_sampler_init_grammar_impl(
const char ** trigger_words,
size_t num_trigger_words,
const llama_token * trigger_tokens,
size_t num_trigger_tokens);
size_t num_trigger_tokens,
const char ** trigger_patterns,
size_t num_trigger_patterns);
static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
auto * ctx = (llama_sampler_grammar *) smpl->ctx;
@ -1457,12 +1459,14 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
return;
}
std::vector<const char *> trigger_words;
for (auto & word : ctx->grammar->trigger_words) {
trigger_words.push_back(word.c_str());
std::vector<const char *> trigger_patterns_c;
trigger_patterns_c.reserve(ctx->grammar->trigger_patterns.size());
for (auto & trigger_pattern : ctx->grammar->trigger_patterns) {
trigger_patterns_c.push_back(trigger_pattern.pattern.c_str());
}
auto * grammar_new = llama_grammar_init_impl(ctx->grammar->vocab, ctx->grammar_str.c_str(), ctx->grammar_root.c_str(),
ctx->grammar->lazy, trigger_words.data(), trigger_words.size(),
ctx->grammar->lazy, trigger_patterns_c.data(), trigger_patterns_c.size(),
ctx->grammar->trigger_tokens.data(), ctx->grammar->trigger_tokens.size());
llama_grammar_free_impl(ctx->grammar);
@ -1472,7 +1476,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) {
static struct llama_sampler * llama_sampler_grammar_clone(const struct llama_sampler * smpl) {
const auto * ctx = (const llama_sampler_grammar *) smpl->ctx;
auto * result = llama_sampler_init_grammar_impl(ctx->vocab, nullptr, nullptr, false, nullptr, 0, nullptr, 0);
auto * result = llama_sampler_init_grammar_impl(ctx->vocab, nullptr, nullptr, false, nullptr, 0, nullptr, 0, nullptr, 0);
// copy the state
{
@ -1516,15 +1520,33 @@ static struct llama_sampler * llama_sampler_init_grammar_impl(
const char ** trigger_words,
size_t num_trigger_words,
const llama_token * trigger_tokens,
size_t num_trigger_tokens) {
size_t num_trigger_tokens,
const char ** trigger_patterns,
size_t num_trigger_patterns) {
auto * ctx = new llama_sampler_grammar;
if (grammar_str != nullptr && grammar_str[0] != '\0') {
// TODO: remove trigger_words support.
if (trigger_words != nullptr && num_trigger_words > 0) {
GGML_ASSERT(trigger_patterns == nullptr && num_trigger_patterns == 0);
std::string trigger_pattern("[\\s\\S]*?(");
for (size_t i = 0; i < num_trigger_words; ++i) {
static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
if (i > 0) {
trigger_pattern += "|";
}
trigger_pattern += std::regex_replace(trigger_words[i], special_chars, "\\$0");
}
trigger_pattern += ")[\\s\\S]*";
auto trigger_pattern_c = trigger_pattern.c_str();
trigger_patterns = &trigger_pattern_c;
num_trigger_patterns = 1;
}
*ctx = {
/* .vocab = */ vocab,
/* .grammar_str = */ grammar_str,
/* .grammar_root = */ grammar_root,
/* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, lazy, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens),
/* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, lazy, trigger_patterns, num_trigger_patterns, trigger_tokens, num_trigger_tokens),
};
} else {
*ctx = {
@ -1545,7 +1567,7 @@ struct llama_sampler * llama_sampler_init_grammar(
const struct llama_vocab * vocab,
const char * grammar_str,
const char * grammar_root) {
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ false, nullptr, 0, nullptr, 0);
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ false, nullptr, 0, nullptr, 0, nullptr, 0);
}
struct llama_sampler * llama_sampler_init_grammar_lazy(
@ -1556,7 +1578,18 @@ struct llama_sampler * llama_sampler_init_grammar_lazy(
size_t num_trigger_words,
const llama_token * trigger_tokens,
size_t num_trigger_tokens) {
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ true, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens);
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ true, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens, nullptr, 0);
}
struct llama_sampler * llama_sampler_init_grammar_lazy_patterns(
const struct llama_vocab * vocab,
const char * grammar_str,
const char * grammar_root,
const char ** trigger_patterns,
size_t num_trigger_patterns,
const llama_token * trigger_tokens,
size_t num_trigger_tokens) {
return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ true, nullptr, 0, trigger_tokens, num_trigger_tokens, trigger_patterns, num_trigger_patterns);
}
// penalties
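For reference, the legacy trigger-word fallback above (flagged with a removal TODO) compiles the word list into a single regex of the shape [\s\S]*?(w1|w2|...)[\s\S]*, escaping regex metacharacters in each word. A rough Python rendering of that conversion (sketch only; Python's re.escape covers a slightly different character set than the C++ special_chars list):

import re

def words_to_trigger_pattern(words: list[str]) -> str:
    # Escape each legacy trigger word, OR them inside one capture group, and
    # allow arbitrary text before and after, mirroring the C++ fallback.
    return "[\\s\\S]*?(" + "|".join(re.escape(w) for w in words) + ")[\\s\\S]*"

# words_to_trigger_pattern(["<tool_call>", "[TOOL]"])
#   -> regex text: [\s\S]*?(<tool_call>|\[TOOL\])[\s\S]*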

View file

@ -18,6 +18,7 @@
#include <regex>
#include <set>
#include <unordered_map>
#include <cctype>
//
// helpers