diff --git a/.devops/openvino.Dockerfile b/.devops/openvino.Dockerfile deleted file mode 100644 index e22ef16c7..000000000 --- a/.devops/openvino.Dockerfile +++ /dev/null @@ -1,138 +0,0 @@ -ARG OPENVINO_VERSION_MAJOR=2026.0 -ARG OPENVINO_VERSION_FULL=2026.0.0.20965.c6d6a13a886 -ARG UBUNTU_VERSION=24.04 - -# Optional proxy build arguments - empty by default -ARG http_proxy= -ARG https_proxy= - -## Build Image -FROM ubuntu:${UBUNTU_VERSION} AS build - -# Pass proxy args to build stage -ARG http_proxy -ARG https_proxy - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - gnupg \ - wget \ - git \ - cmake \ - ninja-build \ - build-essential \ - libtbb12 \ - libssl-dev \ - ocl-icd-opencl-dev \ - opencl-headers \ - opencl-clhpp-headers \ - intel-opencl-icd && \ - rm -rf /var/lib/apt/lists/* - -# Install OpenVINO for Ubuntu 24.04 -ARG OPENVINO_VERSION_MAJOR -ARG OPENVINO_VERSION_FULL -RUN mkdir -p /opt/intel && \ - wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \ - tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \ - mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \ - cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \ - echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \ - cd - && \ - ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino - -ENV OpenVINO_DIR=/opt/intel/openvino - -WORKDIR /app - -COPY . . - -# Build Stage -RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \ - cmake -B build/ReleaseOV -G Ninja \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENVINO=ON && \ - cmake --build build/ReleaseOV -j$(nproc)" - -# Copy all necessary libraries -RUN mkdir -p /app/lib && \ - find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \ - find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \ - find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; - -# Create runtime directories and copy binaries -RUN mkdir -p /app/full \ - && cp build/ReleaseOV/bin/* /app/full/ \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base Runtime Image -FROM ubuntu:${UBUNTU_VERSION} AS base - -# Pass proxy args to runtime stage -ARG http_proxy -ARG https_proxy - -RUN apt-get update \ - && apt-get install -y libgomp1 libtbb12 curl\ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app/ - -### Full (all binaries) -FROM base AS full - -ARG http_proxy -ARG https_proxy - -COPY --from=build /app/full /app/ - -WORKDIR /app - -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - git \ - python3 \ - python3-venv \ - python3-pip && \ - python3 -m venv /ov-venv && \ - /ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \ - /ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /tmp/* /var/tmp/* && \ - find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \ - find /var/cache -type f -delete - -ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"] - - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app/ - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app/ - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/common/jinja/parser.cpp b/common/jinja/parser.cpp index 4ae447744..2b25654a7 100644 --- a/common/jinja/parser.cpp +++ b/common/jinja/parser.cpp @@ -539,6 +539,9 @@ private: statement_ptr step = slices.size() > 2 ? std::move(slices[2]) : nullptr; return mk_stmt(start_pos, std::move(start), std::move(stop), std::move(step)); } + if (slices.empty()) { + return mk_stmt(start_pos); + } return std::move(slices[0]); } diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp index dce5bbae3..2232790c3 100644 --- a/common/jinja/runtime.cpp +++ b/common/jinja/runtime.cpp @@ -771,10 +771,15 @@ value member_expression::execute_impl(context & ctx) { } JJ_DEBUG("Member expression on object type %s, property type %s", object->type().c_str(), property->type().c_str()); - ensure_key_type_allowed(property); - value val = mk_val("object_property"); + if (property->is_undefined()) { + JJ_DEBUG("%s", "Member expression property is undefined, returning undefined"); + return val; + } + + ensure_key_type_allowed(property); + if (is_val(object)) { JJ_DEBUG("%s", "Accessing property on undefined object, returning undefined"); return val; diff --git a/common/jinja/runtime.h b/common/jinja/runtime.h index 17a6dff5a..3ca5f1754 100644 --- a/common/jinja/runtime.h +++ b/common/jinja/runtime.h @@ -263,6 +263,14 @@ struct comment_statement : public statement { // Expressions +// Represents an omitted expression in a computed member, e.g. `a[]`. +struct blank_expression : public expression { + std::string type() const override { return "BlankExpression"; } + value execute_impl(context &) override { + return mk_val(); + } +}; + struct member_expression : public expression { statement_ptr object; statement_ptr property; diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu index 4896669c3..38fdf3678 100644 --- a/ggml/src/ggml-cuda/argsort.cu +++ b/ggml/src/ggml-cuda/argsort.cu @@ -47,9 +47,11 @@ void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool, #ifdef STRIDED_ITERATOR_AVAILABLE auto offset_iterator = cuda::make_strided_iterator(cuda::make_counting_iterator(0), ncols); #else - ggml_cuda_pool_alloc offsets_alloc(pool, nrows + 1); + // offset_iterator needs to populate nrows + 1 elements, so we also have to ceildiv nrows + 1 by block_size + const int nrows_offset = nrows + 1; + ggml_cuda_pool_alloc offsets_alloc(pool, nrows_offset); int * offset_iterator = offsets_alloc.get(); - const dim3 offset_grid((nrows + block_size - 1) / block_size); + const dim3 offset_grid((nrows_offset + block_size - 1) / block_size); init_offsets<<>>(offset_iterator, ncols, nrows); #endif CUDA_CHECK(cudaMemcpyAsync(temp_keys, x, ncols * nrows * sizeof(float), cudaMemcpyDeviceToDevice, stream));