mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/cpu.Dockerfile # .devops/cuda-new.Dockerfile # .devops/cuda.Dockerfile # .devops/intel.Dockerfile # .devops/musa.Dockerfile # .devops/rocm.Dockerfile # .devops/vulkan.Dockerfile # .github/workflows/docker.yml # docs/docker.md # ggml/src/ggml-opencl/CMakeLists.txt # ggml/src/ggml-opencl/ggml-opencl.cpp # ggml/src/ggml-opencl/kernels/cvt.cl # ggml/src/ggml-rpc/ggml-rpc.cpp # tests/test-backend-ops.cpp # tests/test-jinja.cpp
This commit is contained in:
commit
56c21bac04
5 changed files with 22 additions and 142 deletions
|
|
@ -1,138 +0,0 @@
|
|||
ARG OPENVINO_VERSION_MAJOR=2026.0
|
||||
ARG OPENVINO_VERSION_FULL=2026.0.0.20965.c6d6a13a886
|
||||
ARG UBUNTU_VERSION=24.04
|
||||
|
||||
# Optional proxy build arguments - empty by default
|
||||
ARG http_proxy=
|
||||
ARG https_proxy=
|
||||
|
||||
## Build Image
|
||||
FROM ubuntu:${UBUNTU_VERSION} AS build
|
||||
|
||||
# Pass proxy args to build stage
|
||||
ARG http_proxy
|
||||
ARG https_proxy
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
gnupg \
|
||||
wget \
|
||||
git \
|
||||
cmake \
|
||||
ninja-build \
|
||||
build-essential \
|
||||
libtbb12 \
|
||||
libssl-dev \
|
||||
ocl-icd-opencl-dev \
|
||||
opencl-headers \
|
||||
opencl-clhpp-headers \
|
||||
intel-opencl-icd && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install OpenVINO for Ubuntu 24.04
|
||||
ARG OPENVINO_VERSION_MAJOR
|
||||
ARG OPENVINO_VERSION_FULL
|
||||
RUN mkdir -p /opt/intel && \
|
||||
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
|
||||
tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
|
||||
mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
|
||||
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
|
||||
echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
|
||||
cd - && \
|
||||
ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
|
||||
|
||||
ENV OpenVINO_DIR=/opt/intel/openvino
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
# Build Stage
|
||||
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
|
||||
cmake -B build/ReleaseOV -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DGGML_OPENVINO=ON && \
|
||||
cmake --build build/ReleaseOV -j$(nproc)"
|
||||
|
||||
# Copy all necessary libraries
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
|
||||
find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
|
||||
find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;
|
||||
|
||||
# Create runtime directories and copy binaries
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/ReleaseOV/bin/* /app/full/ \
|
||||
&& cp *.py /app/full \
|
||||
&& cp -r gguf-py /app/full \
|
||||
&& cp -r requirements /app/full \
|
||||
&& cp requirements.txt /app/full \
|
||||
&& cp .devops/tools.sh /app/full/tools.sh
|
||||
|
||||
## Base Runtime Image
|
||||
FROM ubuntu:${UBUNTU_VERSION} AS base
|
||||
|
||||
# Pass proxy args to runtime stage
|
||||
ARG http_proxy
|
||||
ARG https_proxy
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y libgomp1 libtbb12 curl\
|
||||
&& apt autoremove -y \
|
||||
&& apt clean -y \
|
||||
&& rm -rf /tmp/* /var/tmp/* \
|
||||
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
||||
&& find /var/cache -type f -delete
|
||||
|
||||
COPY --from=build /app/lib/ /app/
|
||||
|
||||
### Full (all binaries)
|
||||
FROM base AS full
|
||||
|
||||
ARG http_proxy
|
||||
ARG https_proxy
|
||||
|
||||
COPY --from=build /app/full /app/
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
git \
|
||||
python3 \
|
||||
python3-venv \
|
||||
python3-pip && \
|
||||
python3 -m venv /ov-venv && \
|
||||
/ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
||||
/ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
|
||||
apt-get autoremove -y && \
|
||||
apt-get clean && \
|
||||
rm -rf /tmp/* /var/tmp/* && \
|
||||
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
|
||||
find /var/cache -type f -delete
|
||||
|
||||
ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
|
||||
|
||||
|
||||
### Light, CLI only
|
||||
FROM base AS light
|
||||
|
||||
COPY --from=build /app/full/llama-cli /app/
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT [ "/app/llama-cli" ]
|
||||
|
||||
### Server, Server only
|
||||
FROM base AS server
|
||||
|
||||
ENV LLAMA_ARG_HOST=0.0.0.0
|
||||
|
||||
COPY --from=build /app/full/llama-server /app/
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
||||
|
||||
ENTRYPOINT [ "/app/llama-server" ]
|
||||
|
|
@ -539,6 +539,9 @@ private:
|
|||
statement_ptr step = slices.size() > 2 ? std::move(slices[2]) : nullptr;
|
||||
return mk_stmt<slice_expression>(start_pos, std::move(start), std::move(stop), std::move(step));
|
||||
}
|
||||
if (slices.empty()) {
|
||||
return mk_stmt<blank_expression>(start_pos);
|
||||
}
|
||||
return std::move(slices[0]);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -771,10 +771,15 @@ value member_expression::execute_impl(context & ctx) {
|
|||
}
|
||||
|
||||
JJ_DEBUG("Member expression on object type %s, property type %s", object->type().c_str(), property->type().c_str());
|
||||
ensure_key_type_allowed(property);
|
||||
|
||||
value val = mk_val<value_undefined>("object_property");
|
||||
|
||||
if (property->is_undefined()) {
|
||||
JJ_DEBUG("%s", "Member expression property is undefined, returning undefined");
|
||||
return val;
|
||||
}
|
||||
|
||||
ensure_key_type_allowed(property);
|
||||
|
||||
if (is_val<value_undefined>(object)) {
|
||||
JJ_DEBUG("%s", "Accessing property on undefined object, returning undefined");
|
||||
return val;
|
||||
|
|
|
|||
|
|
@ -263,6 +263,14 @@ struct comment_statement : public statement {
|
|||
|
||||
// Expressions
|
||||
|
||||
// Represents an omitted expression in a computed member, e.g. `a[]`.
|
||||
struct blank_expression : public expression {
|
||||
std::string type() const override { return "BlankExpression"; }
|
||||
value execute_impl(context &) override {
|
||||
return mk_val<value_undefined>();
|
||||
}
|
||||
};
|
||||
|
||||
struct member_expression : public expression {
|
||||
statement_ptr object;
|
||||
statement_ptr property;
|
||||
|
|
|
|||
|
|
@ -47,9 +47,11 @@ void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool,
|
|||
#ifdef STRIDED_ITERATOR_AVAILABLE
|
||||
auto offset_iterator = cuda::make_strided_iterator(cuda::make_counting_iterator(0), ncols);
|
||||
#else
|
||||
ggml_cuda_pool_alloc<int> offsets_alloc(pool, nrows + 1);
|
||||
// offset_iterator needs to populate nrows + 1 elements, so we also have to ceildiv nrows + 1 by block_size
|
||||
const int nrows_offset = nrows + 1;
|
||||
ggml_cuda_pool_alloc<int> offsets_alloc(pool, nrows_offset);
|
||||
int * offset_iterator = offsets_alloc.get();
|
||||
const dim3 offset_grid((nrows + block_size - 1) / block_size);
|
||||
const dim3 offset_grid((nrows_offset + block_size - 1) / block_size);
|
||||
init_offsets<<<offset_grid, block_size, 0, stream>>>(offset_iterator, ncols, nrows);
|
||||
#endif
|
||||
CUDA_CHECK(cudaMemcpyAsync(temp_keys, x, ncols * nrows * sizeof(float), cudaMemcpyDeviceToDevice, stream));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue