Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/full-cuda.Dockerfile
#	.devops/full-rocm.Dockerfile
#	.devops/full.Dockerfile
#	.devops/llama-cpp-clblast.srpm.spec
#	.devops/llama-cpp-cuda.srpm.spec
#	.devops/llama-cpp.srpm.spec
#	.devops/nix/package.nix
#	.devops/server-cuda.Dockerfile
#	.devops/server-intel.Dockerfile
#	.devops/server-rocm.Dockerfile
#	.devops/server-vulkan.Dockerfile
#	.devops/server.Dockerfile
#	.github/workflows/build.yml
#	.github/workflows/code-coverage.yml
#	.github/workflows/docker.yml
#	.github/workflows/editorconfig.yml
#	.github/workflows/gguf-publish.yml
#	.github/workflows/nix-ci-aarch64.yml
#	.github/workflows/nix-ci.yml
#	.github/workflows/python-check-requirements.yml
#	.github/workflows/python-lint.yml
#	.github/workflows/server.yml
#	.github/workflows/zig-build.yml
#	CMakeLists.txt
#	Makefile
#	README-sycl.md
#	README.md
#	ci/run.sh
#	examples/gguf-split/gguf-split.cpp
#	flake.lock
#	flake.nix
#	llama.cpp
#	scripts/compare-llama-bench.py
#	scripts/sync-ggml-am.sh
#	scripts/sync-ggml.last
#	scripts/sync-ggml.sh
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
Commit 81ac0e5656: 58 changed files with 32809 additions and 7635 deletions.
.github/workflows/bench.yml (vendored, new file, 300 lines)
@@ -0,0 +1,300 @@
# Benchmark
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC80adis_H100_v5
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      duration:
        description: 'Duration of the bench'
        type: string
        default: 10m

  push:
    branches:
      - master
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  pull_request_target:
    types: [opened, synchronize, reopened]
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  schedule:
    - cron: '04 2 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }}
  cancel-in-progress: true

jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3

    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
      N_USERS: 8
      DURATION: 10m

    strategy:
      matrix:
        model: [phi-2]
        ftype: [q4_0, q8_0, f16]
        include:
          - model: phi-2
            ftype: q4_0
            pr_comment_enabled: "true"

    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.21'

      - name: Install k6 and xk6-sse
        id: k6_installation
        run: |
          cd examples/server/bench
          go install go.k6.io/xk6/cmd/xk6@latest
          xk6 build master \
            --with github.com/phymbert/xk6-sse

      - name: Build
        id: cmake_build
        run: |
          set -eux
          mkdir build
          cd build
          cmake .. \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DLLAMA_CUBLAS=ON \
            -DCUDAToolkit_ROOT=/usr/local/cuda \
            -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
            -DCMAKE_CUDA_ARCHITECTURES=75 \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_ALL_WARNINGS=OFF \
            -DCMAKE_BUILD_TYPE=Release;
          cmake --build . --config Release -j $(nproc) --target server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          python bench.py \
            --runner-label ${{ env.RUNNER_LABEL }} \
            --name ${{ github.job }} \
            --branch ${{ github.head_ref || github.ref_name }} \
            --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
            --scenario script.js \
            --duration ${{ github.event.inputs.duration || env.DURATION }} \
            --hf-repo ggml-org/models \
            --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
            --model-path-prefix /models \
            --parallel ${{ env.N_USERS }} \
            -ngl 33 \
            --batch-size 2048 \
            --ubatch-size 256 \
            --ctx-size 16384 \
            --n-prompts 1000 \
            --max-prompt-tokens 1024 \
            --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV

          # Remove dataset as we do not want it in the artefact
          rm ShareGPT_V3_unfiltered_cleaned_split.json

      - uses: actions/upload-artifact@v4
        with:
          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          compression-level: 9
          path: |
            examples/server/bench/*.jpg
            examples/server/bench/*.json
            examples/server/bench/*.log

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{secrets.GITHUB_TOKEN}}
          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      - name: Upload benchmark images
        uses: devicons/public-upload-to-imgur@v2.2.2
        continue-on-error: true # Important as it looks unstable: 503
        id: imgur_step
        with:
          client_id: ${{secrets.IMGUR_CLIENT_ID}}
          path: |
            examples/server/bench/prompt_tokens_seconds.jpg
            examples/server/bench/predicted_tokens_seconds.jpg
            examples/server/bench/kv_cache_usage_ratio.jpg
            examples/server/bench/requests_processing.jpg

      - name: Extract mermaid
        id: set_mermaid
        run: |
          set -eux

          cd examples/server/bench
          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Extract image url
        id: extract_image_url
        continue-on-error: true
        run: |
          set -eux

          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV

      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
        with:
          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          message: |
            <p align="center">

            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀

            </p>

            <details>

            <summary>Expand details for performance related PR only</summary>

            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
            - ${{ env.BENCH_GRAPH_XLABEL }}


            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.PROMPT_TOKENS_SECONDS }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.PREDICTED_TOKENS_SECONDS }}
            ```

            </details>

            </p>

            <details>

            <summary>Details</summary>

            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.KV_CACHE_USAGE_RATIO }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.REQUESTS_PROCESSING }}
            ```

            </details>

            </p>
            </details>
            </details>
SECURITY.md (new file, 67 lines)
@@ -0,0 +1,67 @@
# Security Policy

- [**Using llama.cpp securely**](#using-llamacpp-securely)
  - [Untrusted models](#untrusted-models)
  - [Untrusted inputs](#untrusted-inputs)
  - [Data privacy](#data-privacy)
  - [Untrusted environments or networks](#untrusted-environments-or-networks)
  - [Multi-Tenant environments](#multi-tenant-environments)
- [**Reporting a vulnerability**](#reporting-a-vulnerability)

## Using llama.cpp securely

### Untrusted models
Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources.

*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code.

> [!NOTE]
> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance.

### Untrusted inputs

Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks.

For maximum security when handling untrusted inputs, you may need to employ the following:

* Sandboxing: Isolate the environment where the inference happens.
* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics.
* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
* Input Sanitation: Before feeding data to the model, sanitize inputs rigorously (a small sketch follows this list). This involves techniques such as:
  * Validation: Enforce strict rules on allowed characters and data types.
  * Filtering: Remove potentially malicious scripts or code fragments.
  * Encoding: Convert special characters into safe representations.
  * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)).
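A minimal sketch of the validation/filtering/encoding steps above. This is not part of llama.cpp; the allowed-character rule and the length cap are assumptions a deployer would choose for their own application:

```python
import re

MAX_PROMPT_CHARS = 8192  # assumed cap, chosen by the deployer rather than by llama.cpp

def sanitize_prompt(text: str) -> str:
    # validation: enforce a strict size rule before any further processing
    if len(text) > MAX_PROMPT_CHARS:
        raise ValueError("prompt too long")
    # filtering: drop ASCII control characters except tab and newline
    text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", text)
    # encoding: neutralize characters that are unsafe if the output is later rendered as HTML
    return text.replace("<", "&lt;").replace(">", "&gt;")
```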
### Data privacy

To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.

### Untrusted environments or networks

If you can't run your models in a secure and isolated environment, or if they must be exposed to an untrusted network, make sure to take the following security precautions:
* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value (see the sketch after this list)
* Encrypt your data if sending it over the network.
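A small illustration of the hash check above; the file name and the expected digest are placeholders, so substitute the values published by the model provider:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # stream the file so multi-gigabyte model weights never need to fit in memory
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

EXPECTED = "<known-good sha256 published by the provider>"  # placeholder
actual = sha256_of("ggml-model-q4_0.gguf")                  # placeholder file name
if actual != EXPECTED:
    raise SystemExit(f"hash mismatch: expected {EXPECTED}, got {actual}")
```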
### Multi-Tenant environments

If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.

1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and malicious users from sending graphs to execute under another tenant's identity.

1. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.

1. Model Sharing: In a multitenant model sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.

1. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side-channel attacks on GPUs are possible, which can leak data from other models or processes running on the same system at the same time.

## Reporting a vulnerability

Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.

<!-- normal version -->
However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.

Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new).

This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure.
@@ -1063,8 +1063,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.ignore_eos = true;
         return true;
     }
-    if (arg == "--no-penalize-nl") {
-        sparams.penalize_nl = false;
+    if (arg == "--penalize-nl") {
+        sparams.penalize_nl = true;
         return true;
     }
     if (arg == "-l" || arg == "--logit-bias") {
@@ -1374,7 +1374,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" -dt N, --defrag-thold N\n");
     printf("             KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold);
     printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
-    printf(" --no-penalize-nl do not penalize newline token\n");
+    printf(" --penalize-nl penalize newline tokens\n");
     printf(" --temp N temperature (default: %.1f)\n", (double)sparams.temp);
     printf(" --all-logits return logits for all tokens in the batch (default: disabled)\n");
     printf(" --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n");
@@ -1929,11 +1929,6 @@ struct llama_model * llama_load_model_from_url(
         return NULL;
     }
 
-    if (!curl) {
-        fprintf(stderr, "%s: error initializing libcurl\n", __func__);
-        return NULL;
-    }
-
     if (!llama_download_file(curl, model_url, path_model)) {
         return NULL;
     }
@@ -2490,7 +2485,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "n_predict: %d # default: -1 (unlimited)\n", params.n_predict);
     fprintf(stream, "n_probs: %d # only used by server binary, default: 0\n", sparams.n_probs);
     fprintf(stream, "no_mmap: %s # default: false\n", !params.use_mmap ? "true" : "false");
-    fprintf(stream, "no_penalize_nl: %s # default: false\n", !sparams.penalize_nl ? "true" : "false");
+    fprintf(stream, "penalize_nl: %s # default: false\n", sparams.penalize_nl ? "true" : "false");
     fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type);
     fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride);
     fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.penalty_present);
@@ -23,7 +23,7 @@ if 'NO_LOCAL_GGUF' not in os.environ:
     sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
-from convert import HfVocab
+from convert import LlamaHfVocab, permute
 
 
 ###### MODEL DEFINITIONS ######
@@ -230,7 +230,7 @@ class Model(ABC):
     def _set_vocab_gpt2(self):
         dir_model = self.dir_model
         hparams = self.hparams
-        tokens: list[bytearray] = []
+        tokens: list[str] = []
         toktypes: list[int] = []
 
         from transformers import AutoTokenizer
@@ -243,8 +243,7 @@ class Model(ABC):
 
         for i in range(vocab_size):
             if i not in reverse_vocab:
-                pad_token = f"[PAD{i}]".encode('utf-8')
-                tokens.append(bytearray(pad_token))
+                tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.USER_DEFINED)
             elif reverse_vocab[i] in added_vocab:
                 tokens.append(reverse_vocab[i])
@@ -266,7 +265,7 @@ class Model(ABC):
     def _set_vocab_qwen(self):
         dir_model = self.dir_model
         hparams = self.hparams
-        tokens: list[bytearray] = []
+        tokens: list[str] = []
         toktypes: list[int] = []
 
         from transformers import AutoTokenizer
@@ -291,8 +290,7 @@ class Model(ABC):
 
         for i in range(vocab_size):
             if i not in reverse_vocab:
-                pad_token = f"[PAD{i}]".encode("utf-8")
-                tokens.append(bytearray(pad_token))
+                tokens.append(f"[PAD{i}]")
                 toktypes.append(gguf.TokenType.USER_DEFINED)
             elif reverse_vocab[i] in added_vocab:
                 tokens.append(reverse_vocab[i])
@@ -325,8 +323,7 @@ class Model(ABC):
         toktypes: list[int] = []
 
         if not tokenizer_path.is_file():
-            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
-            sys.exit(1)
+            raise FileNotFoundError(f"File not found: {tokenizer_path}")
 
         tokenizer = SentencePieceProcessor(str(tokenizer_path))
         vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
@@ -372,12 +369,8 @@ class Model(ABC):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-    def _set_vocab_hf(self):
-        path = self.dir_model
-        added_tokens_path = self.dir_model
-        vocab = HfVocab(
-            path, added_tokens_path if added_tokens_path.exists() else None
-        )
+    def _set_vocab_llama_hf(self):
+        vocab = LlamaHfVocab(self.dir_model)
         tokens = []
         scores = []
         toktypes = []
@@ -517,6 +510,17 @@ class BloomModel(Model):
 class MPTModel(Model):
     model_arch = gguf.MODEL_ARCH.MPT
 
+    def set_vocab(self):
+        try:
+            self._set_vocab_gpt2()
+        except Exception:
+            # Fallback for SEA-LION model
+            self._set_vocab_sentencepiece()
+            self.gguf_writer.add_add_bos_token(False)
+            self.gguf_writer.add_pad_token_id(3)
+            self.gguf_writer.add_eos_token_id(1)
+            self.gguf_writer.add_unk_token_id(0)
+
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
         self.gguf_writer.add_name(self.dir_model.name)
@@ -530,7 +534,10 @@ class MPTModel(Model):
         self.gguf_writer.add_layer_norm_eps(1e-5)
         if self.hparams["attn_config"]["clip_qkv"] is not None:
             self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
-        self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        if self.hparams["attn_config"]["alibi"]:
+            self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        else:
+            self.gguf_writer.add_max_alibi_bias(0.0)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers"))
@@ -779,6 +786,148 @@ class BaichuanModel(Model):
         return weights[r * n_part:r * n_part + r, ...]
 
 
+@Model.register("XverseForCausalLM")
+class XverseModel(Model):
+    model_arch = gguf.MODEL_ARCH.XVERSE
+
+    def set_vocab(self):
+        assert (self.dir_model / "tokenizer.json").is_file()
+        dir_model = self.dir_model
+        hparams = self.hparams
+
+        tokens: list[bytearray] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(dir_model)
+        vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
+        assert max(tokenizer.vocab.values()) < vocab_size
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        for token_id in range(vocab_size):
+            token_text = reverse_vocab[token_id].encode('utf-8')
+            # replace "\x00" to string with length > 0
+            if token_text == b"\x00":
+                toktype = gguf.TokenType.BYTE  # special
+                token_text = f"<{token_text}>".encode('utf-8')
+            elif re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
+                toktype = gguf.TokenType.BYTE  # special
+            elif reverse_vocab[token_id] in added_vocab:
+                if tokenizer.added_tokens_decoder[token_id].special:
+                    toktype = gguf.TokenType.CONTROL
+                else:
+                    toktype = gguf.TokenType.USER_DEFINED
+            else:
+                toktype = gguf.TokenType.NORMAL
+
+            tokens.append(token_text)
+            toktypes.append(toktype)
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        block_count = self.hparams["num_hidden_layers"]
+        head_count = self.hparams["num_attention_heads"]
+        head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+        hf_repo = self.hparams.get("_name_or_path", "")
+
+        ctx_length = 0
+        if "max_sequence_length" in self.hparams:
+            ctx_length = self.hparams["max_sequence_length"]
+        elif "max_position_embeddings" in self.hparams:
+            ctx_length = self.hparams["max_position_embeddings"]
+        elif "model_max_length" in self.hparams:
+            ctx_length = self.hparams["model_max_length"]
+        else:
+            print("gguf: can not find ctx length parameter.")
+            sys.exit()
+
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_source_hf_repo(hf_repo)
+        self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
+        self.gguf_writer.add_context_length(ctx_length)
+        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
+        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count(head_count)
+        self.gguf_writer.add_head_count_kv(head_count_kv)
+        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
+
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "linear":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+
+    def write_tensors(self):
+        # Collect tensors from generator object
+        model_kv = dict(self.get_tensors())
+        block_count = self.hparams["num_hidden_layers"]
+        head_count = self.hparams["num_attention_heads"]
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        head_count_kv = self.hparams.get("num_key_value_heads", head_count)
+
+        for name, data_torch in model_kv.items():
+            # we don't need these
+            if name.endswith(".rotary_emb.inv_freq"):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            # HF models permute some of the tensors, so we need to undo that
+            if name.endswith(("q_proj.weight")):
+                data_torch = self._reverse_hf_permute(data_torch, head_count, head_count)
+            if name.endswith(("k_proj.weight")):
+                data_torch = self._reverse_hf_permute(data_torch, head_count, head_count_kv)
+
+            data = data_torch.squeeze().numpy()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+            self.gguf_writer.add_tensor(new_name, data)
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
+
 @Model.register("FalconForCausalLM", "RWForCausalLM")
 class FalconModel(Model):
     model_arch = gguf.MODEL_ARCH.FALCON
@@ -1058,12 +1207,120 @@ class StableLMModel(Model):
         self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
 
 
-@Model.register("MixtralForCausalLM")
-class MixtralModel(Model):
+@Model.register("LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
+class LlamaModel(Model):
     model_arch = gguf.MODEL_ARCH.LLAMA
 
     def set_vocab(self):
-        self._set_vocab_sentencepiece()
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_llama_hf()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+
+    # Same as super class, but permuting q_proj, k_proj
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_head = self.hparams.get("num_attention_heads")
+        n_kv_head = self.hparams.get("num_key_value_heads")
+        n_experts = self.hparams.get("num_local_experts")
+        experts = dict()
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.numpy()
+
+            if name.endswith("q_proj.weight"):
+                data = permute(data, n_head, n_head)
+            if name.endswith("k_proj.weight"):
+                data = permute(data, n_head, n_kv_head)
+
+            data = data.squeeze()
+
+            # process the experts separately
+            if name.find("block_sparse_moe.experts") != -1:
+                experts[name] = data
+                if len(experts) >= n_experts:
+                    # merge the experts into a single 3d tensor
+                    for bid in range(block_count):
+                        for wid in range(1, 4):
+                            full = True
+                            for xid in range(n_experts):
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
+                                if ename not in experts:
+                                    full = False
+                                    break
+                            if not full:
+                                continue
+
+                            datas = []
+                            for xid in range(n_experts):
+                                ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.w{wid}.weight"
+                                datas.append(experts[ename])
+                                del experts[ename]
+
+                            data = np.stack(datas, axis=0)
+                            data_dtype = data.dtype
+
+                            if self.ftype == 0 and data_dtype == np.float16:
+                                data = data.astype(np.float32)
+
+                            if self.ftype == 1 and data_dtype == np.float32:
+                                data = data.astype(np.float16)
+
+                            merged_name = f"layers.{bid}.feed_forward.experts.w{wid}.weight"
+
+                            new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
+                            if new_name is None:
+                                print(f"Can not map tensor {name!r}")
+                                sys.exit()
+
+                            print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
+
+                            self.gguf_writer.add_tensor(new_name, data)
+                continue
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # 1d tensors need to be converted to float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+        if len(experts) > 0:
+            raise ValueError(f"Unprocessed experts: {experts.keys()}")
 
 
 @Model.register("GrokForCausalLM")
@@ -1080,6 +1337,92 @@ class GrokModel(Model):
         super().set_gguf_parameters()
         self.gguf_writer.add_name("Grok")
 
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        n_experts = self.hparams.get("num_local_experts")
+        experts = dict()
+        for name, data_torch in self.get_tensors():
+            # we don't need these
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq")):
+                continue
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.squeeze().numpy()
+
+            # process the experts separately
+            if name.find(".moe.") != -1:
+                experts[name] = data
+                if len(experts) >= n_experts:
+                    # merge the experts into a single 3d tensor
+                    for bid in range(block_count):
+                        for wid in ["linear", "linear_1", "linear_v"]:
+                            full = True
+                            for xid in range(n_experts):
+                                ename = f"transformer.decoder_layer.{bid}.moe.{xid}.{wid}.weight"
+                                if ename not in experts:
+                                    full = False
+                                    break
+                            if not full:
+                                continue
+
+                            datas = []
+                            for xid in range(n_experts):
+                                ename = f"transformer.decoder_layer.{bid}.moe.{xid}.{wid}.weight"
+                                datas.append(experts[ename])
+                                del experts[ename]
+
+                            data = np.stack(datas, axis=0)
+                            data_dtype = data.dtype
+
+                            if self.ftype == 0 and data_dtype == np.float16:
+                                data = data.astype(np.float32)
+
+                            if self.ftype == 1 and data_dtype == np.float32:
+                                data = data.astype(np.float16)
+
+                            merged_name = f"transformer.decoder_layer.{bid}.moe.{wid}.weight"
+
+                            new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
+                            if new_name is None:
+                                print(f"Can not map tensor {name!r}")
+                                sys.exit()
+
+                            print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
+
+                            self.gguf_writer.add_tensor(new_name, data)
+                continue
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+            if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+
 @Model.register("MiniCPMForCausalLM")
 class MiniCPMModel(Model):
@@ -1099,7 +1442,7 @@ class MiniCPMModel(Model):
         self.gguf_writer.add_file_type(self.ftype)
 
     def set_vocab(self):
-        self._set_vocab_hf()
+        self._set_vocab_llama_hf()
 
     def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
         if n_kv_head is not None and n_head != n_kv_head:
@@ -1700,11 +2043,8 @@ class BertModel(Model):
         self.gguf_writer.add_pooling_type(pooling_type)
 
     def set_vocab(self):
-        path = self.dir_model
-        added_tokens_path = self.dir_model if self.dir_model.exists() else None
-
         # use huggingface vocab to get all tokens
-        vocab = HfVocab(path, added_tokens_path)
+        vocab = LlamaHfVocab(self.dir_model, ignore_nonllama=True)
         tokens, scores, toktypes = zip(*vocab.all_tokens())
         assert len(tokens) == vocab.vocab_size
         self.vocab_size = vocab.vocab_size
@@ -106,12 +106,12 @@ def main():
     tensor_map = gguf.get_tensor_name_map(arch, block_count)
     print(tensor_map)
     for name in tensors.keys():
-        data = tensors[name]
+        data_torch = tensors[name]
         if name.endswith(".self_attention.rotary_emb.inv_freq"):
             continue
-        old_dtype = data.dtype
+        old_dtype = data_torch.dtype
         # TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
-        data = data.to(torch.float32).squeeze().numpy()
+        data = data_torch.to(torch.float32).squeeze().numpy()
         new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
         if new_name is None:
             print("Can not map tensor '" + name + "'")
convert.py (366 lines changed)
@ -16,13 +16,14 @@ import re
|
||||||
import signal
|
import signal
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
|
import textwrap
|
||||||
import time
|
import time
|
||||||
import zipfile
|
import zipfile
|
||||||
from abc import ABCMeta, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
|
from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sentencepiece import SentencePieceProcessor
|
from sentencepiece import SentencePieceProcessor
|
||||||
|
@ -43,6 +44,9 @@ ARCH = gguf.MODEL_ARCH.LLAMA
|
||||||
|
|
||||||
DEFAULT_CONCURRENCY = 8
|
DEFAULT_CONCURRENCY = 8
|
||||||
|
|
||||||
|
ADDED_TOKENS_FILE = 'added_tokens.json'
|
||||||
|
FAST_TOKENIZER_FILE = 'tokenizer.json'
|
||||||
|
|
||||||
#
|
#
|
||||||
# data types
|
# data types
|
||||||
#
|
#
|
||||||
|
@ -188,8 +192,10 @@ class Params:
|
||||||
n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
|
n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
|
||||||
|
|
||||||
if n_layer < 1:
|
if n_layer < 1:
|
||||||
raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
|
msg = """\
|
||||||
"Suggestion: provide 'config.json' of the model in the same directory containing model files.")
|
failed to guess 'n_layer'. This model is unknown or unsupported.
|
||||||
|
Suggestion: provide 'config.json' of the model in the same directory containing model files."""
|
||||||
|
raise KeyError(textwrap.dedent(msg))
|
||||||
|
|
||||||
n_head = n_embd // 128 # guessed
|
n_head = n_embd // 128 # guessed
|
||||||
n_mult = 256 # guessed
|
n_mult = 256 # guessed
|
||||||
|
@ -211,7 +217,8 @@ class Params:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
|
def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
|
||||||
config = json.load(open(config_path))
|
with open(config_path) as f:
|
||||||
|
config = json.load(f)
|
||||||
|
|
||||||
rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
|
rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
|
||||||
rope_scaling = config.get("rope_scaling")
|
rope_scaling = config.get("rope_scaling")
|
||||||
|
@ -233,8 +240,10 @@ class Params:
|
||||||
elif "max_position_embeddings" in config:
|
elif "max_position_embeddings" in config:
|
||||||
n_ctx = config["max_position_embeddings"]
|
n_ctx = config["max_position_embeddings"]
|
||||||
else:
|
else:
|
||||||
raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
|
msg = """\
|
||||||
"Suggestion: provide 'config.json' of the model in the same directory containing model files.")
|
failed to guess 'n_ctx'. This model is unknown or unsupported.
|
||||||
|
Suggestion: provide 'config.json' of the model in the same directory containing model files."""
|
||||||
|
raise KeyError(textwrap.dedent(msg))
|
||||||
|
|
||||||
n_experts = None
|
n_experts = None
|
||||||
n_experts_used = None
|
n_experts_used = None
|
||||||
|
@ -265,7 +274,8 @@ class Params:
|
||||||
# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
|
# {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
|
def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
|
||||||
config = json.load(open(config_path))
|
with open(config_path) as f:
|
||||||
|
config = json.load(f)
|
||||||
|
|
||||||
n_experts = None
|
n_experts = None
|
||||||
n_experts_used = None
|
n_experts_used = None
|
||||||
|
@ -331,47 +341,86 @@ class Params:
|
||||||
# vocab
|
# vocab
|
||||||
#
|
#
|
||||||
|
|
||||||
class BpeVocab:
|
@runtime_checkable
|
||||||
|
class BaseVocab(Protocol):
|
||||||
|
tokenizer_model: ClassVar[str]
|
||||||
|
name: ClassVar[str]
|
||||||
|
|
||||||
|
|
||||||
|
class NoVocab(BaseVocab):
|
||||||
|
tokenizer_model = "no_vocab"
|
||||||
|
name = "no_vocab"
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return "<NoVocab for a model without integrated vocabulary>"
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
|
||||||
|
class Vocab(BaseVocab, Protocol):
|
||||||
|
vocab_size: int
|
||||||
|
added_tokens_dict: dict[str, int]
|
||||||
|
added_tokens_list: list[str]
|
||||||
|
fname_tokenizer: Path
|
||||||
|
|
||||||
|
def __init__(self, base_path: Path): ...
|
||||||
|
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...
|
||||||
|
|
||||||
|
|
||||||
|
class BpeVocab(Vocab):
|
||||||
tokenizer_model = "gpt2"
|
tokenizer_model = "gpt2"
|
||||||
name = "bpe"
|
name = "bpe"
|
||||||
|
|
||||||
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
|
def __init__(self, base_path: Path):
|
||||||
self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
|
added_tokens: dict[str, int] = {}
|
||||||
if isinstance(self.bpe_tokenizer.get('model'), dict):
|
|
||||||
self.vocab = self.bpe_tokenizer["model"]["vocab"]
|
|
||||||
else:
|
|
||||||
self.vocab = self.bpe_tokenizer
|
|
||||||
added_tokens: dict[str, int]
|
|
||||||
if fname_added_tokens is not None:
|
|
||||||
# FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
|
|
||||||
added_tokens = json.load(open(fname_added_tokens, encoding="utf-8"))
|
|
||||||
else:
|
|
||||||
        if (fname_tokenizer := base_path / 'vocab.json').exists():
            # "slow" tokenizer
            with open(fname_tokenizer, encoding="utf-8") as f:
                self.vocab = json.load(f)

            try:
                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
                with open(base_path / ADDED_TOKENS_FILE, encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        else:
            # "fast" tokenizer
            fname_tokenizer = base_path / FAST_TOKENIZER_FILE

            # if this fails, FileNotFoundError propagates to caller
            with open(fname_tokenizer, encoding="utf-8") as f:
                tokenizer_json = json.load(f)

            tokenizer_model: dict[str, Any] = tokenizer_json['model']
            if (
                tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False)
                or tokenizer_json['decoder']['type'] != 'ByteLevel'
            ):
                raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer')

            self.vocab = tokenizer_model["vocab"]

            if (added := tokenizer_json.get('added_tokens')) is not None:
                # Added tokens here can be duplicates of the main vocabulary.
                added_tokens = {item['content']: item['id']
                                for item in added
                                if item['content'] not in self.vocab}

        vocab_size = len(self.vocab)
        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
        actual_ids = sorted(added_tokens.values())
        if expected_ids != actual_ids:
            expected_end_id = vocab_size + len(actual_ids) - 1
            raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
                             f"{vocab_size} - {expected_end_id}; got {actual_ids}")

        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
        self.added_tokens_dict = added_tokens
        self.added_tokens_list = [text for (text, idx) in items]
        self.vocab_size_base = vocab_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
        self.fname_tokenizer = fname_tokenizer

    def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        reverse_vocab = {id: encoded_tok for encoded_tok, id in self.vocab.items()}
@@ -392,19 +441,25 @@ class BpeVocab:
        return f"<BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"


class SentencePieceVocab(Vocab):
    tokenizer_model = "llama"
    name = "spm"

    def __init__(self, base_path: Path):
        added_tokens: dict[str, int] = {}
        if (fname_tokenizer := base_path / 'tokenizer.model').exists():
            # normal location
            try:
                with open(base_path / ADDED_TOKENS_FILE, encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        elif not (fname_tokenizer := base_path.parent / 'tokenizer.model').exists():
            # not found in alternate location either
            raise FileNotFoundError('Cannot find tokenizer.model')

        self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer))
        vocab_size = self.sentencepiece_tokenizer.vocab_size()

        new_tokens = {id: piece for piece, id in added_tokens.items() if id >= vocab_size}
        expected_new_ids = list(range(vocab_size, vocab_size + len(new_tokens)))
@@ -414,18 +469,17 @@ class SentencePieceVocab:
            raise ValueError(f"Expected new token IDs {expected_new_ids} to be sequential; got {actual_new_ids}")

        # Token pieces that were added to the base vocabulary.
        self.added_tokens_dict = added_tokens
        self.added_tokens_list = [new_tokens[id] for id in actual_new_ids]
        self.vocab_size_base = vocab_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
        self.fname_tokenizer = fname_tokenizer

    def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        tokenizer = self.sentencepiece_tokenizer
        for i in range(tokenizer.vocab_size()):
            piece = tokenizer.id_to_piece(i)
            text = piece.encode("utf-8")
            score: float = tokenizer.get_score(i)

            toktype = gguf.TokenType.NORMAL
@@ -458,27 +512,42 @@ class SentencePieceVocab:
        return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"


class LlamaHfVocab(Vocab):
    tokenizer_model = "llama"
    name = "hfft"

    def __init__(self, base_path: Path, ignore_nonllama: bool = False):
        fname_tokenizer = base_path / FAST_TOKENIZER_FILE
        # if this fails, FileNotFoundError propagates to caller
        with open(fname_tokenizer, encoding='utf-8') as f:
            tokenizer_json = json.load(f)

        # pre-check so we know if we need transformers
        tokenizer_model: dict[str, Any] = tokenizer_json['model']
        if ignore_nonllama:
            pass  # workaround incorrect use of this class for WordPiece
        elif (
            tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
            or tokenizer_json['decoder']['type'] != 'Sequence'
        ):
            raise FileNotFoundError('Cannot find Llama BPE tokenizer')

        try:
            from transformers import AutoTokenizer
        except ImportError as e:
            raise ImportError(
                "To use LlamaHfVocab, please install the `transformers` package. "
                "You can install it with `pip install transformers`."
            ) from e

        # Allow the tokenizer to default to slow or fast versions.
        # Explicitly set tokenizer to use local paths.
        self.tokenizer = AutoTokenizer.from_pretrained(
            base_path,
            cache_dir=base_path,
            local_files_only=True,
        )
        assert self.tokenizer.is_fast  # assume tokenizer.json is used

        # Initialize lists and dictionaries for added tokens
        self.added_tokens_list = []
@@ -506,8 +575,7 @@ class HfVocab:
        self.vocab_size_base = self.tokenizer.vocab_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)

        self.fname_tokenizer = fname_tokenizer

    def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        reverse_vocab = {
@@ -559,18 +627,7 @@ class HfVocab:
        yield from self.added_tokens()

    def __repr__(self) -> str:
        return f"<LlamaHfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"


#
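The `FileNotFoundError` pre-checks above are what let a caller try several vocab classes against the same directory: each class inspects `tokenizer.json` and bows out if the model and decoder fields do not match its flavor. A minimal, stand-alone sketch of that routing rule (the function name, path handling, and return labels are illustrative, not part of convert.py):

```python
import json
from pathlib import Path


def detect_fast_tokenizer_flavor(base_path: Path) -> str:
    """Classify a HF 'fast' tokenizer.json the same way the pre-checks above do.

    Returns 'gpt2-bpe', 'llama-bpe', or 'other'. Illustrative sketch only.
    """
    with open(base_path / "tokenizer.json", encoding="utf-8") as f:
        tokenizer_json = json.load(f)

    model = tokenizer_json["model"]
    decoder = tokenizer_json.get("decoder") or {}

    if model["type"] == "BPE" and not model.get("byte_fallback", False) and decoder.get("type") == "ByteLevel":
        return "gpt2-bpe"   # would be accepted by BpeVocab
    if model["type"] == "BPE" and model.get("byte_fallback", False) and decoder.get("type") == "Sequence":
        return "llama-bpe"  # would be accepted by LlamaHfVocab
    return "other"
```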
@@ -588,7 +645,7 @@ def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
            .reshape(weights.shape))


class Tensor(ABC):
    data_type: DataType

    @abstractmethod
@@ -610,7 +667,7 @@ def bf16_to_fp32(bf16_arr: np.ndarray[Any, np.dtype[np.uint16]]) -> NDArray:


class UnquantizedTensor(Tensor):
    def __init__(self, ndarray: NDArray):
        assert isinstance(ndarray, np.ndarray)
        self.ndarray = ndarray
        self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
@@ -689,7 +746,7 @@ class ModelPlus:
    model: LazyModel
    paths: list[Path]  # Where this was read from.
    format: Literal['ggml', 'torch', 'safetensors', 'none']
    vocab: BaseVocab | None  # For GGML models (which have vocab built in), the vocab.


def merge_sharded(models: list[LazyModel]) -> LazyModel:
@@ -698,7 +755,7 @@ def merge_sharded(models: list[LazyModel]) -> LazyModel:
    names = {name: None for model in models for name in model}

    def convert(name: str) -> LazyTensor:
        lazy_tensors = [model[name] for model in models]
        if len(lazy_tensors) == 1:
            # only one file; don't go through this procedure since there might
            # be quantized tensors
@@ -719,7 +776,7 @@ def merge_sharded(models: list[LazyModel]) -> LazyModel:

        def load() -> UnquantizedTensor:
            ndarrays = [load_unquantized(tensor) for tensor in lazy_tensors]
            concatenated = np.concatenate(ndarrays, axis=axis)
            return UnquantizedTensor(concatenated)
        description = 'concatenated[[' + '] | ['.join(lt.description for lt in lazy_tensors) + ']]'
        return LazyTensor(load, concatenated_shape, lazy_tensors[0].data_type, description)
@@ -771,6 +828,15 @@ def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
    return LazyTensor(load, s, lazy_tensor.data_type, 'part ' + lazy_tensor.description)


def pack_experts_lazy(lazy_tensors: list[LazyTensor]) -> LazyTensor:
    def load() -> Tensor:
        tensors = [lazy_tensor.load() for lazy_tensor in lazy_tensors]
        return UnquantizedTensor(np.array([tensor.ndarray for tensor in tensors]))
    s = lazy_tensors[0].shape.copy()
    s.insert(0, len(lazy_tensors))
    return LazyTensor(load, s, lazy_tensors[0].data_type, 'pack_experts ' + ' | '.join(lt.description for lt in lazy_tensors))

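`pack_experts_lazy` only records the packed shape up front and defers the actual stacking to `load()`. An eager NumPy sketch of the same shape transformation, with made-up sizes, shows the intent:

```python
import numpy as np

# N expert matrices of shape (rows, cols) become one (N, rows, cols) array,
# matching what load() above builds with np.array([...])
n_experts, rows, cols = 8, 4, 6
experts = [np.random.rand(rows, cols).astype(np.float32) for _ in range(n_experts)]

packed = np.array(experts)
assert packed.shape == (n_experts, rows, cols)

# the lazy wrapper records the same shape without loading anything:
# s = experts[0].shape copy, then s.insert(0, len(experts))
```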
# Functionality that simulates `torch.load` but where individual tensors are
# only loaded into memory on demand, not all at once.
# PyTorch can't do this natively as of time of writing:
@@ -807,10 +873,10 @@ class LazyUnpickler(pickle.Unpickler):

        def load(offset: int, elm_count: int) -> NDArray:
            dtype = data_type.dtype
            with self.zip_file.open(info) as fp:
                fp.seek(offset * dtype.itemsize)
                size = elm_count * dtype.itemsize
                data = fp.read(size)
                assert len(data) == size
                return np.frombuffer(data, dtype)
        description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}'
@@ -831,7 +897,7 @@ class LazyUnpickler(pickle.Unpickler):
    def rebuild_from_type_v2(func, new_type, args, state):
        return func(*args)

    CLASSES = {
        # getattr used here as a workaround for mypy not being smart enough to determine
        # the staticmethods have a __func__ attribute.
        ('torch._tensor', '_rebuild_from_type_v2'): getattr(rebuild_from_type_v2, '__func__'),
@@ -890,7 +956,7 @@ def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
def must_read(fp: IO[bytes], length: int) -> bytes:
    ret = fp.read(length)
    if len(ret) < length:
        raise EOFError("unexpectedly reached end of file")
    return ret

@@ -948,13 +1014,14 @@ def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], conc
            yield result


def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False) -> None:
    # Handle special case where the model's vocab size is not set
    if params.n_vocab == -1:
        raise ValueError(
            "The model's vocab size is set to -1 in params.json. Please update it manually."
            + (f" Maybe {vocab.vocab_size}?" if isinstance(vocab, Vocab) else ""),
        )
    if not isinstance(vocab, Vocab):
        return  # model has no vocab

    # Check for a vocab size mismatch
@@ -979,11 +1046,11 @@ def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> N
    if vocab.vocab_size < params.n_vocab:
        msg += " Add the --pad-vocab option and try again."

    raise ValueError(msg)

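`check_vocab_size` now keys off the class hierarchy instead of `hasattr`: only `Vocab` subclasses carry a `vocab_size`, while `NoVocab` merely names a tokenizer model. A simplified sketch of that gate with stand-in classes (these are illustrative, not the real convert.py definitions):

```python
class BaseVocab:
    tokenizer_model: str


class NoVocab(BaseVocab):
    tokenizer_model = "no_vocab"


class Vocab(BaseVocab):
    def __init__(self, vocab_size: int) -> None:
        self.vocab_size = vocab_size


def describe(vocab: BaseVocab, n_vocab: int) -> str:
    if not isinstance(vocab, Vocab):
        return "model has no vocab; only the tokenizer model name is written"
    if vocab.vocab_size == n_vocab:
        return "vocab size matches"
    return f"mismatch: model expects {n_vocab}, vocab has {vocab.vocab_size}"


print(describe(NoVocab(), 32000))
print(describe(Vocab(32000), 32000))
```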
class OutputFile:
    def __init__(self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

    def add_meta_arch(self, params: Params) -> None:
@@ -1034,8 +1101,6 @@ class OutputFile:
        self.gguf.add_file_type(params.ftype)

    def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[bytes], list[float], list[gguf.TokenType]]:
        tokens = []
        scores = []
        toktypes = []
@@ -1135,7 +1200,7 @@ class OutputFile:

    @staticmethod
    def write_all(
        fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
        concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
        pad_vocab: bool = False,
    ) -> None:
@@ -1145,11 +1210,11 @@ class OutputFile:

        # meta data
        of.add_meta_arch(params)
        if isinstance(vocab, Vocab):
            of.add_meta_vocab(vocab)
            of.add_meta_special_vocab(svocab)
        else:  # NoVocab
            of.gguf.add_tokenizer_model(vocab.tokenizer_model)

        # tensor info
        for name, lazy_tensor in model.items():
@@ -1176,7 +1241,7 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT

    name_to_type = {name: lazy_tensor.data_type for (name, lazy_tensor) in model.items()}

    raise ValueError(f"Unexpected combination of types: {name_to_type}")


def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
@@ -1186,10 +1251,26 @@ def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyM

def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) -> LazyModel:
    tmap = gguf.TensorNameMap(ARCH, params.n_layer)
    should_skip = set(gguf.MODEL_TENSOR_SKIP.get(ARCH, []))

    tmp = model

    # merge experts into one tensor
    if params.n_experts and params.n_experts > 0:
        for i_l in range(params.n_layer):
            for w in range(1, 4):
                experts = []
                for e in range(params.n_experts):
                    if f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight" in model:
                        experts.append(model[f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight"])
                        del tmp[f"layers.{i_l}.feed_forward.experts.{e}.w{w}.weight"]
                    elif f"model.layers.{i_l}.block_sparse_moe.experts.{e}.w{w}.weight" in model:
                        experts.append(model[f"model.layers.{i_l}.block_sparse_moe.experts.{e}.w{w}.weight"])
                        del tmp[f"model.layers.{i_l}.block_sparse_moe.experts.{e}.w{w}.weight"]
                    else:
                        raise ValueError(f"Expert tensor not found: layers.{i_l}.feed_forward.experts.{e}.w{w}.weight")
                tmp[f"layers.{i_l}.feed_forward.experts.w{w}.weight"] = pack_experts_lazy(experts)

    # HF models permut or pack some of the tensors, so we need to undo that
    for i in itertools.count():
        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
@@ -1213,8 +1294,7 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
            if skip_unknown:
                print(f"Unexpected tensor name: {name} - skipping")
                continue
            raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")

        if tensor_type in should_skip:
            print(f"skipping tensor {name_new}")
@@ -1231,7 +1311,7 @@ def nth_multifile_path(path: Path, n: int) -> Path | None:
    the nth path in the model.
    '''
    # Support the following patterns:
    patterns = [
        # - x.00.pth, x.01.pth, etc.
        (r'\.[0-9]{2}\.pth$', f'.{n:02}.pth'),
        # - x-00001-of-00002.bin, x-00002-of-00002.bin, etc.
@@ -1277,9 +1357,9 @@ def load_some_model(path: Path) -> ModelPlus:
        globs = ["consolidated.00.pth", "pytorch_model-00001-of-*.bin", "*.pt", "pytorch_model.bin"]
        files = [file for glob in globs for file in path.glob(glob)]
        if not files:
            raise FileNotFoundError(f"Can't find model in directory {path}")
        if len(files) > 1:
            raise ValueError(f"Found multiple models in {path}, not sure which to pick: {files}")
        path = files[0]

    paths = find_multifile_paths(path)
@@ -1293,36 +1373,14 @@ def load_some_model(path: Path) -> ModelPlus:
class VocabFactory:
    _VOCAB_CLASSES: list[type[Vocab]] = [SentencePieceVocab, BpeVocab, LlamaHfVocab]

    def __init__(self, path: Path):
        self.path = path

    def _create_special_vocab(self, vocab: BaseVocab, model_parent_path: Path) -> gguf.SpecialVocab:
        load_merges = vocab.name == "bpe"
        n_vocab = vocab.vocab_size if isinstance(vocab, Vocab) else None
        return gguf.SpecialVocab(
            model_parent_path,
            load_merges=load_merges,
@@ -1331,27 +1389,29 @@ class VocabFactory:
        )

    def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
        vocab_classes: dict[str, type[Vocab]] = {cls.name: cls for cls in self._VOCAB_CLASSES}
        selected_vocabs: dict[str, type[Vocab]] = {}
        for vtype in vocab_types:
            try:
                selected_vocabs[vtype] = vocab_classes[vtype]
            except KeyError:
                raise ValueError(f"Unsupported vocabulary type {vtype}") from None

        for vtype, cls in selected_vocabs.items():
            try:
                vocab = cls(self.path)
                break
            except FileNotFoundError:
                pass  # ignore unavailable tokenizers
        else:
            raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")

        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
        return vocab

    def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
        vocab: BaseVocab
        if vocab_types is None:
            vocab = NoVocab()
        else:
            vocab = self._create_vocab_by_path(vocab_types)
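Since `VocabFactory` now takes the model directory and lets each vocab class probe for its own files, a caller only chooses which vocab types it will accept. A usage sketch with placeholder paths (assuming the classes above are importable):

```python
from pathlib import Path

# hypothetical model directory; the factory probes it for tokenizer files
model_dir = Path("models/my-model")

vocab_factory = VocabFactory(model_dir)

# try SentencePiece first, then the Llama HF fast tokenizer
vocab, special_vocab = vocab_factory.load_vocab(["spm", "hfft"], model_dir)

# passing None instead of a list selects NoVocab (tensor-only conversion)
no_vocab, _ = vocab_factory.load_vocab(None, model_dir)
```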
@@ -1408,10 +1468,8 @@ def main(args_in: list[str] | None = None) -> None:
    parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")

    args = parser.parse_args(args_in)
    if args.no_vocab and args.vocab_only:
        raise ValueError("--vocab-only does not make sense with --no-vocab")

    if args.dump_single:
        model_plus = lazy_load_file(args.model)
@@ -1433,10 +1491,12 @@ def main(args_in: list[str] | None = None) -> None:
    params = Params.load(model_plus)
    if params.n_ctx == -1:
        if args.ctx is None:
            msg = """\
                The model doesn't have a context size, and you didn't specify one with --ctx
                Please specify one with --ctx:
                 - LLaMA v1: --ctx 2048
                 - LLaMA v2: --ctx 4096"""
            parser.error(textwrap.dedent(msg))
        params.n_ctx = args.ctx

    if args.outtype:
@@ -1451,9 +1511,11 @@ def main(args_in: list[str] | None = None) -> None:
    model_parent_path = model_plus.paths[0].parent
    vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
    vocab_factory = VocabFactory(vocab_path)
    vocab_types = None if args.no_vocab else args.vocab_type.split(",")
    vocab, special_vocab = vocab_factory.load_vocab(vocab_types, model_parent_path)

    if args.vocab_only:
        assert isinstance(vocab, Vocab)
        if not args.outfile:
            raise ValueError("need --outfile if using --vocab-only")
        outfile = args.outfile
@@ -10,16 +10,16 @@ There are 2 modes of operation:

- `prompt is shared` - there is a common prompt of size `PP` used by all batches (i.e. `N_KV = PP + B*TG`)

```bash
./batched-bench MODEL_PATH [N_KV_MAX] [N_BATCH] [N_UBATCH] [IS_PP_SHARED] [NGL] [MMQ] <PP> <TG> <PL>

# LLaMA 7B, F16, N_KV_MAX = 16384 (8GB), prompt not shared
./batched-bench ./models/llama-7b/ggml-model-f16.gguf 16384 2048 512 0 99

# LLaMA 7B, Q8_0, N_KV_MAX = 16384 (8GB), prompt is shared
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 16384 2048 512 1 99

# custom set of batches
./batched-bench ./models/llama-7b/ggml-model-q8_0.gguf 2048 512 512 0 999 0 128,256,512 128,256 1,2,4,8,16,32
```

## Sample results
@@ -32,13 +32,15 @@ int main(int argc, char ** argv) {
    gpt_params params;

    if (argc == 1 || argv[1][0] == '-') {
        printf("usage: %s MODEL_PATH [N_KV_MAX] [N_BATCH] [N_UBATCH] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>\n" , argv[0]);
        printf("  <PP>, <TG> and PL are comma-separated lists of numbers without spaces\n\n");
        printf("    example: %s ggml-model-f16.gguf 2048 2048 512 0 999 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]);
        return 1 ;
    }

    int n_kv_max     = 2048;
    int n_batch      = 2048;
    int n_ubatch     = 512;
    int is_pp_shared = 0;
    int n_gpu_layers = 0;

@@ -56,23 +58,31 @@ int main(int argc, char ** argv) {
    }

    if (argc >= 4) {
        n_batch = std::atoi(argv[3]);
    }

    if (argc >= 5) {
        n_ubatch = std::atoi(argv[4]);
    }

    if (argc >= 6) {
        is_pp_shared = std::atoi(argv[5]);
    }

    if (argc >= 7) {
        n_gpu_layers = std::atoi(argv[6]);
    }

    if (argc >= 8) {
        n_pp = parse_list(argv[7]);
    }

    if (argc >= 9) {
        n_tg = parse_list(argv[8]);
    }

    if (argc >= 10) {
        n_pl = parse_list(argv[9]);
    }

    // init LLM

@@ -100,7 +110,8 @@ int main(int argc, char ** argv) {

    ctx_params.seed     = 1234;
    ctx_params.n_ctx    = n_kv_max;
    ctx_params.n_batch  = n_batch;
    ctx_params.n_ubatch = n_ubatch;

    ctx_params.n_threads       = params.n_threads;
    ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;

@@ -158,7 +169,7 @@ int main(int argc, char ** argv) {
    }

    LOG_TEE("\n");
    LOG_TEE("%s: n_kv_max = %d, n_batch = %d, n_ubatch = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, n_batch, n_ubatch, is_pp_shared, n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
    LOG_TEE("\n");

    LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
@@ -880,7 +880,7 @@ int main(int argc, char ** argv) {
    TransformerWeights weights = {};
    {
        LOG("%s: Loading llama2c model from %s\n", __func__, params.fn_llama2c_model);
        FILE * file = fopen(params.fn_llama2c_model, "rb");
        if (!file) {
            LOG("%s: Unable to open the checkpoint file %s!\n", __func__, params.fn_llama2c_model);
            return 1;
@@ -179,25 +179,27 @@ int main(int argc, char ** argv) {
        float * out = emb + p * n_embd;
        batch_decode(ctx, batch, out, s, n_embd);

    // print the first part of the embeddings or for a single prompt, the full embedding
    fprintf(stdout, "\n");
    for (int j = 0; j < n_prompts; j++) {
        fprintf(stdout, "embedding %d: ", j);
        for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) {
            fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
        }
        fprintf(stdout, "\n");
    }

    // print cosine similarity matrix
    if (n_prompts > 1) {
        fprintf(stdout, "\n");
        printf("cosine similarity matrix:\n\n");
        for (int i = 0; i < n_prompts; i++) {
            for (int j = 0; j < n_prompts; j++) {
                float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
                fprintf(stdout, "%6.2f ", sim);
            }
            fprintf(stdout, "\n");
        }
    }

    // clean up
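The matrix printed above is the pairwise cosine similarity that `llama_embd_similarity_cos` computes for each pair of prompt embeddings. A NumPy sketch of the same computation over a flat embedding buffer (sizes are placeholders) for reference:

```python
import numpy as np

n_prompts, n_embd = 3, 8
emb = np.random.rand(n_prompts * n_embd).astype(np.float32)  # flat buffer, like `emb` above

vectors = emb.reshape(n_prompts, n_embd)
unit = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)

sim = unit @ unit.T  # sim[i][j] == cosine similarity of prompt i and prompt j
for row in sim:
    print(" ".join(f"{v:6.2f}" for v in row))
```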
5
examples/gbnf-validator/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
set(TARGET gbnf-validator)
add_executable(${TARGET} gbnf-validator.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common grammar-parser llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
132
examples/gbnf-validator/gbnf-validator.cpp
Normal file
@@ -0,0 +1,132 @@
#define LLAMA_API_INTERNAL

#include "grammar-parser.h"
#include "ggml.h"
#include "llama.h"
#include "unicode.h"

#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

static bool llama_sample_grammar_string(struct llama_grammar * grammar, const std::string & input_str, size_t & error_pos, std::string & error_msg) {
    auto decoded = decode_utf8(input_str, {});
    const auto & code_points = decoded.first;

    size_t pos = 0;
    for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
        auto prev_stacks = grammar->stacks;
        grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
        if (grammar->stacks.empty()) {
            error_pos = pos;
            error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'";
            grammar->stacks = prev_stacks;
            return false;
        }
        ++pos;
    }

    for (const auto & stack : grammar->stacks) {
        if (stack.empty()) {
            return true;
        }
    }

    error_pos = pos;
    error_msg = "Unexpected end of input";
    return false;
}

static void print_error_message(const std::string & input_str, size_t error_pos, const std::string & error_msg) {
    fprintf(stdout, "Input string is invalid according to the grammar.\n");
    fprintf(stdout, "Error: %s at position %zu\n", error_msg.c_str(), error_pos);
    fprintf(stdout, "\n");
    fprintf(stdout, "Input string:\n");
    fprintf(stdout, "%s", input_str.substr(0, error_pos).c_str());
    if (error_pos < input_str.size()) {
        fprintf(stdout, "\033[1;31m%c", input_str[error_pos]);
        if (error_pos+1 < input_str.size()) {
            fprintf(stdout, "\033[0;31m%s", input_str.substr(error_pos+1).c_str());
        }
        fprintf(stdout, "\033[0m\n");
    }
}

int main(int argc, char** argv) {
    if (argc != 3) {
        fprintf(stdout, "Usage: %s <grammar_filename> <input_filename>\n", argv[0]);
        return 1;
    }

    const std::string grammar_filename = argv[1];
    const std::string input_filename = argv[2];

    // Read the GBNF grammar file
    FILE* grammar_file = fopen(grammar_filename.c_str(), "r");
    if (!grammar_file) {
        fprintf(stdout, "Failed to open grammar file: %s\n", grammar_filename.c_str());
        return 1;
    }

    fseek(grammar_file, 0, SEEK_END);
    size_t grammar_size = ftell(grammar_file);
    fseek(grammar_file, 0, SEEK_SET);

    std::string grammar_str(grammar_size, ' ');
    fread(&grammar_str[0], 1, grammar_size, grammar_file);
    fclose(grammar_file);

    // Parse the GBNF grammar
    auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // will be empty (default) if there are parse errors
    if (parsed_grammar.rules.empty()) {
        fprintf(stdout, "%s: failed to parse grammar\n", __func__);
        return 1;
    }

    // Ensure that there is a "root" node.
    if (parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()) {
        fprintf(stdout, "%s: grammar does not contain a 'root' symbol\n", __func__);
        return 1;
    }

    std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());

    // Create the LLAMA grammar
    auto grammar = llama_grammar_init(
        grammar_rules.data(),
        grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));

    // Read the input file
    FILE* input_file = fopen(input_filename.c_str(), "r");
    if (!input_file) {
        fprintf(stdout, "Failed to open input file: %s\n", input_filename.c_str());
        return 1;
    }

    fseek(input_file, 0, SEEK_END);
    size_t input_size = ftell(input_file);
    fseek(input_file, 0, SEEK_SET);

    std::string input_str(input_size, ' ');
    fread(&input_str[0], 1, input_size, input_file);
    fclose(input_file);

    // Validate the input string against the grammar
    size_t error_pos;
    std::string error_msg;
    bool is_valid = llama_sample_grammar_string(grammar, input_str, error_pos, error_msg);

    if (is_valid) {
        fprintf(stdout, "Input string is valid according to the grammar.\n");
    } else {
        print_error_message(input_str, error_pos, error_msg);
    }

    // Clean up
    llama_grammar_free(grammar);

    return 0;
}
@@ -28,9 +28,11 @@ enum split_operation : uint8_t {

struct split_params {
    split_operation operation = SPLIT_OP_SPLIT;
    size_t n_bytes_split = 0;
    int n_split_tensors = 128;
    std::string input;
    std::string output;
    bool dry_run = false;
};

static void split_print_usage(const char * executable) {
@@ -41,15 +43,36 @@ static void split_print_usage(const char * executable) {
    printf("Apply a GGUF operation on IN to OUT.");
    printf("\n");
    printf("options:\n");
    printf("  -h, --help              show this help message and exit\n");
    printf("  --version               show version and build info\n");
    printf("  --split                 split GGUF to multiple GGUF (enabled by default)\n");
    printf("  --merge                 merge multiple GGUF to a single GGUF\n");
    printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
    printf("  --split-max-size N(M|G) max size per split\n");
    printf("  --dry-run               only print out a split plan and exit, without writing any new files\n");
    printf("\n");
}

// return convert string, for example "128M" or "4G" to number of bytes
static size_t split_str_to_n_bytes(std::string str) {
    size_t n_bytes = 0;
    int n;
    if (str.back() == 'M') {
        sscanf(str.c_str(), "%d", &n);
        n_bytes = n * 1024 * 1024; // megabytes
    } else if (str.back() == 'G') {
        sscanf(str.c_str(), "%d", &n);
        n_bytes = n * 1024 * 1024 * 1024; // gigabytes
    } else {
        throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
    }
    if (n <= 0) {
        throw std::invalid_argument("error: size must be a positive value");
    }
    return n_bytes;
}

static void split_params_parse_ex(int argc, const char ** argv, split_params & params) {
    std::string arg;
    const std::string arg_prefix = "--";
    bool invalid_param = false;

@@ -62,6 +85,8 @@ static bool split_params_parse_ex(int argc, const char ** argv, split_params & p
        }

        bool arg_found = false;
        bool is_op_set = false;
        bool is_mode_set = false;
        if (arg == "-h" || arg == "--help") {
            split_print_usage(argv[0]);
            exit(0);
@@ -71,23 +96,46 @@ static bool split_params_parse_ex(int argc, const char ** argv, split_params & p
            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
            exit(0);
        }
        if (arg == "--dry-run") {
            arg_found = true;
            params.dry_run = true;
        }

        if (is_op_set) {
            throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
        }
        if (arg == "--merge") {
            arg_found = true;
            is_op_set = true;
            params.operation = SPLIT_OP_MERGE;
        }
        if (arg == "--split") {
            arg_found = true;
            is_op_set = true;
            params.operation = SPLIT_OP_SPLIT;
        }

        if (is_mode_set) {
            throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
        }
        if (arg == "--split-max-tensors") {
            if (++arg_idx >= argc) {
                invalid_param = true;
                break;
            }
            arg_found = true;
            is_mode_set = true;
            params.n_split_tensors = atoi(argv[arg_idx]);
        }
        if (arg == "--split-max-size") {
            if (++arg_idx >= argc) {
                invalid_param = true;
                break;
            }
            arg_found = true;
            is_mode_set = true;
            params.n_bytes_split = split_str_to_n_bytes(argv[arg_idx]);
        }

        if (!arg_found) {
            throw std::invalid_argument("error: unknown argument: " + arg);
@@ -99,24 +147,17 @@ static bool split_params_parse_ex(int argc, const char ** argv, split_params & p
    }

    if (argc - arg_idx < 2) {
        throw std::invalid_argument("error: bad arguments");
    }

    params.input = argv[arg_idx++];
    params.output = argv[arg_idx++];
}

static bool split_params_parse(int argc, const char ** argv, split_params & params) {
    bool result = true;
    try {
        split_params_parse_ex(argc, argv, params);
    }
    catch (const std::invalid_argument & ex) {
        fprintf(stderr, "%s\n", ex.what());
@@ -140,15 +181,11 @@ struct split_strategy {
    struct ggml_context * ctx_meta = NULL;
    const int n_tensors;

    // one ctx_out per one output file
    std::vector<struct gguf_context *> ctx_outs;

    // temporary buffer for reading in tensor data
    std::vector<uint8_t> read_buf;

    split_strategy(const split_params & params,
            std::ifstream & f_input,
@@ -158,79 +195,141 @@ struct split_strategy {
            f_input(f_input),
            ctx_gguf(ctx_gguf),
            ctx_meta(ctx_meta),
            n_tensors(gguf_get_n_tensors(ctx_gguf)) {

        // because we need to know list of tensors for each file in advance, we will build all the ctx_out for all output splits
        int i_split = -1;
        struct gguf_context * ctx_out = NULL;
        auto new_ctx_out = [&]() {
            i_split++;
            if (ctx_out != NULL) {
                if (gguf_get_n_tensors(ctx_out) == 0) {
                    fprintf(stderr, "error: one of splits have 0 tensors. Maybe size or tensors limit is too small\n");
                    exit(EXIT_FAILURE);
                }
                ctx_outs.push_back(ctx_out);
            }
            ctx_out = gguf_init_empty();
            // Save all metadata in first split only
            if (i_split == 0) {
                gguf_set_kv(ctx_out, ctx_gguf);
            }
            gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_NO_STR, i_split);
            gguf_set_val_u16(ctx_out, LLM_KV_SPLIT_COUNT_STR, 0); // placeholder
            gguf_set_val_i32(ctx_out, LLM_KV_SPLIT_TENSORS_COUNT_STR, n_tensors);
        };

        // initialize ctx_out for the first split
        new_ctx_out();

        // process tensors one by one
        size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
        for (int i = 0; i < n_tensors; ++i) {
            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
            // calculate the "imaginary" size = the current size + next tensor size
            size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
            size_t next_tensors_size = curr_tensors_size + n_bytes;
            if (should_split(i, next_tensors_size)) {
                new_ctx_out();
                curr_tensors_size = n_bytes;
            } else {
                curr_tensors_size = next_tensors_size;
            }
            gguf_add_tensor(ctx_out, t);
        }

        // push the last ctx_out
        ctx_outs.push_back(ctx_out);

        // set the correct n_split for all ctx_out
        for (auto & ctx : ctx_outs) {
            gguf_set_val_u16(ctx, LLM_KV_SPLIT_COUNT_STR, ctx_outs.size());
        }
    }

    ~split_strategy() {
        for (auto & ctx_out : ctx_outs) {
            gguf_free(ctx_out);
        }
    }

    bool should_split(int i_tensor, size_t next_size) {
        if (params.n_bytes_split > 0) {
            // split by max size per file
            return next_size > params.n_bytes_split;
        } else {
            // split by number of tensors per file
            return i_tensor > 0 && i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
        }
    }

    void print_info() {
        printf("n_split: %ld\n", ctx_outs.size());
        int i_split = 0;
        for (auto & ctx_out : ctx_outs) {
            // re-calculate the real gguf size for each split (= metadata size + total size of all tensors)
            size_t total_size = gguf_get_meta_size(ctx_out);
            for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
                struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_out, i));
                total_size += ggml_nbytes(t);
            }
            total_size = total_size / 1024 / 1024; // convert to megabytes
            printf("split %05d: n_tensors = %d, total_size = %ldM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
            i_split++;
        }
    }

    void write() {
        int i_split = 0;
        int n_split = ctx_outs.size();
        for (auto & ctx_out : ctx_outs) {
            // construct file path
            char split_path[PATH_MAX] = {0};
            llama_split_path(split_path, sizeof(split_path), params.output.c_str(), i_split, n_split);

            // open the output file
            printf("Writing file %s ... ", split_path);
            fflush(stdout);
            std::ofstream fout = std::ofstream(split_path, std::ios::binary);
            fout.exceptions(std::ofstream::failbit); // fail fast on write errors

            // write metadata
            std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
            gguf_get_meta_data(ctx_out, data.data());
            fout.write((const char *)data.data(), data.size());

            // write tensors
            for (int i = 0; i < gguf_get_n_tensors(ctx_out); ++i) {
                // read tensor meta and prepare buffer
                const char * t_name = gguf_get_tensor_name(ctx_out, i);
                struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
                auto n_bytes = ggml_nbytes(t);
                read_buf.resize(n_bytes);

                // calculate offset
                auto i_tensor_in = gguf_find_tensor(ctx_gguf, t_name); // idx of tensor in the input file
                auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor_in);

                // copy tensor from input to output file
                copy_file_to_file(f_input, fout, offset, n_bytes);
                zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
            }

            printf("done\n");
            // close the file
            fout.close();
            i_split++;
        }
    }

    void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
        // TODO: detect OS and use copy_file_range() here for better performance
        if (read_buf.size() < len) {
            read_buf.resize(len);
        }
        f_in.seekg(in_offset);
f_in.read((char *)read_buf.data(), len);
|
||||||
|
f_out.write((const char *)read_buf.data(), len);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
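Note on the splitting logic above: each tensor's byte count is padded to the GGUF alignment and accumulated, and a new output context is opened whenever `should_split()` reports that the next tensor would cross the limit. A small standalone sketch of that accounting follows; the alignment value, tensor sizes and byte limit are illustrative assumptions, not values taken from this file.

```cpp
#include <cstdio>
#include <cstddef>
#include <vector>

// illustrative stand-ins for GGUF_DEFAULT_ALIGNMENT / GGML_PAD (values assumed)
static const size_t kAlign = 32;
static size_t pad(size_t n) { return (n + kAlign - 1) / kAlign * kAlign; }

int main() {
    // hypothetical tensor sizes in bytes
    std::vector<size_t> tensor_bytes = {1000, 4096, 50000, 123, 70000, 8192};
    const size_t max_bytes_per_split = 64 * 1024; // stand-in for a per-file byte limit

    size_t curr = 0;
    int split_no = 0;
    for (size_t i = 0; i < tensor_bytes.size(); ++i) {
        const size_t n    = pad(tensor_bytes[i]);
        const size_t next = curr + n;
        // same shape of decision as the byte-limited branch of should_split():
        // open a new split when the next tensor would push the file past the limit
        // (this sketch additionally never splits before the first tensor)
        if (curr > 0 && next > max_bytes_per_split) {
            ++split_no;
            curr = n;
        } else {
            curr = next;
        }
        printf("tensor %zu (%zu padded bytes) -> split %d\n", i, n, split_no);
    }
    return 0;
}
```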
@@ -254,32 +353,22 @@ static void gguf_split(const split_params & split_params) {
        exit(EXIT_FAILURE);
    }

    // prepare the strategy
    split_strategy strategy(split_params, f_input, ctx_gguf, ctx_meta);
    int n_split = strategy.ctx_outs.size();
    strategy.print_info();

    if (!split_params.dry_run) {
        // write all output splits
        strategy.write();
    }

    // done, clean up
    gguf_free(ctx_gguf);
    f_input.close();

    fprintf(stderr, "%s: %d gguf split written with a total of %d tensors.\n",
            __func__, n_split, strategy.n_tensors);
}

static void gguf_merge(const split_params & split_params) {

@@ -448,10 +537,6 @@ static void gguf_merge(const split_params & split_params) {
    }
}

int main(int argc, const char ** argv) {
    split_params params;
    split_params_parse(argc, argv, params);
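For reference, `llama_split_path()` is what turns the output prefix into the per-shard file names written above. The format string in the sketch below is a guess at the conventional `*-00001-of-00003.gguf` shard naming and is not quoted from the library, so treat it as an assumption.

```cpp
#include <cstdio>

// hedged approximation of the shard naming; the real logic lives in llama_split_path()
static void sketch_split_path(char * out, size_t n, const char * prefix, int i_split, int n_split) {
    // assumed format: <prefix>-00001-of-00003.gguf (1-based shard index)
    snprintf(out, n, "%s-%05d-of-%05d.gguf", prefix, i_split + 1, n_split);
}

int main() {
    char path[512];
    for (int i = 0; i < 3; ++i) {
        sketch_split_path(path, sizeof(path), "ggml-model-q4_0", i, 3);
        printf("%s\n", path);
    }
    return 0;
}
```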
@@ -99,35 +99,38 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *

    const float * data = is_host ? (const float *) src1->data : m_src1_data.data();

    // this has been adapted to the new format of storing merged experts in a single 3d tensor
    // ref: https://github.com/ggerganov/llama.cpp/pull/6387
    if (t->op == GGML_OP_MUL_MAT_ID) {
        const int idx = ((int32_t *) t->op_params)[0];
        const ggml_tensor * ids = t->src[2];
        const int n_as = src0->ne[2];

        // the top-k selected expert ids are stored in the ids tensor
        // for simplicity, always copy ids to host, because it is small
        // take into account that ids is not contiguous!
        GGML_ASSERT(ids->ne[1] == src1->ne[1]);
        GGML_ASSERT(n_as*ggml_nrows(ids)*sizeof(int) == GGML_PAD(ggml_nbytes(ids), n_as*sizeof(int)));
        m_ids.resize(ggml_nbytes(ids)/sizeof(int));
        ggml_backend_tensor_get(ids, m_ids.data(), 0, ggml_nbytes(ids));

        auto & e = m_stats[wname];

        ++e.ncall;
        // NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger
        // using the following line, we can correct for that if needed by replacing the line above with:
        //if (idx == t->src[0]->ne[0] - 1) ++e.ncall;

        // loop over all possible experts, regardless if they are used or not in the batch
        for (int ex = 0; ex < n_as; ++ex) {
            size_t e_start = ex*src1->ne[0];
            if (e.values.empty()) {
                e.values.resize(src1->ne[0]*n_as, 0);
            }
            else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
                fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
                exit(1); //GGML_ASSERT(false);
            }
            if (m_params.verbosity > 1) {
                printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
            }

@@ -137,7 +140,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                if (excur != ex) continue;
                const float * x = data + row * src1->ne[0];
                for (int j = 0; j < (int)src1->ne[0]; ++j) {
                    e.values[e_start + j] += x[j]*x[j];
                }
            }
            if (e.ncall > m_last_call) {
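The change above follows the new layout in which all experts of a MoE weight are merged into one 3D tensor: statistics for the whole tensor now live in a single flat buffer of `ne0 * n_as` sums, and each expert writes into its own slice starting at `e_start = ex * ne0`. A small standalone sketch of that indexing (sizes and activation values below are made up for illustration):

```cpp
#include <cstdio>
#include <vector>

int main() {
    const int ne0  = 4; // columns of one activation row (hypothetical)
    const int n_as = 3; // number of experts (hypothetical)

    // one flat accumulator for the merged tensor, as in the new imatrix code
    std::vector<float> values(ne0 * n_as, 0.0f);

    // pretend activations: one row routed to expert 1, one row routed to expert 2
    const float row_a[ne0] = {1.0f, 2.0f, 3.0f, 4.0f};
    const float row_b[ne0] = {0.5f, 0.5f, 0.5f, 0.5f};

    auto accumulate = [&](int expert, const float * x) {
        const size_t e_start = (size_t) expert * ne0; // per-expert slice of the flat buffer
        for (int j = 0; j < ne0; ++j) {
            values[e_start + j] += x[j] * x[j]; // sum of squares, as in collect_imatrix
        }
    };
    accumulate(1, row_a);
    accumulate(2, row_b);

    for (int ex = 0; ex < n_as; ++ex) {
        printf("expert %d:", ex);
        for (int j = 0; j < ne0; ++j) printf(" %.2f", values[ex * ne0 + j]);
        printf("\n");
    }
    return 0;
}
```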
@@ -6,7 +6,7 @@ for more information, please go to [Meituan-AutoML/MobileVLM](https://github.com

The implementation is based on llava, and is compatible with llava and mobileVLM. The usage is basically the same as llava.

Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using **MobileVLM-1.7B** as an example, the different conversion step will be shown.

## Usage
Build with cmake or run `make llava-cli` to build it.

@@ -36,7 +36,7 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336
python ./examples/llava/llava-surgery.py -m path/to/MobileVLM-1.7B
```

3. Use `convert-image-encoder-to-gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF:

```sh
python ./examples/llava/convert-image-encoder-to-gguf \

@@ -78,7 +78,7 @@ cd examples/llava/android/build_64
### run on Android
refer to `android/adb_run.sh`, modify resources' `name` and `path`

## Some results on Android with `Snapdragon 888` chip
### case 1
**input**
```sh

@@ -109,7 +109,6 @@ llama_print_timings: total time = 34731.93 ms
--image /data/local/tmp/cat.jpeg \
-p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat is in the image? ASSISTANT:"
```

**output**
```sh
encode_image_with_clip: image encoded in 21149.51 ms by CLIP ( 146.87 ms per image patch)

@@ -121,12 +120,82 @@ llama_print_timings: eval time = 1279.03 ms / 18 runs ( 71.06 m
llama_print_timings: total time = 34570.79 ms
```

## Some results on Android with `Snapdragon 778G` chip
### MobileVLM-1.7B case
#### llava-cli release-b2005
**input**
```sh
/data/local/tmp/llava-cli \
-m /data/local/tmp/ggml-model-q4_k.gguf \
--mmproj /data/local/tmp/mmproj-model-f16.gguf \
-t 4 \
--image /data/local/tmp/many_llamas.jpeg \
-p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat's that? ASSISTANT:"
```
**output**
```sh
encode_image_with_clip: image encoded in 18728.52 ms by CLIP ( 130.06 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that? ASSISTANT:

A group of llamas are standing in a green pasture.

llama_print_timings: load time = 20357.33 ms
llama_print_timings: sample time = 2.96 ms / 14 runs ( 0.21 ms per token, 4734.53 tokens per second)
llama_print_timings: prompt eval time = 8119.49 ms / 191 tokens ( 42.51 ms per token, 23.52 tokens per second)
llama_print_timings: eval time = 1005.75 ms / 14 runs ( 71.84 ms per token, 13.92 tokens per second)
llama_print_timings: total time = 28038.34 ms / 205 tokens
```
#### llava-cli latest-version
**input**

Just the same as above.

**output** (seems to be much slower)
```sh
encode_image_with_clip: image embedding created: 144 tokens

encode_image_with_clip: image encoded in 288268.88 ms by CLIP ( 2001.87 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that? ASSISTANT:

It is a group of sheep standing together in a grass field.

llama_print_timings: load time = 818120.91 ms
llama_print_timings: sample time = 3.44 ms / 14 runs ( 0.25 ms per token, 4067.40 tokens per second)
llama_print_timings: prompt eval time = 529274.69 ms / 191 tokens ( 2771.07 ms per token, 0.36 tokens per second)
llama_print_timings: eval time = 43894.02 ms / 13 runs ( 3376.46 ms per token, 0.30 tokens per second)
llama_print_timings: total time = 865441.76 ms / 204 tokens
```
### MobileVLM_V2-1.7B case
#### llava-cli release-2005b
**input**

Just the same as above.

**output**
```sh
encode_image_with_clip: image encoded in 20609.61 ms by CLIP ( 143.12 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that? ASSISTANT:

This image captures a lively scene of 20 llamas in motion on an expansive, grassy field. The llama is scattered across the landscape with some standing and others sitting down as if taking rest or observing their surroundings from different vantage points within this verdant setting.

The background offers glimpses into a picturesque town nestled amidst hills under an overcast sky, adding depth to the scene while also emphasizing that distance between these llama and human-made structures like houses or roads in which they roam freely without any barriers around them. The image is framed by text at both right angles on white backgrounds against a contrasting blue backdrop with green foliage, further drawing attention to the llamas amidst their natural habitat while also inviting viewers into this picturesque landscape within town limits of Alta Llama

llama_print_timings: load time = 22406.77 ms
llama_print_timings: sample time = 49.26 ms / 186 runs ( 0.26 ms per token, 3776.27 tokens per second)
llama_print_timings: prompt eval time = 9044.54 ms / 191 tokens ( 47.35 ms per token, 21.12 tokens per second)
llama_print_timings: eval time = 14497.49 ms / 186 runs ( 77.94 ms per token, 12.83 tokens per second)
llama_print_timings: total time = 44411.01 ms / 377 tokens
```

## Orin compile and run
### compile
```sh
make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
```

### run on Orin
### case 1
**input**

@@ -175,8 +244,121 @@ llama_print_timings: eval time = 166.65 ms / 11 runs ( 15.15 m
llama_print_timings: total time = 1365.47 ms / 243 tokens
```

## Running on Intel(R) Core(TM) i7-10750H
### Operating system
Ubuntu 22.04
### compile
```sh
make -j32
```
### MobileVLM-1.7B case
**input**
```sh
-m /path/to/ggml-model-q4_k.gguf \
--mmproj /path/to/mmproj-model-f16.gguf \
--image /path/to/many_llamas.jpeg
-p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat's that? ASSISTANT:" \
```
**output**
```sh
encode_image_with_clip: image embedding created: 144 tokens

encode_image_with_clip: image encoded in 2730.94 ms by CLIP ( 18.96 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that?ASSISTANT:

A group of llamas are walking together in a field.

llama_print_timings: load time = 5506.60 ms
llama_print_timings: sample time = 0.44 ms / 13 runs ( 0.03 ms per token, 29545.45 tokens per second)
llama_print_timings: prompt eval time = 2031.58 ms / 190 tokens ( 10.69 ms per token, 93.52 tokens per second)
llama_print_timings: eval time = 438.92 ms / 12 runs ( 36.58 ms per token, 27.34 tokens per second)
llama_print_timings: total time = 5990.25 ms / 202 tokens
```

### MobileVLM_V2-1.7B case
**input**

Just the same as above.

**output**
```sh
encode_image_with_clip: image embedding created: 144 tokens

encode_image_with_clip: image encoded in 3223.89 ms by CLIP ( 22.39 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that?ASSISTANT:

The image captures a tranquil scene in a park, where a group of approximately 20 llamas are gathered. The llamas, a mix of white and black, are standing in a line, their black and white patterns contrasting with the lush green grass of the park. The lamas are arranged in a line, suggesting a social order.

The park itself is lush and green, with trees dotting the landscape in the background. A sign reading "Llamas Tico Ana" is also visible in the image, possibly indicating the location or the breed of the llamas. The image seems to be taken from a distance, providing a wide view of the scene and the surrounding environment.

The llamas' positions relative to each other, the sign, and the trees create a harmonious composition. The image does not contain any discernible text. The overall scene is one of peace and natural beauty, with the llamas in their natural habitat, surrounded by the vibrant colors and lush greenery of the park.

llama_print_timings: load time = 6642.61 ms
llama_print_timings: sample time = 8.15 ms / 223 runs ( 0.04 ms per token, 27358.61 tokens per second)
llama_print_timings: prompt eval time = 2475.07 ms / 190 tokens ( 13.03 ms per token, 76.77 tokens per second)
llama_print_timings: eval time = 8760.60 ms / 222 runs ( 39.46 ms per token, 25.34 tokens per second)
llama_print_timings: total time = 15513.95 ms / 412 tokens
```

## Run on Intel(R) Core(TM) Ultra7 115H
### Operating system
Windows 11
### compile
```sh
make -j32
```
### MobileVLM-1.7B case
**input**
```sh
-m /path/to/ggml-model-q4_k.gguf \
--mmproj /path/to/tmp/mmproj-model-f16.gguf \
-p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: <image>\nWhat's that? ASSISTANT:" \
```
**output**
```sh
encode_image_with_clip: image encoded in 4902.81 ms by CLIP ( 34.05 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that? ASSISTANT:

The image features a group of brown and white llamas standing in a grassy field.

llama_print_timings: load time = 7441.06 ms
llama_print_timings: sample time = 0.72 ms / 19 runs ( 0.04 ms per token, 26279.39 tokens per second)
llama_print_timings: prompt eval time = 2090.71 ms / 191 tokens ( 10.95 ms per token, 91.36 tokens per second)
llama_print_timings: eval time = 512.35 ms / 18 runs ( 28.46 ms per token, 35.13 tokens per second)
llama_print_timings: total time = 7987.23 ms / 209 tokens
```

### MobileVLM_V2-1.7B case
**input**

Just the same as above.

**output**
```sh
encode_image_with_clip: image encoded in 4682.44 ms by CLIP ( 32.52 ms per image patch)
system_prompt: A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER:
user_prompt: \nWhat's that? ASSISTANT:

This image captures a lively scene of a group of 14 llamas in a grassy field. The llamas, with their distinctive black and white coats, are standing and walking in a line, seemingly engaged in a social activity. One
of them, possibly the first in the line, has its back turned, perhaps observing something in the distance.

The llama in the front of the line stands out due to its black and white coloring, which is quite unusual for llama patterns. The llama in the front also seems to be more aware of its surroundings, as it faces the camera, giving a sense of engagement with the viewer.

The image is taken from the side of the llama, providing a clear view of the llama in the front and its companions. The lameness in the llama in
front is not visible, indicating that it might not be the main focus of the photo.

The background of the image features a grassy field, with a fence and a tree visible in the distance. The tree appears to be bare, suggesting that it might be during a time of year when most trees are dormant or have shed their leaves.

llama_print_timings: load time = 7015.35 ms
llama_print_timings: sample time = 10.61 ms / 256 runs ( 0.04 ms per token, 24119.09 tokens per second)
llama_print_timings: prompt eval time = 2052.45 ms / 191 tokens ( 10.75 ms per token, 93.06 tokens per second)
llama_print_timings: eval time = 7259.43 ms / 255 runs ( 28.47 ms per token, 35.13 tokens per second)
llama_print_timings: total time = 14371.19 ms / 446 tokens
```

## Minor shortcomings
The `n_patch` of output in `ldp` is 1/4 of the input. In order to implement quickly, we uniformly modified the `clip_n_patches` function to a quarter. When counting the time consumption, the calculated time will be 4 times bigger than the real cost.

## TODO

@@ -191,5 +373,5 @@ The `n_patch` of output in `ldp` is 1/4 of the input. In order to implement quic

## contributor
```sh
zhangjidong05, yangyang260, huyiming03, chenxiaotao03, ZiangWu-77
```
@@ -835,9 +835,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
            mlp_2 = ggml_pool_2d(ctx0, mlp_2, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);
            // weight ne = [3, 3, 2048, 1]
            struct ggml_tensor * peg_0 = ggml_conv_depthwise_2d(ctx0, model.mm_model_peg_0_w, mlp_2, 1, 1, 1, 1, 1, 1);
            peg_0 = ggml_cont(ctx0, ggml_permute(ctx0, peg_0, 1, 2, 0, 3));
            peg_0 = ggml_add(ctx0, peg_0, model.mm_model_peg_0_b);
            mlp_2 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_2, 1, 2, 0, 3));
            peg_0 = ggml_add(ctx0, peg_0, mlp_2);
            peg_0 = ggml_reshape_3d(ctx0, peg_0, peg_0->ne[0], peg_0->ne[1] * peg_0->ne[2], peg_0->ne[3]);
            embeddings = peg_0;
        }

@@ -1755,7 +1756,7 @@ int clip_n_patches(const struct clip_ctx * ctx) {

    int n_patches = (params.image_size / params.patch_size) * (params.image_size / params.patch_size);

    if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2) {
        n_patches /= 4;
    }
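The `clip_n_patches()` change simply extends the existing LDP quarter-resolution rule to the new LDPv2 projector. A toy calculation of what that means for the CLIP geometry used in the MobileVLM README above (the 336/14 numbers come from `clip-vit-large-patch14-336` and are assumptions here, not read from the model file):

```cpp
#include <cstdio>

int main() {
    // assumed from clip-vit-large-patch14-336: 336x336 input, 14x14 patches
    const int image_size = 336;
    const int patch_size = 14;

    int n_patches = (image_size / patch_size) * (image_size / patch_size); // 24 * 24 = 576
    const bool is_ldp_or_ldpv2 = true; // MobileVLM projectors
    if (is_ldp_or_ldpv2) {
        n_patches /= 4; // LDP/LDPv2 reduce the patch grid to a quarter
    }
    printf("image tokens: %d\n", n_patches); // 144, matching the "144 tokens" seen in the logs above
    return 0;
}
```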
@@ -296,7 +296,9 @@ These options help improve the performance and memory usage of the LLaMA models.

### Batch Size

- `-b N, --batch-size N`: Set the batch size for prompt processing (default: `2048`). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.

- `-ub N`, `--ubatch-size N`: physical maximum batch size. This is for pipeline parallelization. Default: `512`.
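To picture the difference between the two options: `--batch-size` bounds how many tokens are submitted logically at once, while `--ubatch-size` bounds the physical chunk that is actually processed per step. The sketch below is purely illustrative chunking arithmetic under that reading, not the actual llama.cpp scheduling code.

```cpp
#include <algorithm>
#include <cstdio>

int main() {
    const int n_prompt = 5000; // hypothetical prompt length in tokens
    const int n_batch  = 2048; // logical batch (-b)
    const int n_ubatch = 512;  // physical batch (-ub)

    for (int start = 0; start < n_prompt; start += n_batch) {
        const int batch_len = std::min(n_batch, n_prompt - start);
        printf("logical batch: tokens [%d, %d)\n", start, start + batch_len);
        for (int u = 0; u < batch_len; u += n_ubatch) {
            const int ubatch_len = std::min(n_ubatch, batch_len - u);
            printf("  physical ubatch: tokens [%d, %d)\n", start + u, start + u + ubatch_len);
        }
    }
    return 0;
}
```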

### Prompt Caching
@@ -117,13 +117,13 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map<st
    std::ifstream in(imatrix_file.c_str(), std::ios::binary);
    if (!in) {
        printf("%s: failed to open %s\n",__func__, imatrix_file.c_str());
        exit(1);
    }
    int n_entries;
    in.read((char *)&n_entries, sizeof(n_entries));
    if (in.fail() || n_entries < 1) {
        printf("%s: no data in file %s\n", __func__, imatrix_file.c_str());
        exit(1);
    }
    for (int i = 0; i < n_entries; ++i) {
        int len; in.read((char *)&len, sizeof(len));

@@ -131,11 +131,11 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map<st
        in.read((char *)name_as_vec.data(), len);
        if (in.fail()) {
            printf("%s: failed reading name for entry %d from %s\n", __func__, i+1, imatrix_file.c_str());
            exit(1);
        }
        name_as_vec[len] = 0;
        std::string name{name_as_vec.data()};
        auto & e = imatrix_data[name];
        int ncall;
        in.read((char *)&ncall, sizeof(ncall));
        int nval;

@@ -143,18 +143,22 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map<st
        if (in.fail() || nval < 1) {
            printf("%s: failed reading number of values for entry %d\n", __func__, i);
            imatrix_data = {};
            exit(1);
        }
        e.resize(nval);
        in.read((char *)e.data(), nval*sizeof(float));
        if (in.fail()) {
            printf("%s: failed reading data for entry %d\n", __func__, i);
            imatrix_data = {};
            exit(1);
        }
        if (ncall > 0) {
            for (auto& v : e) v /= ncall;
        }

        if (getenv("LLAMA_TRACE")) {
            printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), ncall, name.c_str());
        }
    }
    printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str());
}
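The loader above implies a simple on-disk layout for the importance matrix file: an entry count, then for each entry a name length, the name bytes, a call count, a value count, and the float values. A hedged reader sketch based only on those reads follows; it is not the official tool, ignores any trailing metadata the format may have, and hard-codes nothing beyond what the code above consumes.

```cpp
#include <cstdio>
#include <fstream>
#include <string>
#include <vector>

// sketch of the layout implied by load_imatrix():
// int32 n_entries, then per entry: int32 len, len name bytes,
// int32 ncall, int32 nval, nval float32 values
int main(int argc, char ** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s imatrix.dat\n", argv[0]); return 1; }
    std::ifstream in(argv[1], std::ios::binary);
    if (!in) { fprintf(stderr, "failed to open %s\n", argv[1]); return 1; }

    int n_entries = 0;
    in.read((char *)&n_entries, sizeof(n_entries));
    for (int i = 0; i < n_entries && in; ++i) {
        int len = 0;
        in.read((char *)&len, sizeof(len));
        if (len <= 0) break;
        std::string name(len, '\0');
        in.read(&name[0], len);
        int ncall = 0, nval = 0;
        in.read((char *)&ncall, sizeof(ncall));
        in.read((char *)&nval, sizeof(nval));
        std::vector<float> values(nval > 0 ? nval : 0);
        in.read((char *)values.data(), values.size() * sizeof(float));
        printf("entry %d: %s (ncall = %d, nval = %d)\n", i, name.c_str(), ncall, nval);
    }
    return 0;
}
```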
@@ -16,52 +16,50 @@ The project is under active development, and we are [looking for feedback and co

**Command line options:**

- `--threads N`, `-t N`: Set the number of threads to use during generation. Not used if model layers are offloaded to GPU. The server is using batching. This parameter is used only if one token is to be processed on CPU backend.
- `-tb N, --threads-batch N`: Set the number of threads to use during batch and prompt processing. If not specified, the number of threads will be set to the number of threads used for generation. Not used if model layers are offloaded to GPU.
- `--threads-http N`: Number of threads in the http server pool to process requests. Default: `max(std::thread::hardware_concurrency() - 1, --parallel N + 2)`
- `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`).
- `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file. Default: unused
- `-hfr REPO, --hf-repo REPO`: Hugging Face model repository. Default: unused
- `-hff FILE, --hf-file FILE`: Hugging Face model file. Default: unused
- `-a ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
- `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is `512`, but LLaMA models were built with a context of `2048`, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were built with a context of `4096`.
- `-ngl N`, `--n-gpu-layers N`: When compiled with GPU support, this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
- `-mg i, --main-gpu i`: When using multiple GPUs, this option controls which GPU is used for small tensors for which the overhead of splitting the computation across all GPUs is not worthwhile. The GPU in question will use slightly more VRAM to store a scratch buffer for temporary results. By default, GPU `0` is used.
- `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs, this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default, the data is split in proportion to VRAM, but this may not be optimal for performance.
- `-b N`, `--batch-size N`: Set the batch size for prompt processing. Default: `2048`
- `-ub N`, `--ubatch-size N`: Physical maximum batch size. Default: `512`
- `--mlock`: Lock the model in memory, preventing it from being swapped out when memory-mapped.
- `--no-mmap`: Do not memory-map the model. By default, models are mapped into memory, which allows the system to load only the necessary parts of the model as needed.
- `--numa STRATEGY`: Attempt one of the below optimization strategies that may help on some NUMA systems
  - `--numa distribute`: Spread execution evenly over all nodes
  - `--numa isolate`: Only spawn threads on CPUs on the node that execution started on
  - `--numa numactl`: Use the CPU map provided by numactl. If run without this previously, it is recommended to drop the system page cache before using this. See https://github.com/ggerganov/llama.cpp/issues/1437
- `--numa`: Attempt optimizations that may help on some NUMA systems.
- `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains.
- `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation.
- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`
- `--host`: Set the hostname or ip address to listen. Default `127.0.0.1`
- `--port`: Set the port to listen. Default: `8080`
- `--path`: Path from which to serve static files. Default: disabled
- `--api-key`: Set an api key for request authorization. By default, the server responds to every request. With an api key set, the requests must have the Authorization header set with the api key as Bearer token. May be used multiple times to enable multiple valid keys.
- `--api-key-file`: Path to file containing api keys delimited by new lines. If set, requests must include one of the keys for access. May be used in conjunction with `--api-key`s.
- `--embedding`: Enable embedding extraction. Default: disabled
- `-np N`, `--parallel N`: Set the number of slots for process requests. Default: `1`
- `-cb`, `--cont-batching`: Enable continuous batching (a.k.a dynamic batching). Default: disabled
- `-spf FNAME`, `--system-prompt-file FNAME` Set a file to load a system prompt (initial prompt of all slots). This is useful for chat applications. [See more](#change-system-prompt-on-runtime)
- `--mmproj MMPROJ_FILE`: Path to a multimodal projector file for LLaVA.
- `--grp-attn-n`: Set the group attention factor to extend context size through self-extend. Used together with group attention width `--grp-attn-w`. Default: `1`, which is disabled.
- `--grp-attn-w`: Set the group attention width to extend context size through self-extend. Used together with group attention factor `--grp-attn-n`. Default: `512`
- `-n N, --n-predict N`: Set the maximum tokens to predict. Default: `-1`
- `--slots-endpoint-disable`: To disable slots state monitoring endpoint. Slots state may contain user data, prompts included.
- `--metrics`: Enable prometheus `/metrics` compatible endpoint. Default: disabled
- `--chat-template JINJA_TEMPLATE`: Set custom jinja chat template. This parameter accepts a string, not a file name. Default: template taken from model's metadata. We only support [some pre-defined templates](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template)
- `--log-disable`: Output logs to stdout only, not to `llama.log`. Default: enabled
- `--log-format FORMAT`: Define the log output to FORMAT: json or text. Default: `json`

**If compiled with `LLAMA_SERVER_SSL=ON`**
- `--ssl-key-file FNAME`: Path to a file containing a PEM-encoded SSL private key

@@ -69,7 +67,7 @@ see https://github.com/ggerganov/llama.cpp/issues/1437

## Build

`server` is built alongside everything else from the root of the project

- Using `make`:

@@ -85,7 +83,7 @@ server is build alongside everything else from the root of the project

## Build with SSL

`server` can also be built with SSL support using OpenSSL 3

- Using `make`:

@@ -135,7 +133,7 @@ docker run -p 8080:8080 -v /path/to/models:/models --gpus all ghcr.io/ggerganov/

## Testing with CURL

Using [curl](https://curl.se/). On Windows, `curl.exe` should be available in the base OS.

```sh
curl --request POST \

@@ -159,7 +157,7 @@ mkdir llama-client
cd llama-client
```

Create an index.js file and put this inside:

```javascript
const prompt = `Building a website can be done in 10 simple steps:`;

@@ -190,8 +188,8 @@ node index.js
- 503 -> `{"status": "loading model"}` if the model is still being loaded.
- 500 -> `{"status": "error"}` if the model failed to load.
- 200 -> `{"status": "ok", "slots_idle": 1, "slots_processing": 2 }` if the model is successfully loaded and the server is ready for further requests mentioned below.
- 200 -> `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if no slots are currently available.
- 503 -> `{"status": "no slot available", "slots_idle": 0, "slots_processing": 32}` if the query parameter `fail_on_no_slot` is provided and no slots are currently available.

If the query parameter `include_slots` is passed, `slots` field will contain internal slots data except if `--slots-endpoint-disable` is set.
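A minimal client-side check of the status codes described above can be written with libcurl; the `/health` path and the default host/port are assumptions in this sketch (they are not stated in this excerpt), so adjust them to match your server configuration.

```cpp
#include <cstdio>
#include <curl/curl.h>

int main() {
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL * curl = curl_easy_init();
    if (!curl) { return 1; }

    // assumed endpoint for the health check described above, on the default --host/--port
    curl_easy_setopt(curl, CURLOPT_URL, "http://127.0.0.1:8080/health");
    CURLcode res = curl_easy_perform(curl);
    if (res == CURLE_OK) {
        long status = 0;
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
        // per the list above: 503 -> loading or no slot, 500 -> failed to load, 200 -> ready
        printf("server returned HTTP %ld\n", status);
    } else {
        fprintf(stderr, "request failed: %s\n", curl_easy_strerror(res));
    }
    curl_easy_cleanup(curl);
    curl_global_cleanup();
    return 0;
}
```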
@ -205,75 +203,75 @@ node index.js
|
||||||
- The model's `tokenizer.ggml.add_bos_token` metadata is `true`
|
- The model's `tokenizer.ggml.add_bos_token` metadata is `true`
|
||||||
- The system prompt is empty
|
- The system prompt is empty
|
||||||
|
|
||||||
`temperature`: Adjust the randomness of the generated text (default: 0.8).
|
`temperature`: Adjust the randomness of the generated text. Default: `0.8`
|
||||||
|
|
||||||
`dynatemp_range`: Dynamic temperature range. The final temperature will be in the range of `[temperature - dynatemp_range; temperature + dynatemp_range]` (default: 0.0, 0.0 = disabled).
|
`dynatemp_range`: Dynamic temperature range. The final temperature will be in the range of `[temperature - dynatemp_range; temperature + dynatemp_range]` Default: `0.0`, which is disabled.
|
||||||
|
|
||||||
`dynatemp_exponent`: Dynamic temperature exponent (default: 1.0).
|
`dynatemp_exponent`: Dynamic temperature exponent. Default: `1.0`
|
||||||
|
|
||||||
`top_k`: Limit the next token selection to the K most probable tokens (default: 40).
|
`top_k`: Limit the next token selection to the K most probable tokens. Default: `40`
|
||||||
|
|
||||||
`top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
|
`top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. Default: `0.95`
|
||||||
|
|
||||||
`min_p`: The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
|
`min_p`: The minimum probability for a token to be considered, relative to the probability of the most likely token. Default: `0.05`
|
||||||
|
|
||||||
`n_predict`: Set the maximum number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. (default: -1, -1 = infinity).
|
`n_predict`: Set the maximum number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. Default: `-1`, where `-1` is infinity.
|
||||||
|
|
||||||
`n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded.
|
`n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded.
|
||||||
By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the prompt.
|
By default, this value is set to `0`, meaning no tokens are kept. Use `-1` to retain all tokens from the prompt.
|
||||||
|
|
||||||
`stream`: It allows receiving each predicted token in real-time instead of waiting for the completion to finish. To enable this, set to `true`.
|
`stream`: It allows receiving each predicted token in real-time instead of waiting for the completion to finish. To enable this, set to `true`.
|
||||||
|
|
||||||
`stop`: Specify a JSON array of stopping strings.
|
`stop`: Specify a JSON array of stopping strings.
|
||||||
These words will not be included in the completion, so make sure to add them to the prompt for the next iteration (default: []).
|
These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
|
||||||
|
|
||||||
`tfs_z`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
|
`tfs_z`: Enable tail free sampling with parameter z. Default: `1.0`, which is disabled.
|
||||||
|
|
||||||
`typical_p`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).
|
`typical_p`: Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.
|
||||||
|
|
||||||
`repeat_penalty`: Control the repetition of token sequences in the generated text (default: 1.1).
|
`repeat_penalty`: Control the repetition of token sequences in the generated text. Default: `1.1`
|
||||||
|
|
||||||
`repeat_last_n`: Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size).
|
`repeat_last_n`: Last n tokens to consider for penalizing repetition. Default: `64`, where `0` is disabled and `-1` is ctx-size.
|
||||||
|
|
||||||
`penalize_nl`: Penalize newline tokens when applying the repeat penalty (default: true).
|
`penalize_nl`: Penalize newline tokens when applying the repeat penalty. Default: `true`
|
||||||
|
|
||||||
`presence_penalty`: Repeat alpha presence penalty (default: 0.0, 0.0 = disabled).
|
`presence_penalty`: Repeat alpha presence penalty. Default: `0.0`, which is disabled.
|
||||||
|
|
||||||
`frequency_penalty`: Repeat alpha frequency penalty (default: 0.0, 0.0 = disabled);
|
`frequency_penalty`: Repeat alpha frequency penalty. Default: `0.0`, which is disabled.
|
||||||
|
|
||||||
`penalty_prompt`: This will replace the `prompt` for the purpose of the penalty evaluation. Can be either `null`, a string or an array of numbers representing tokens (default: `null` = use the original `prompt`).
|
`penalty_prompt`: This will replace the `prompt` for the purpose of the penalty evaluation. Can be either `null`, a string or an array of numbers representing tokens. Default: `null`, which is to use the original `prompt`.
|
||||||
|
|
||||||
`mirostat`: Enable Mirostat sampling, controlling perplexity during text generation (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).
|
`mirostat`: Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
|
||||||
|
|
||||||
`mirostat_tau`: Set the Mirostat target entropy, parameter tau (default: 5.0).
|
`mirostat_tau`: Set the Mirostat target entropy, parameter tau. Default: `5.0`
|
||||||
|
|
||||||
`mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1).
|
`mirostat_eta`: Set the Mirostat learning rate, parameter eta. Default: `0.1`
|
||||||
|
|
||||||
`grammar`: Set grammar for grammar-based sampling (default: no grammar)
|
`grammar`: Set grammar for grammar-based sampling. Default: no grammar
|
||||||
|
|
||||||
`seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).
|
`seed`: Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.
|
||||||
|
|
||||||
`ignore_eos`: Ignore end of stream token and continue generating (default: false).
|
`ignore_eos`: Ignore end of stream token and continue generating. Default: `false`
|
||||||
|
|
||||||
`logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings, e.g. `[["Hello, World!",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like the `presence_penalty` does. (default: []).
|
`logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings, e.g. `[["Hello, World!",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like the `presence_penalty` does. Default: `[]`
|
||||||
|
|
||||||
`n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0)
|
`n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token. Default: `0`
|
||||||
|
|
||||||
`min_keep`: If greater than 0, force samplers to return N possible tokens at minimum (default: 0)
|
`min_keep`: If greater than 0, force samplers to return N possible tokens at minimum. Default: `0`
|
||||||
|
|
||||||
`image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:`. In this case, `[img-12]` will be replaced by the embeddings of the image with id `12` in the following `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
|
`image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be referenced in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:`. In this case, `[img-12]` will be replaced by the embeddings of the image with id `12` in the following `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
|
||||||
|
|
||||||
`id_slot`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
|
`id_slot`: Assign the completion task to a specific slot. If `-1`, the task will be assigned to an idle slot. Default: `-1`
|
||||||
|
|
||||||
`cache_prompt`: Re-use previously cached prompt from the last request if possible. This may prevent re-caching the prompt from scratch. (default: false)
|
`cache_prompt`: Re-use previously cached prompt from the last request if possible. This may prevent re-caching the prompt from scratch. Default: `false`
|
||||||
|
|
||||||
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
|
`system_prompt`: Change the system prompt (initial prompt of all slots); this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
|
||||||
|
|
||||||
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. (default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values)
|
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.
|
||||||
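As a quick illustration of the sampling options above, the following is a minimal sketch of a non-streaming `/completion` request that sets a custom sampler order along with a few of the penalty options; it assumes a server already listening on `localhost:8080` and uses placeholder prompt and values.

```python
# Minimal sketch (not a reference client): send a /completion request that
# exercises several of the sampling options documented above.
# Assumes a llama.cpp server is already running on localhost:8080.
import requests

payload = {
    "prompt": "Building a website can be done in 10 simple steps:",  # placeholder prompt
    "n_predict": 64,
    "seed": 42,
    "repeat_penalty": 1.1,
    "repeat_last_n": 64,
    "mirostat": 0,
    # Reordering or omitting entries changes which samplers run and in what order.
    "samplers": ["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"],
}

resp = requests.post("http://localhost:8080/completion", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["content"])
```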
|
|
||||||
### Result JSON
|
### Result JSON
|
||||||
|
|
||||||
- Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
|
- Note: When using streaming mode (`stream`), only `content` and `stop` will be returned until end of completion.
|
||||||
|
|
||||||
- `completion_probabilities`: An array of token probabilities for each completion. The array's length is `n_predict`. Each item in the array has the following structure:
|
- `completion_probabilities`: An array of token probabilities for each completion. The array's length is `n_predict`. Each item in the array has the following structure:
|
||||||
|
|
||||||
|
@@ -287,7 +285,7 @@ node index.js
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"prob": float,
|
"prob": float,
|
||||||
"tok_str": "<second most likely tonen>"
|
"tok_str": "<second most likely token>"
|
||||||
},
|
},
|
||||||
...
|
...
|
||||||
]
|
]
|
||||||
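To read the `completion_probabilities` array described above, a small sketch (assuming a local server on `localhost:8080` and an illustrative prompt) might look like this:

```python
# Minimal sketch: request per-token probabilities (n_probs > 0) and print
# the top candidates for each generated token. The prompt and values are
# placeholders; assumes a llama.cpp server on localhost:8080.
import requests

resp = requests.post(
    "http://localhost:8080/completion",
    json={"prompt": "The capital of France is", "n_predict": 8, "n_probs": 3},
    timeout=300,
)
resp.raise_for_status()

for step in resp.json().get("completion_probabilities", []):
    # each `probs` entry carries a candidate token string and its probability
    candidates = ", ".join(f"{p['tok_str']!r}: {p['prob']:.2f}" for p in step["probs"])
    print(candidates)
```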
|
@@ -357,14 +355,14 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||||
|
|
||||||
- `assistant_name` - the required assistant name to generate the prompt in case you have specified a system prompt for all slots.
|
- `assistant_name` - the required assistant name to generate the prompt in case you have specified a system prompt for all slots.
|
||||||
- `user_name` - the required anti-prompt to generate the prompt in case you have specified a system prompt for all slots.
|
- `user_name` - the required anti-prompt to generate the prompt in case you have specified a system prompt for all slots.
|
||||||
- `default_generation_settings` - the default generation settings for the `/completion` endpoint, has the same fields as the `generation_settings` response object from the `/completion` endpoint.
|
- `default_generation_settings` - the default generation settings for the `/completion` endpoint, which has the same fields as the `generation_settings` response object from the `/completion` endpoint.
|
||||||
- `total_slots` - the total number of slots for process requests (defined by `--parallel` option)
|
- `total_slots` - the total number of slots for processing requests (defined by the `--parallel` option)
|
||||||
|
|
||||||
- **POST** `/v1/chat/completions`: OpenAI-compatible Chat Completions API. Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. Only model with [supported chat template](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template) can be used optimally with this endpoint. By default, ChatML template will be used.
|
- **POST** `/v1/chat/completions`: OpenAI-compatible Chat Completions API. Given a ChatML-formatted JSON description in `messages`, it returns the predicted completion. Both synchronous and streaming modes are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with the OpenAI API spec are being made, in our experience it suffices to support many apps. Only models with a [supported chat template](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template) can be used optimally with this endpoint. By default, the ChatML template will be used.
|
||||||
|
|
||||||
*Options:*
|
*Options:*
|
||||||
|
|
||||||
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such are `mirostat` are supported.
|
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported.
|
||||||
|
|
||||||
*Examples:*
|
*Examples:*
|
||||||
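For instance, a minimal non-streaming request could look like the sketch below (server address, model name, and messages are placeholder assumptions):

```python
# Minimal sketch of an OpenAI-compatible chat completion request.
# Assumes a llama.cpp server on localhost:8080; all values are placeholders.
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "model": "gpt-3.5-turbo",  # placeholder model name
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Explain the KV-cache in one sentence."},
        ],
        "max_tokens": 128,
    },
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```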
|
|
||||||
|
@ -514,16 +512,16 @@ Available metrics:
|
||||||
- `llamacpp:tokens_predicted_total`: Number of generation tokens processed.
|
- `llamacpp:tokens_predicted_total`: Number of generation tokens processed.
|
||||||
- `llamacpp:prompt_tokens_seconds`: Average prompt throughput in tokens/s.
|
- `llamacpp:prompt_tokens_seconds`: Average prompt throughput in tokens/s.
|
||||||
- `llamacpp:predicted_tokens_seconds`: Average generation throughput in tokens/s.
|
- `llamacpp:predicted_tokens_seconds`: Average generation throughput in tokens/s.
|
||||||
- `llamacpp:kv_cache_usage_ratio`: KV-cache usage. 1 means 100 percent usage.
|
- `llamacpp:kv_cache_usage_ratio`: KV-cache usage. `1` means 100 percent usage.
|
||||||
- `llamacpp:kv_cache_tokens`: KV-cache tokens.
|
- `llamacpp:kv_cache_tokens`: KV-cache tokens.
|
||||||
- `llamacpp:requests_processing`: Number of request processing.
|
- `llamacpp:requests_processing`: Number of requests processing.
|
||||||
- `llamacpp:requests_deferred`: Number of request deferred.
|
- `llamacpp:requests_deferred`: Number of requests deferred.
|
||||||
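As a sketch, the metrics above can be scraped directly from the `/metrics` endpoint, assuming the server was started with `--metrics` and listens on `localhost:8080`:

```python
# Minimal sketch: scrape the Prometheus endpoint and print the llamacpp
# metrics listed above. Assumes the server was started with --metrics
# and is reachable on localhost:8080.
import requests

text = requests.get("http://localhost:8080/metrics", timeout=10).text
for line in text.splitlines():
    if line.startswith("llamacpp:"):
        print(line)
```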
|
|
||||||
## More examples
|
## More examples
|
||||||
|
|
||||||
### Change system prompt on runtime
|
### Change system prompt on runtime
|
||||||
|
|
||||||
To use the server example to serve multiple chat-type clients while keeping the same system prompt, you can utilize the option `system_prompt` to achieve that. This only needs to be done once to establish it.
|
To use the server example to serve multiple chat-type clients while keeping the same system prompt, you can utilize the option `system_prompt`. This only needs to be used once.
|
||||||
|
|
||||||
`prompt`: Specify a context that you want all connecting clients to respect.
|
`prompt`: Specify a context that you want all connecting clients to respect.
|
||||||
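A hypothetical sketch of such a request is shown below; it only uses the `prompt` field described above, and the exact payload shape should be verified against the server version you are running:

```python
# Hypothetical sketch: set a shared system prompt for all slots through the
# `system_prompt` option of /completion (only the `prompt` field documented
# above is used). Assumes a llama.cpp server on localhost:8080.
import requests

resp = requests.post(
    "http://localhost:8080/completion",
    json={
        "system_prompt": {
            "prompt": "Transcript of a dialog where all clients share the same context.",
        },
        "prompt": "",     # no text requested; this call only establishes the system prompt
        "n_predict": 0,
    },
    timeout=300,
)
resp.raise_for_status()
```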
|
|
||||||
|
@@ -562,11 +560,11 @@ bash chat.sh
|
||||||
|
|
||||||
### OAI-like API
|
### OAI-like API
|
||||||
|
|
||||||
The HTTP server supports OAI-like API: https://github.com/openai/openai-openapi
|
The HTTP `server` supports an OAI-like API: https://github.com/openai/openai-openapi
|
||||||
|
|
||||||
### API errors
|
### API errors
|
||||||
|
|
||||||
Server returns error in the same format as OAI: https://github.com/openai/openai-openapi
|
`server` returns errors in the same format as OAI: https://github.com/openai/openai-openapi
|
||||||
|
|
||||||
Example of an error:
|
Example of an error:
|
||||||
|
|
||||||
|
|
|
@@ -2,13 +2,15 @@
|
||||||
|
|
||||||
Benchmark is using [k6](https://k6.io/).
|
The benchmark uses [k6](https://k6.io/).
|
||||||
|
|
||||||
##### Install k6
|
##### Install k6 and sse extension
|
||||||
|
|
||||||
Follow instruction from: https://k6.io/docs/get-started/installation/
|
SSE is not supported by default in k6; you have to build k6 with the [xk6-sse](https://github.com/phymbert/xk6-sse) extension.
|
||||||
|
|
||||||
Example for ubuntu:
|
Example:
|
||||||
```shell
|
```shell
|
||||||
snap install k6
|
go install go.k6.io/xk6/cmd/xk6@latest
|
||||||
|
xk6 build master \
|
||||||
|
--with github.com/phymbert/xk6-sse
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Download a dataset
|
#### Download a dataset
|
||||||
|
@@ -46,7 +48,7 @@ server --host localhost --port 8080 \
|
||||||
|
|
||||||
For 500 chat completions request with 8 concurrent users during maximum 10 minutes, run:
|
For 500 chat completion requests with 8 concurrent users over a maximum of 10 minutes, run:
|
||||||
```shell
|
```shell
|
||||||
k6 run script.js --duration 10m --iterations 500 --vus 8
|
./k6 run script.js --duration 10m --iterations 500 --vus 8
|
||||||
```
|
```
|
||||||
|
|
||||||
The benchmark values can be overridden with:
|
The benchmark values can be overridden with:
|
||||||
|
@@ -86,3 +88,33 @@ K6 metrics might be compared against [server metrics](../README.md), with:
|
||||||
```shell
|
```shell
|
||||||
curl http://localhost:8080/metrics
|
curl http://localhost:8080/metrics
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Using the CI python script
|
||||||
|
The `bench.py` script performs several steps:
|
||||||
|
- start the server
|
||||||
|
- define appropriate variables for k6
|
||||||
|
- run k6 script
|
||||||
|
- extract metrics from prometheus
|
||||||
|
|
||||||
|
It is intended to be used in the CI, but you can run it manually:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
LLAMA_SERVER_BIN_PATH=../../../cmake-build-release/bin/server python bench.py \
|
||||||
|
--runner-label local \
|
||||||
|
--name local \
|
||||||
|
--branch `git rev-parse --abbrev-ref HEAD` \
|
||||||
|
--commit `git rev-parse HEAD` \
|
||||||
|
--scenario script.js \
|
||||||
|
--duration 5m \
|
||||||
|
--hf-repo ggml-org/models \
|
||||||
|
--hf-file phi-2/ggml-model-q4_0.gguf \
|
||||||
|
--model-path-prefix models \
|
||||||
|
--parallel 4 \
|
||||||
|
-ngl 33 \
|
||||||
|
--batch-size 2048 \
|
||||||
|
--ubatch-size 256 \
|
||||||
|
--ctx-size 4096 \
|
||||||
|
--n-prompts 200 \
|
||||||
|
--max-prompt-tokens 256 \
|
||||||
|
--max-tokens 256
|
||||||
|
```
|
||||||
|
|
308
examples/server/bench/bench.py
Normal file
308
examples/server/bench/bench.py
Normal file
|
@@ -0,0 +1,308 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import signal
|
||||||
|
import socket
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
from contextlib import closing
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
import matplotlib.dates
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import requests
|
||||||
|
from statistics import mean
|
||||||
|
|
||||||
|
|
||||||
|
def main(args_in: list[str] | None = None) -> None:
|
||||||
|
parser = argparse.ArgumentParser(description="Start server benchmark scenario")
|
||||||
|
parser.add_argument("--name", type=str, help="Bench name", required=True)
|
||||||
|
parser.add_argument("--runner-label", type=str, help="Runner label", required=True)
|
||||||
|
parser.add_argument("--branch", type=str, help="Branch name", default="detached")
|
||||||
|
parser.add_argument("--commit", type=str, help="Commit name", default="dirty")
|
||||||
|
parser.add_argument("--host", type=str, help="Server listen host", default="0.0.0.0")
|
||||||
|
parser.add_argument("--port", type=int, help="Server listen host", default="8080")
|
||||||
|
parser.add_argument("--model-path-prefix", type=str, help="Prefix where to store the model files", default="models")
|
||||||
|
parser.add_argument("--n-prompts", type=int,
|
||||||
|
help="SERVER_BENCH_N_PROMPTS: total prompts to randomly select in the benchmark", required=True)
|
||||||
|
parser.add_argument("--max-prompt-tokens", type=int,
|
||||||
|
help="SERVER_BENCH_MAX_PROMPT_TOKENS: maximum prompt tokens to filter out in the dataset",
|
||||||
|
required=True)
|
||||||
|
parser.add_argument("--max-tokens", type=int,
|
||||||
|
help="SERVER_BENCH_MAX_CONTEXT: maximum context size of the completions request to filter out in the dataset: prompt + predicted tokens",
|
||||||
|
required=True)
|
||||||
|
parser.add_argument("--hf-repo", type=str, help="Hugging Face model repository", required=True)
|
||||||
|
parser.add_argument("--hf-file", type=str, help="Hugging Face model file", required=True)
|
||||||
|
parser.add_argument("-ngl", "--n-gpu-layers", type=int, help="layers to the GPU for computation", required=True)
|
||||||
|
parser.add_argument("--ctx-size", type=int, help="Set the size of the prompt context", required=True)
|
||||||
|
parser.add_argument("--parallel", type=int, help="Set the number of slots for process requests", required=True)
|
||||||
|
parser.add_argument("--batch-size", type=int, help="Set the batch size for prompt processing", required=True)
|
||||||
|
parser.add_argument("--ubatch-size", type=int, help="physical maximum batch size", required=True)
|
||||||
|
parser.add_argument("--scenario", type=str, help="Scenario to run", required=True)
|
||||||
|
parser.add_argument("--duration", type=str, help="Bench scenario", required=True)
|
||||||
|
|
||||||
|
args = parser.parse_args(args_in)
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
# Start the server and performance scenario
|
||||||
|
try:
|
||||||
|
server_process = start_server(args)
|
||||||
|
except Exception:
|
||||||
|
print("bench: server start error :")
|
||||||
|
traceback.print_exc(file=sys.stdout)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# start the benchmark
|
||||||
|
try:
|
||||||
|
start_benchmark(args)
|
||||||
|
|
||||||
|
iterations = 0
|
||||||
|
with open("results.github.env", 'w') as github_env:
|
||||||
|
# parse output
|
||||||
|
with open('k6-results.json', 'r') as bench_results:
|
||||||
|
# Load JSON data from file
|
||||||
|
data = json.load(bench_results)
|
||||||
|
for metric_name in data['metrics']:
|
||||||
|
for metric_metric in data['metrics'][metric_name]:
|
||||||
|
value = data['metrics'][metric_name][metric_metric]
|
||||||
|
if isinstance(value, float) or isinstance(value, int):
|
||||||
|
value = round(value, 2)
|
||||||
|
data['metrics'][metric_name][metric_metric]=value
|
||||||
|
github_env.write(
|
||||||
|
f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={value}\n")
|
||||||
|
iterations = data['root_group']['checks']['success completion']['passes']
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
print("bench: error :")
|
||||||
|
traceback.print_exc(file=sys.stdout)
|
||||||
|
|
||||||
|
# Stop the server
|
||||||
|
if server_process:
|
||||||
|
try:
|
||||||
|
print(f"bench: shutting down server pid={server_process.pid} ...")
|
||||||
|
if os.name == 'nt':
|
||||||
|
interrupt = signal.CTRL_C_EVENT
|
||||||
|
else:
|
||||||
|
interrupt = signal.SIGINT
|
||||||
|
server_process.send_signal(interrupt)
|
||||||
|
server_process.wait(0.5)
|
||||||
|
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
print(f"server still alive after 500ms, force-killing pid={server_process.pid} ...")
|
||||||
|
server_process.kill() # SIGKILL
|
||||||
|
server_process.wait()
|
||||||
|
|
||||||
|
while is_server_listening(args.host, args.port):
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
title = (f"llama.cpp {args.name} on {args.runner_label}\n "
|
||||||
|
f"duration={args.duration} {iterations} iterations")
|
||||||
|
xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
|
||||||
|
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
|
||||||
|
f"branch={args.branch} commit={args.commit}")
|
||||||
|
|
||||||
|
# Prometheus
|
||||||
|
end_time = time.time()
|
||||||
|
prometheus_metrics = {}
|
||||||
|
if is_server_listening("0.0.0.0", 9090):
|
||||||
|
metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
|
||||||
|
'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
|
||||||
|
|
||||||
|
for metric in metrics:
|
||||||
|
resp = requests.get(f"http://localhost:9090/api/v1/query_range",
|
||||||
|
params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})
|
||||||
|
|
||||||
|
with open(f"{metric}.json", 'w') as metric_json:
|
||||||
|
metric_json.write(resp.text)
|
||||||
|
|
||||||
|
if resp.status_code != 200:
|
||||||
|
print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
|
||||||
|
else:
|
||||||
|
metric_data = resp.json()
|
||||||
|
values = metric_data['data']['result'][0]['values']
|
||||||
|
timestamps, metric_values = zip(*values)
|
||||||
|
metric_values = [float(value) for value in metric_values]
|
||||||
|
prometheus_metrics[metric] = metric_values
|
||||||
|
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
|
||||||
|
plt.figure(figsize=(16, 10), dpi=80)
|
||||||
|
plt.plot(timestamps_dt, metric_values, label=metric)
|
||||||
|
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
|
||||||
|
plt.yticks(fontsize=12, alpha=.7)
|
||||||
|
|
||||||
|
ylabel = f"llamacpp:{metric}"
|
||||||
|
plt.title(title,
|
||||||
|
fontsize=14, wrap=True)
|
||||||
|
plt.grid(axis='both', alpha=.3)
|
||||||
|
plt.ylabel(ylabel, fontsize=22)
|
||||||
|
plt.xlabel(xlabel, fontsize=14, wrap=True)
|
||||||
|
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
|
||||||
|
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S"))
|
||||||
|
plt.gcf().autofmt_xdate()
|
||||||
|
|
||||||
|
# Remove borders
|
||||||
|
plt.gca().spines["top"].set_alpha(0.0)
|
||||||
|
plt.gca().spines["bottom"].set_alpha(0.3)
|
||||||
|
plt.gca().spines["right"].set_alpha(0.0)
|
||||||
|
plt.gca().spines["left"].set_alpha(0.3)
|
||||||
|
|
||||||
|
# Save the plot as a jpg image
|
||||||
|
plt.savefig(f'{metric}.jpg', dpi=60)
|
||||||
|
plt.close()
|
||||||
|
|
||||||
|
# Mermaid format in case images upload failed
|
||||||
|
with (open(f"{metric}.mermaid", 'w') as mermaid_f):
|
||||||
|
mermaid = (
|
||||||
|
f"""---
|
||||||
|
config:
|
||||||
|
xyChart:
|
||||||
|
titleFontSize: 12
|
||||||
|
width: 900
|
||||||
|
height: 600
|
||||||
|
themeVariables:
|
||||||
|
xyChart:
|
||||||
|
titleColor: "#000000"
|
||||||
|
---
|
||||||
|
xychart-beta
|
||||||
|
title "{title}"
|
||||||
|
y-axis "llamacpp:{metric}"
|
||||||
|
x-axis "llamacpp:{metric}" {int(min(timestamps))} --> {int(max(timestamps))}
|
||||||
|
line [{', '.join([str(round(float(value), 2)) for value in metric_values])}]
|
||||||
|
""")
|
||||||
|
mermaid_f.write(mermaid)
|
||||||
|
|
||||||
|
# 140 chars max for commit status description
|
||||||
|
bench_results = {
|
||||||
|
"i": iterations,
|
||||||
|
"req": {
|
||||||
|
"p95": round(data['metrics']["http_req_duration"]["p(95)"], 2),
|
||||||
|
"avg": round(data['metrics']["http_req_duration"]["avg"], 2),
|
||||||
|
},
|
||||||
|
"pp": {
|
||||||
|
"p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
|
||||||
|
"avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
|
||||||
|
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
|
||||||
|
},
|
||||||
|
"tg": {
|
||||||
|
"p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
|
||||||
|
"avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
|
||||||
|
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
with open("results.github.env", 'a') as github_env:
|
||||||
|
github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
|
||||||
|
github_env.write(f"BENCH_ITERATIONS={iterations}\n")
|
||||||
|
|
||||||
|
title = title.replace('\n', ' ')
|
||||||
|
xlabel = xlabel.replace('\n', ' ')
|
||||||
|
github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
|
||||||
|
github_env.write(f"BENCH_GRAPH_XLABEL={xlabel}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def start_benchmark(args):
|
||||||
|
k6_path = './k6'
|
||||||
|
if 'BENCH_K6_BIN_PATH' in os.environ:
|
||||||
|
k6_path = os.environ['BENCH_K6_BIN_PATH']
|
||||||
|
k6_args = [
|
||||||
|
'run', args.scenario,
|
||||||
|
'--no-color',
|
||||||
|
]
|
||||||
|
k6_args.extend(['--duration', args.duration])
|
||||||
|
k6_args.extend(['--iterations', args.n_prompts])
|
||||||
|
k6_args.extend(['--vus', args.parallel])
|
||||||
|
k6_args.extend(['--summary-export', 'k6-results.json'])
|
||||||
|
args = f"SERVER_BENCH_N_PROMPTS={args.n_prompts} SERVER_BENCH_MAX_PROMPT_TOKENS={args.max_prompt_tokens} SERVER_BENCH_MAX_CONTEXT={args.max_tokens} "
|
||||||
|
args = args + ' '.join([str(arg) for arg in [k6_path, *k6_args]])
|
||||||
|
print(f"bench: starting k6 with: {args}")
|
||||||
|
k6_completed = subprocess.run(args, shell=True, stdout=sys.stdout, stderr=sys.stderr)
|
||||||
|
if k6_completed.returncode != 0:
|
||||||
|
raise Exception("bench: unable to run k6")
|
||||||
|
|
||||||
|
|
||||||
|
def start_server(args):
|
||||||
|
server_process = start_server_background(args)
|
||||||
|
|
||||||
|
attempts = 0
|
||||||
|
max_attempts = 20
|
||||||
|
if 'GITHUB_ACTIONS' in os.environ:
|
||||||
|
max_attempts *= 2
|
||||||
|
|
||||||
|
while not is_server_listening(args.host, args.port):
|
||||||
|
attempts += 1
|
||||||
|
if attempts > max_attempts:
|
||||||
|
assert False, "server not started"
|
||||||
|
print(f"bench: waiting for server to start ...")
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
print("bench: server started.")
|
||||||
|
return server_process
|
||||||
|
|
||||||
|
|
||||||
|
def start_server_background(args):
|
||||||
|
# Start the server
|
||||||
|
server_path = '../../../build/bin/server'
|
||||||
|
if 'LLAMA_SERVER_BIN_PATH' in os.environ:
|
||||||
|
server_path = os.environ['LLAMA_SERVER_BIN_PATH']
|
||||||
|
server_args = [
|
||||||
|
'--host', args.host,
|
||||||
|
'--port', args.port,
|
||||||
|
]
|
||||||
|
model_file = args.model_path_prefix + os.path.sep + args.hf_file
|
||||||
|
model_dir = os.path.dirname(model_file)
|
||||||
|
if not os.path.exists(model_dir):
|
||||||
|
os.makedirs(model_dir)
|
||||||
|
server_args.extend(['--model', model_file])
|
||||||
|
server_args.extend(['--hf-repo', args.hf_repo])
|
||||||
|
server_args.extend(['--hf-file', args.hf_file])
|
||||||
|
server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
|
||||||
|
server_args.extend(['--ctx-size', args.ctx_size])
|
||||||
|
server_args.extend(['--parallel', args.parallel])
|
||||||
|
server_args.extend(['--batch-size', args.batch_size])
|
||||||
|
server_args.extend(['--ubatch-size', args.ubatch_size])
|
||||||
|
server_args.extend(['--n-predict', args.max_tokens * 2])
|
||||||
|
server_args.extend(['--defrag-thold', "0.1"])
|
||||||
|
server_args.append('--cont-batching')
|
||||||
|
server_args.append('--metrics')
|
||||||
|
server_args.extend(['--log-format', "text"])
|
||||||
|
args = [str(arg) for arg in [server_path, *server_args]]
|
||||||
|
print(f"bench: starting server with: {' '.join(args)}")
|
||||||
|
pkwargs = {
|
||||||
|
'stdout': subprocess.PIPE,
|
||||||
|
'stderr': subprocess.PIPE
|
||||||
|
}
|
||||||
|
server_process = subprocess.Popen(
|
||||||
|
args,
|
||||||
|
**pkwargs)
|
||||||
|
|
||||||
|
def server_log(in_stream, out_stream):
|
||||||
|
for line in iter(in_stream.readline, b''):
|
||||||
|
print(line.decode('utf-8'), end='', file=out_stream)
|
||||||
|
|
||||||
|
thread_stdout = threading.Thread(target=server_log, args=(server_process.stdout, sys.stdout))
|
||||||
|
thread_stdout.start()
|
||||||
|
thread_stderr = threading.Thread(target=server_log, args=(server_process.stderr, sys.stderr))
|
||||||
|
thread_stderr.start()
|
||||||
|
|
||||||
|
return server_process
|
||||||
|
|
||||||
|
|
||||||
|
def is_server_listening(server_fqdn, server_port):
|
||||||
|
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
|
||||||
|
result = sock.connect_ex((server_fqdn, server_port))
|
||||||
|
_is_server_listening = result == 0
|
||||||
|
if _is_server_listening:
|
||||||
|
print(f"server is listening on {server_fqdn}:{server_port}...")
|
||||||
|
return _is_server_listening
|
||||||
|
|
||||||
|
|
||||||
|
def escape_metric_name(metric_name):
|
||||||
|
return re.sub('[^A-Z0-9]', '_', metric_name.upper())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
9
examples/server/bench/prometheus.yml
Normal file
9
examples/server/bench/prometheus.yml
Normal file
|
@@ -0,0 +1,9 @@
|
||||||
|
global:
|
||||||
|
scrape_interval: 10s
|
||||||
|
external_labels:
|
||||||
|
llamacpp: 'server'
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'llama.cpp server'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:8080']
|
2
examples/server/bench/requirements.txt
Normal file
2
examples/server/bench/requirements.txt
Normal file
|
@@ -0,0 +1,2 @@
|
||||||
|
matplotlib
|
||||||
|
requests
|
|
@@ -1,4 +1,4 @@
|
||||||
import http from 'k6/http'
|
import sse from 'k6/x/sse'
|
||||||
import {check, sleep} from 'k6'
|
import {check, sleep} from 'k6'
|
||||||
import {SharedArray} from 'k6/data'
|
import {SharedArray} from 'k6/data'
|
||||||
import {Counter, Rate, Trend} from 'k6/metrics'
|
import {Counter, Rate, Trend} from 'k6/metrics'
|
||||||
|
@@ -53,7 +53,9 @@ const data = new SharedArray('conversations', function () {
|
||||||
|
|
||||||
const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
|
const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
|
||||||
const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')
|
const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')
|
||||||
|
|
||||||
const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
|
const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
|
||||||
|
const llamacpp_prompt_processing_second = new Trend('llamacpp_prompt_processing_second')
|
||||||
|
|
||||||
const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
|
const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
|
||||||
const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
|
const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
|
||||||
|
@@ -86,35 +88,62 @@ export default function () {
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"model": model,
|
"model": model,
|
||||||
"stream": false,
|
"stream": true,
|
||||||
|
"seed": 42,
|
||||||
"max_tokens": max_tokens
|
"max_tokens": max_tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
const body = JSON.stringify(payload)
|
const params = {method: 'POST', body: JSON.stringify(payload)};
|
||||||
|
|
||||||
let res = http.post(`${server_url}/chat/completions`, body, {
|
const startTime = new Date()
|
||||||
headers: {'Content-Type': 'application/json'},
|
let promptEvalEndTime = null
|
||||||
timeout: '300s'
|
let prompt_tokens = 0
|
||||||
|
let completions_tokens = 0
|
||||||
|
let finish_reason = null
|
||||||
|
const res = sse.open(`${server_url}/chat/completions`, params, function (client) {
|
||||||
|
client.on('event', function (event) {
|
||||||
|
if (promptEvalEndTime == null) {
|
||||||
|
promptEvalEndTime = new Date()
|
||||||
|
}
|
||||||
|
|
||||||
|
let chunk = JSON.parse(event.data)
|
||||||
|
let choice = chunk.choices[0]
|
||||||
|
if (choice.finish_reason) {
|
||||||
|
finish_reason = choice.finish_reason
|
||||||
|
}
|
||||||
|
|
||||||
|
if (chunk.usage) {
|
||||||
|
prompt_tokens = chunk.usage.prompt_tokens
|
||||||
|
llamacpp_prompt_tokens.add(prompt_tokens)
|
||||||
|
llamacpp_prompt_tokens_total_counter.add(prompt_tokens)
|
||||||
|
|
||||||
|
completions_tokens = chunk.usage.completion_tokens
|
||||||
|
llamacpp_completion_tokens.add(completions_tokens)
|
||||||
|
llamacpp_completion_tokens_total_counter.add(completions_tokens)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
client.on('error', function (e) {
|
||||||
|
console.log('An unexpected error occurred: ', e.error());
|
||||||
|
throw e;
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
check(res, {'success completion': (r) => r.status === 200})
|
check(res, {'success completion': (r) => r.status === 200})
|
||||||
|
|
||||||
if (res.status === 200) {
|
const endTime = new Date()
|
||||||
const completions = res.json()
|
|
||||||
|
|
||||||
llamacpp_prompt_tokens.add(completions.usage.prompt_tokens)
|
const promptEvalTime = promptEvalEndTime - startTime
|
||||||
llamacpp_prompt_tokens_total_counter.add(completions.usage.prompt_tokens)
|
if (promptEvalTime > 0) {
|
||||||
|
llamacpp_prompt_processing_second.add(prompt_tokens / (promptEvalEndTime - startTime) * 1.e3)
|
||||||
llamacpp_completion_tokens.add(completions.usage.completion_tokens)
|
|
||||||
llamacpp_completion_tokens_total_counter.add(completions.usage.completion_tokens)
|
|
||||||
|
|
||||||
llamacpp_completions_truncated_rate.add(completions.choices[0].finish_reason === 'length')
|
|
||||||
llamacpp_completions_stop_rate.add(completions.choices[0].finish_reason === 'stop')
|
|
||||||
|
|
||||||
llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
|
|
||||||
} else {
|
|
||||||
console.error(`response: ${res.body} request=${payload}`)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const completion_time = endTime - promptEvalEndTime
|
||||||
|
if (completions_tokens > 0 && completion_time > 0) {
|
||||||
|
llamacpp_tokens_second.add(completions_tokens / completion_time * 1.e3)
|
||||||
|
}
|
||||||
|
llamacpp_completions_truncated_rate.add(finish_reason === 'length')
|
||||||
|
llamacpp_completions_stop_rate.add(finish_reason === 'stop')
|
||||||
|
|
||||||
sleep(0.3)
|
sleep(0.3)
|
||||||
}
|
}
|
||||||
|
|
|
@@ -43,444 +43,454 @@ unsigned char completion_js[] = {
|
||||||
0x7d, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63,
|
0x7d, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63,
|
||||||
0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x3d, 0x20,
|
0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x3d, 0x20,
|
||||||
0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x69, 0x66,
|
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x3b, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
||||||
0x20, 0x28, 0x21, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65,
|
0x73, 0x74, 0x20, 0x61, 0x70, 0x69, 0x5f, 0x75, 0x72, 0x6c, 0x20, 0x3d,
|
||||||
0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x61, 0x70, 0x69, 0x5f,
|
||||||
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x6e, 0x65,
|
0x75, 0x72, 0x6c, 0x20, 0x7c, 0x7c, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x0a,
|
||||||
0x77, 0x20, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x72,
|
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d,
|
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f,
|
0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20,
|
||||||
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x61, 0x72, 0x61,
|
0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x43,
|
||||||
0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61,
|
0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x28, 0x29, 0x3b,
|
||||||
0x72, 0x61, 0x6d, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x73, 0x2c,
|
0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
|
||||||
0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20,
|
0x74, 0x20, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e,
|
||||||
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x20, 0x7d, 0x3b, 0x0a, 0x0a, 0x20,
|
0x50, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x2e,
|
||||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x73, 0x70, 0x6f,
|
0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x44, 0x65, 0x66, 0x61, 0x75,
|
||||||
0x6e, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20,
|
0x6c, 0x74, 0x73, 0x2c, 0x20, 0x2e, 0x2e, 0x2e, 0x70, 0x61, 0x72, 0x61,
|
||||||
0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22, 0x2f, 0x63, 0x6f, 0x6d, 0x70,
|
0x6d, 0x73, 0x2c, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x20, 0x7d,
|
||||||
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x2c, 0x20, 0x7b, 0x0a, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x3a, 0x20, 0x27,
|
|
||||||
0x50, 0x4f, 0x53, 0x54, 0x27, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x62,
|
|
||||||
0x6f, 0x64, 0x79, 0x3a, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x73, 0x74,
|
|
||||||
0x72, 0x69, 0x6e, 0x67, 0x69, 0x66, 0x79, 0x28, 0x63, 0x6f, 0x6d, 0x70,
|
|
||||||
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x73,
|
|
||||||
0x29, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x68, 0x65, 0x61, 0x64, 0x65,
|
|
||||||
0x72, 0x73, 0x3a, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
|
||||||
0x27, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x27,
|
|
||||||
0x3a, 0x20, 0x27, 0x6b, 0x65, 0x65, 0x70, 0x2d, 0x61, 0x6c, 0x69, 0x76,
|
|
||||||
0x65, 0x27, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x27, 0x43,
|
|
||||||
0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x54, 0x79, 0x70, 0x65, 0x27,
|
|
||||||
0x3a, 0x20, 0x27, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69,
|
|
||||||
0x6f, 0x6e, 0x2f, 0x6a, 0x73, 0x6f, 0x6e, 0x27, 0x2c, 0x0a, 0x20, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x20, 0x27, 0x41, 0x63, 0x63, 0x65, 0x70, 0x74, 0x27,
|
|
||||||
0x3a, 0x20, 0x27, 0x74, 0x65, 0x78, 0x74, 0x2f, 0x65, 0x76, 0x65, 0x6e,
|
|
||||||
0x74, 0x2d, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x27, 0x2c, 0x0a, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x2e, 0x2e, 0x28, 0x70, 0x61, 0x72,
|
|
||||||
0x61, 0x6d, 0x73, 0x2e, 0x61, 0x70, 0x69, 0x5f, 0x6b, 0x65, 0x79, 0x20,
|
|
||||||
0x3f, 0x20, 0x7b, 0x27, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x69, 0x7a,
|
|
||||||
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x27, 0x3a, 0x20, 0x60, 0x42, 0x65, 0x61,
|
|
||||||
0x72, 0x65, 0x72, 0x20, 0x24, 0x7b, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
|
|
||||||
0x2e, 0x61, 0x70, 0x69, 0x5f, 0x6b, 0x65, 0x79, 0x7d, 0x60, 0x7d, 0x20,
|
|
||||||
0x3a, 0x20, 0x7b, 0x7d, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x2c,
|
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x3a,
|
|
||||||
0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e,
|
|
||||||
0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x2c, 0x0a, 0x20, 0x20, 0x7d, 0x29,
|
|
||||||
0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72,
|
0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72,
|
||||||
0x65, 0x61, 0x64, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x70,
|
0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x77,
|
||||||
0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x62, 0x6f, 0x64, 0x79, 0x2e, 0x67, 0x65,
|
0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x60, 0x24,
|
||||||
0x74, 0x52, 0x65, 0x61, 0x64, 0x65, 0x72, 0x28, 0x29, 0x3b, 0x0a, 0x20,
|
0x7b, 0x61, 0x70, 0x69, 0x5f, 0x75, 0x72, 0x6c, 0x7d, 0x2f, 0x63, 0x6f,
|
||||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64,
|
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x60, 0x2c, 0x20, 0x7b,
|
||||||
0x65, 0x72, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x54, 0x65, 0x78,
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x3a,
|
||||||
0x74, 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x28, 0x29, 0x3b, 0x0a,
|
0x20, 0x27, 0x50, 0x4f, 0x53, 0x54, 0x27, 0x2c, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65,
|
0x20, 0x62, 0x6f, 0x64, 0x79, 0x3a, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e,
|
||||||
0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x6c,
|
0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x69, 0x66, 0x79, 0x28, 0x63, 0x6f,
|
||||||
0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20,
|
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x50, 0x61, 0x72, 0x61,
|
||||||
0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x42, 0x75, 0x66,
|
0x6d, 0x73, 0x29, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x68, 0x65, 0x61,
|
||||||
0x66, 0x65, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x70, 0x61, 0x72, 0x74,
|
0x64, 0x65, 0x72, 0x73, 0x3a, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x69, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x72, 0x65, 0x61, 0x64, 0x20, 0x6c,
|
0x20, 0x20, 0x27, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f,
|
||||||
0x69, 0x6e, 0x65, 0x73, 0x0a, 0x0a, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20,
|
0x6e, 0x27, 0x3a, 0x20, 0x27, 0x6b, 0x65, 0x65, 0x70, 0x2d, 0x61, 0x6c,
|
||||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f,
|
0x69, 0x76, 0x65, 0x27, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x6e, 0x74, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0a, 0x0a,
|
0x27, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x54, 0x79, 0x70,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65, 0x20, 0x28, 0x63,
|
0x65, 0x27, 0x3a, 0x20, 0x27, 0x61, 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61,
|
||||||
0x6f, 0x6e, 0x74, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x74, 0x69, 0x6f, 0x6e, 0x2f, 0x6a, 0x73, 0x6f, 0x6e, 0x27, 0x2c, 0x0a,
|
||||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x27, 0x41, 0x63, 0x63, 0x65, 0x70,
|
||||||
0x74, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x72, 0x65,
|
0x74, 0x27, 0x3a, 0x20, 0x27, 0x74, 0x65, 0x78, 0x74, 0x2f, 0x65, 0x76,
|
||||||
0x61, 0x64, 0x65, 0x72, 0x2e, 0x72, 0x65, 0x61, 0x64, 0x28, 0x29, 0x3b,
|
0x65, 0x6e, 0x74, 0x2d, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x27, 0x2c,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72,
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2e, 0x2e, 0x2e, 0x28, 0x70,
|
||||||
0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x6f, 0x6e, 0x65, 0x29, 0x20,
|
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x61, 0x70, 0x69, 0x5f, 0x6b, 0x65,
|
||||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x72,
|
0x79, 0x20, 0x3f, 0x20, 0x7b, 0x27, 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72,
|
||||||
0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x27, 0x3a, 0x20, 0x60, 0x42,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x41,
|
0x65, 0x61, 0x72, 0x65, 0x72, 0x20, 0x24, 0x7b, 0x70, 0x61, 0x72, 0x61,
|
||||||
0x64, 0x64, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f,
|
0x6d, 0x73, 0x2e, 0x61, 0x70, 0x69, 0x5f, 0x6b, 0x65, 0x79, 0x7d, 0x60,
|
||||||
0x76, 0x65, 0x72, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, 0x74, 0x6f, 0x20,
|
0x7d, 0x20, 0x3a, 0x20, 0x7b, 0x7d, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x74, 0x68, 0x65, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x20,
|
0x7d, 0x2c, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x73, 0x69, 0x67, 0x6e, 0x61,
|
||||||
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x64, 0x61, 0x74,
|
0x6c, 0x3a, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65,
|
||||||
0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
|
0x72, 0x2e, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x6c, 0x2c, 0x0a, 0x20, 0x20,
|
||||||
0x74, 0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x3d, 0x20, 0x6c, 0x65, 0x66,
|
0x7d, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||||
0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x2b, 0x20, 0x64, 0x65, 0x63, 0x6f,
|
0x20, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x72, 0x65,
|
||||||
0x64, 0x65, 0x72, 0x2e, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x28, 0x72,
|
0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x62, 0x6f, 0x64, 0x79, 0x2e,
|
||||||
0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29,
|
0x67, 0x65, 0x74, 0x52, 0x65, 0x61, 0x64, 0x65, 0x72, 0x28, 0x29, 0x3b,
|
||||||
0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20,
|
0x0a, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x64, 0x65, 0x63,
|
||||||
0x43, 0x68, 0x65, 0x63, 0x6b, 0x20, 0x69, 0x66, 0x20, 0x74, 0x68, 0x65,
|
0x6f, 0x64, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x54,
|
||||||
0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x63, 0x68, 0x61, 0x72, 0x61, 0x63,
|
0x65, 0x78, 0x74, 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x28, 0x29,
|
||||||
0x74, 0x65, 0x72, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x6c, 0x69, 0x6e,
|
0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e,
|
||||||
0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20,
|
||||||
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x6e, 0x64, 0x73,
|
0x20, 0x6c, 0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65,
|
||||||
0x57, 0x69, 0x74, 0x68, 0x4c, 0x69, 0x6e, 0x65, 0x42, 0x72, 0x65, 0x61,
|
0x72, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x42,
|
||||||
0x6b, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x65, 0x6e, 0x64,
|
0x75, 0x66, 0x66, 0x65, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x70, 0x61,
|
||||||
0x73, 0x57, 0x69, 0x74, 0x68, 0x28, 0x27, 0x5c, 0x6e, 0x27, 0x29, 0x3b,
|
0x72, 0x74, 0x69, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x72, 0x65, 0x61, 0x64,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x53,
|
0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x0a, 0x0a, 0x20, 0x20, 0x74, 0x72,
|
||||||
0x70, 0x6c, 0x69, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x65, 0x78,
|
0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20,
|
||||||
0x74, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73,
|
0x63, 0x6f, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75, 0x65, 0x3b,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x6c,
|
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c, 0x65, 0x20,
|
||||||
0x69, 0x6e, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e,
|
0x28, 0x63, 0x6f, 0x6e, 0x74, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x27, 0x5c, 0x6e, 0x27, 0x29, 0x3b,
|
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x73,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x49,
|
0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20,
|
||||||
0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x64,
|
0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x72, 0x65, 0x61, 0x64, 0x28,
|
||||||
0x6f, 0x65, 0x73, 0x6e, 0x27, 0x74, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77,
|
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
|
||||||
0x69, 0x74, 0x68, 0x20, 0x61, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x62,
|
0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x6f, 0x6e, 0x65,
|
||||||
0x72, 0x65, 0x61, 0x6b, 0x2c, 0x20, 0x74, 0x68, 0x65, 0x6e, 0x20, 0x74,
|
0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65,
|
0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x69, 0x73, 0x20, 0x69, 0x6e, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
|
0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
|
||||||
0x74, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20,
|
0x20, 0x41, 0x64, 0x64, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x6c, 0x65, 0x66,
|
||||||
0x53, 0x74, 0x6f, 0x72, 0x65, 0x20, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20,
|
0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, 0x74,
|
||||||
0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, 0x6f, 0x20,
|
0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e,
|
||||||
0x62, 0x65, 0x20, 0x61, 0x64, 0x64, 0x65, 0x64, 0x20, 0x74, 0x6f, 0x20,
|
0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x64,
|
||||||
0x74, 0x68, 0x65, 0x20, 0x6e, 0x65, 0x78, 0x74, 0x20, 0x63, 0x68, 0x75,
|
0x61, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||||
0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x64, 0x61, 0x74, 0x61, 0x0a, 0x20,
|
0x6e, 0x73, 0x74, 0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x3d, 0x20, 0x6c,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x65, 0x6e,
|
0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x2b, 0x20, 0x64, 0x65,
|
||||||
|
0x63, 0x6f, 0x64, 0x65, 0x72, 0x2e, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65,
|
||||||
|
0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75,
|
||||||
|
0x65, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
|
||||||
|
0x2f, 0x20, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x20, 0x69, 0x66, 0x20, 0x74,
|
||||||
|
0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x63, 0x68, 0x61, 0x72,
|
||||||
|
0x61, 0x63, 0x74, 0x65, 0x72, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x6c,
|
||||||
|
0x69, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x0a, 0x20, 0x20,
|
||||||
|
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x6e,
|
||||||
0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69, 0x6e, 0x65, 0x42, 0x72,
|
0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69, 0x6e, 0x65, 0x42, 0x72,
|
||||||
0x65, 0x61, 0x6b, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x65, 0x61, 0x6b, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x65,
|
||||||
0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20,
|
0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x28, 0x27, 0x5c, 0x6e, 0x27,
|
||||||
0x3d, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x2e, 0x70, 0x6f, 0x70, 0x28,
|
0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
|
||||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65,
|
0x20, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74,
|
||||||
0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x65, 0x78, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x6c, 0x69, 0x6e,
|
||||||
0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x3d,
|
0x65, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74,
|
||||||
0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x52, 0x65, 0x73, 0x65,
|
0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x78,
|
||||||
0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x69,
|
0x74, 0x2e, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x27, 0x5c, 0x6e, 0x27,
|
||||||
0x66, 0x20, 0x77, 0x65, 0x20, 0x68, 0x61, 0x76, 0x65, 0x20, 0x61, 0x20,
|
0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f,
|
||||||
0x6c, 0x69, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x20, 0x61,
|
0x20, 0x49, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x65, 0x78, 0x74,
|
||||||
0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x65, 0x6e, 0x64, 0x0a, 0x20, 0x20,
|
0x20, 0x64, 0x6f, 0x65, 0x73, 0x6e, 0x27, 0x74, 0x20, 0x65, 0x6e, 0x64,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x61, 0x20, 0x6c, 0x69, 0x6e, 0x65,
|
||||||
0x20, 0x2f, 0x2f, 0x20, 0x50, 0x61, 0x72, 0x73, 0x65, 0x20, 0x61, 0x6c,
|
0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x2c, 0x20, 0x74, 0x68, 0x65, 0x6e,
|
||||||
0x6c, 0x20, 0x73, 0x73, 0x65, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73,
|
0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x6c, 0x69,
|
||||||
0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x64, 0x64, 0x20, 0x74, 0x68, 0x65,
|
0x6e, 0x65, 0x20, 0x69, 0x73, 0x20, 0x69, 0x6e, 0x63, 0x6f, 0x6d, 0x70,
|
||||||
0x6d, 0x20, 0x74, 0x6f, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x0a,
|
0x6c, 0x65, 0x74, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
0x2f, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x20, 0x69, 0x74, 0x20, 0x69,
|
||||||
0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x3d, 0x20, 0x2f, 0x5e, 0x28, 0x5c,
|
0x6e, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
|
||||||
0x53, 0x2b, 0x29, 0x3a, 0x5c, 0x73, 0x28, 0x2e, 0x2a, 0x29, 0x24, 0x2f,
|
0x6f, 0x20, 0x62, 0x65, 0x20, 0x61, 0x64, 0x64, 0x65, 0x64, 0x20, 0x74,
|
||||||
0x67, 0x6d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f,
|
0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6e, 0x65, 0x78, 0x74, 0x20, 0x63,
|
||||||
0x72, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x69, 0x6e,
|
0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x64, 0x61, 0x74, 0x61,
|
||||||
0x65, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x29, 0x20,
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21,
|
||||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69, 0x6e, 0x65,
|
||||||
0x6e, 0x73, 0x74, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x3d, 0x20,
|
0x42, 0x72, 0x65, 0x61, 0x6b, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x72, 0x65, 0x67, 0x65, 0x78, 0x2e, 0x65, 0x78, 0x65, 0x63, 0x28, 0x6c,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65,
|
||||||
0x69, 0x6e, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x72, 0x20, 0x3d, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x2e, 0x70, 0x6f,
|
||||||
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x29,
|
0x70, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x5b, 0x6d, 0x61, 0x74, 0x63,
|
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72,
|
||||||
0x68, 0x5b, 0x31, 0x5d, 0x5d, 0x20, 0x3d, 0x20, 0x6d, 0x61, 0x74, 0x63,
|
0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f, 0x20, 0x52, 0x65,
|
||||||
0x68, 0x5b, 0x32, 0x5d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x73, 0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72,
|
||||||
[... remainder of the regenerated completion_js[] hex byte array omitted — this is the build-generated header that embeds completion.js; its byte changes simply mirror the completion.js source changes shown further below ...]
 };
-size_t completion_js_len = 5796;
+unsigned int completion_js_len = 5909;
File diff suppressed because it is too large
@@ -1928,4 +1928,4 @@ unsigned char index_js[] = {
 0x45, 0x66, 0x66, 0x65, 0x63, 0x74, 0x2c, 0x50, 0x74, 0x20, 0x61, 0x73,
 0x20, 0x75, 0x73, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x7d, 0x3b, 0x0a
 };
-size_t index_js_len = 23136;
+unsigned int index_js_len = 23136;
@@ -21,6 +21,7 @@ let generation_settings = null;
 //
 export async function* llama(prompt, params = {}, config = {}) {
   let controller = config.controller;
+  const api_url = config.api_url || "";
 
   if (!controller) {
     controller = new AbortController();
@@ -28,7 +29,7 @@ export async function* llama(prompt, params = {}, config = {}) {
 
   const completionParams = { ...paramDefaults, ...params, prompt };
 
-  const response = await fetch("/completion", {
+  const response = await fetch(`${api_url}/completion`, {
     method: 'POST',
     body: JSON.stringify(completionParams),
     headers: {
@@ -193,9 +194,10 @@ export const llamaComplete = async (params, controller, callback) => {
 }
 
 // Get the model info from the server. This is useful for getting the context window and so on.
-export const llamaModelInfo = async () => {
+export const llamaModelInfo = async (config = {}) => {
   if (!generation_settings) {
-    const props = await fetch("/props").then(r => r.json());
+    const api_url = config.api_url || "";
+    const props = await fetch(`${api_url}/props`).then(r => r.json());
     generation_settings = props.default_generation_settings;
   }
   return generation_settings;
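With these two changes completion.js no longer hard-codes the server origin: both llama() and llamaModelInfo() accept an api_url in their config argument and fall back to "" (same origin) when it is omitted. A minimal usage sketch, assuming a server reachable at the placeholder URL below; the option name matches the diff, everything else is illustrative:

import { llama, llamaModelInfo } from './completion.js';

// Placeholder base URL, written without a trailing slash because the client
// appends "/props" and "/completion" itself.
const api_url = 'http://127.0.0.1:8080';

// Fetches `${api_url}/props` once and caches default_generation_settings.
const settings = await llamaModelInfo({ api_url });
console.log(settings);

// Streams tokens from `${api_url}/completion`.
for await (const chunk of llama('Hello', { n_predict: 16 }, { api_url })) {
  console.log(chunk.data.content);
}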
@@ -199,10 +199,10 @@
 <script type="module">
   import {
     html, h, signal, effect, computed, render, useSignal, useEffect, useRef, Component
-  } from '/index.js';
+  } from './index.js';
 
-  import { llama } from '/completion.js';
-  import { SchemaConverter } from '/json-schema-to-grammar.mjs';
+  import { llama } from './completion.js';
+  import { SchemaConverter } from './json-schema-to-grammar.mjs';
 
   let selected_image = false;
   var slot_id = -1;
 
@@ -222,6 +222,7 @@
 temperature: 0.7,
 repeat_last_n: 256, // 0 = disable penalty, -1 = context size
 repeat_penalty: 1.18, // 1.0 = disabled
+penalize_nl: false,
 top_k: 40, // <= 0 to use vocab size
 top_p: 0.95, // 1.0 = disabled
 min_p: 0.05, // 0 = disabled
@@ -405,7 +406,7 @@
   throw new Error("already running");
 }
 controller.value = new AbortController();
-for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
+for await (const chunk of llama(prompt, llamaParams, { controller: controller.value, api_url: document.baseURI.replace(/\/+$/, '') })) {
   const data = chunk.data;
 
   if (data.stop) {
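Note on the api_url value passed here: document.baseURI typically ends with a trailing slash when the UI is served from a directory-style path, so the UI trims trailing slashes before the client appends "/completion". A small illustration of the same regex on an example URL (values are placeholders):

// Example value of document.baseURI when the page sits under a sub-path.
const base = 'http://127.0.0.1:8080/llama-ui/';
const api_url = base.replace(/\/+$/, '');      // -> 'http://127.0.0.1:8080/llama-ui'
console.log(`${api_url}/completion`);          // no doubled slash in the request path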
@@ -627,6 +628,7 @@
 const updateParams = (el) => params.value = { ...params.value, [el.target.name]: el.target.value }
 const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
 const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) }
+const updateParamsBool = (el) => params.value = { ...params.value, [el.target.name]: el.target.checked }
 
 const grammarJsonSchemaPropOrder = signal('')
 const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
@@ -670,6 +672,15 @@
 `
 };
 
+const BoolField = ({ label, name, value }) => {
+  return html`
+    <div>
+      <label for="${name}">${label}</label>
+      <input type="checkbox" id="${name}" name="${name}" checked="${value}" onclick=${updateParamsBool} />
+    </div>
+  `
+};
+
 const userTemplateReset = (e) => {
   e.preventDefault();
   userTemplateResetToDefaultAndApply()
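The new BoolField helper mirrors the existing IntField/FloatField helpers: it renders a labelled checkbox whose onclick handler is the updateParamsBool function added above, so the checked state is written back into params.value under the field's name (the penalize_nl field wired up in the next hunk is the first user). A minimal sketch of that state-update pattern, with a plain object standing in for the params signal (illustrative only):

// Stand-in for the params signal used by the UI.
const params = { value: { penalize_nl: false } };

// What updateParamsBool does when the checkbox named "penalize_nl" is toggled:
const onToggle = (el) => params.value = { ...params.value, [el.target.name]: el.target.checked };

onToggle({ target: { name: 'penalize_nl', checked: true } });
console.log(params.value.penalize_nl); // true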
@@ -769,6 +780,7 @@
 ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
 ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
 ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
+${BoolField({ label: "Penalize repetition of newlines", name: "penalize_nl", value: params.value.penalize_nl })}
 ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
 ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
 ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
@@ -2190,8 +2190,6 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
 printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold);
 printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
 printf(" -ub N, --ubatch-size N physical maximum batch size (default: %d)\n", params.n_ubatch);
-printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
-printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
 if (llama_supports_mlock()) {
 printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
 }
@@ -2214,6 +2212,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
 printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n");
 printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n");
 printf(" or for intermediate results and KV (with split-mode = row)\n");
+printf(" -nkvo, --no-kv-offload\n");
+printf(" disable KV offload\n");
 }
 printf(" -m FNAME, --model FNAME\n");
 printf(" model path (default: %s)\n", params.model.c_str());
@ -2499,6 +2499,8 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
|
||||||
"See main README.md for information on enabling GPU BLAS support",
|
"See main README.md for information on enabling GPU BLAS support",
|
||||||
{{"n_gpu_layers", params.n_gpu_layers}});
|
{{"n_gpu_layers", params.n_gpu_layers}});
|
||||||
}
|
}
|
||||||
|
} else if (arg == "-nkvo" || arg == "--no-kv-offload") {
|
||||||
|
params.no_kv_offload = true;
|
||||||
} else if (arg == "--split-mode" || arg == "-sm") {
|
} else if (arg == "--split-mode" || arg == "-sm") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
|
@ -3567,6 +3569,7 @@ int main(int argc, char ** argv) {
|
||||||
sigemptyset (&sigint_action.sa_mask);
|
sigemptyset (&sigint_action.sa_mask);
|
||||||
sigint_action.sa_flags = 0;
|
sigint_action.sa_flags = 0;
|
||||||
sigaction(SIGINT, &sigint_action, NULL);
|
sigaction(SIGINT, &sigint_action, NULL);
|
||||||
|
sigaction(SIGTERM, &sigint_action, NULL);
|
||||||
#elif defined (_WIN32)
|
#elif defined (_WIN32)
|
||||||
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||||
|
|
|
examples/server/tests/features/steps/steps.py

@@ -1114,7 +1114,10 @@ def start_server_background(context):
     server_args.append('--verbose')
 if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
     server_args.extend(['--log-format', "text"])
-print(f"starting server with: {context.server_path} {server_args}")
+args = [str(arg) for arg in [context.server_path, *server_args]]
+print(f"bench: starting server with: {' '.join(args)}")
+
 flags = 0
 if 'nt' == os.name:
     flags |= subprocess.DETACHED_PROCESS

@@ -1130,16 +1133,14 @@ def start_server_background(context):
     [str(arg) for arg in [context.server_path, *server_args]],
     **pkwargs)

-def log_stdout(process):
-    for line in iter(process.stdout.readline, b''):
-        print(line.decode('utf-8'), end='')
-thread_stdout = threading.Thread(target=log_stdout, args=(context.server_process,))
+def server_log(in_stream, out_stream):
+    for line in iter(in_stream.readline, b''):
+        print(line.decode('utf-8'), end='', file=out_stream)
+
+thread_stdout = threading.Thread(target=server_log, args=(context.server_process.stdout, sys.stdout))
 thread_stdout.start()

-def log_stderr(process):
-    for line in iter(process.stderr.readline, b''):
-        print(line.decode('utf-8'), end='', file=sys.stderr)
-thread_stderr = threading.Thread(target=log_stderr, args=(context.server_process,))
+thread_stderr = threading.Thread(target=server_log, args=(context.server_process.stderr, sys.stderr))
 thread_stderr.start()

 print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
examples/server/utils.hpp

@@ -49,12 +49,23 @@ extern bool server_log_json;
 #define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
 #define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)

+static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra);
+
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value) {
     // Fallback null to default value
-    return body.contains(key) && !body.at(key).is_null()
-        ? body.value(key, default_value)
-        : default_value;
+    if (body.contains(key) && !body.at(key).is_null()){
+        try {
+            return body.value(key, default_value);
+        }
+        catch (nlohmann::json_abi_v3_11_3::detail::type_error const&){
+            std::string message = "Wrong type supplied for parameter '" + key + "'. Expected '" + typeid(default_value).name() + "', using default value.";
+            server_log("WARN", __func__, __LINE__, message.c_str(), body);
+            return default_value;
+        }
+    } else {
+        return default_value;
+    }
 }

 static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {

@@ -556,6 +567,15 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
 {"model", modelname},
 {"object", "chat.completion.chunk"}
 };
+
+if (!finish_reason.empty()) {
+    int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
+    int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
+    ret.push_back({"usage", json {
+        {"completion_tokens", num_tokens_predicted},
+        {"prompt_tokens", num_prompt_tokens},
+        {"total_tokens", num_tokens_predicted + num_prompt_tokens}
+    }});
+}

 return std::vector<json>({ret});
 }
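Note: the reworked json_value above catches nlohmann's type_error so that a request field with the wrong JSON type falls back to the default instead of aborting the request. A minimal standalone sketch of that behaviour (illustrative field names and values, not llama.cpp code; the catch here uses the generic nlohmann::json::type_error alias rather than the versioned namespace seen in the patch):

// Sketch of the "wrong type falls back to default" behaviour shown above.
// Assumes nlohmann/json.hpp is available; everything else is made up for illustration.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

template <typename T>
static T json_value_sketch(const json & body, const std::string & key, const T & default_value) {
    if (body.contains(key) && !body.at(key).is_null()) {
        try {
            return body.value(key, default_value);           // throws type_error on type mismatch
        } catch (const nlohmann::json::type_error &) {
            std::cerr << "wrong type for '" << key << "', using default\n";
            return default_value;
        }
    }
    return default_value;
}

int main() {
    json body = {{"n_predict", "not-a-number"}, {"temperature", 0.8}};
    std::cout << json_value_sketch(body, "n_predict", 128) << "\n";   // 128: string where an int was expected
    std::cout << json_value_sketch(body, "temperature", 1.0) << "\n"; // 0.8: matching type is returned
    std::cout << json_value_sketch(body, "top_k", 40) << "\n";        // 40: missing key falls back
}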
ggml-alloc.c

@@ -705,8 +705,13 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
 struct ggml_tensor * leaf = graph->leafs[i];
 struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
 galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
-galloc->leaf_allocs[i].leaf.offset = hn->offset;
-galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
+if (leaf->view_src || leaf->data) {
+    galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
+    galloc->leaf_allocs[i].leaf.size_max = 0;
+} else {
+    galloc->leaf_allocs[i].leaf.offset = hn->offset;
+    galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
+}
 }

 // reallocate buffers if needed
ggml-backend.h

@@ -137,7 +137,7 @@ extern "C" {
 /*
 Example usage:

-// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be asigned
+// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
 // preferrably to run on the same backend as the buffer
 ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
ggml-common.h

@@ -377,13 +377,20 @@ typedef struct {
 } block_iq1_s;
 static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");

-// 1.8125 bpw
+// 1.75 bpw
 typedef struct {
 uint8_t qs[QK_K/8]; // grid index, low 8 bits
 uint8_t qh[QK_K/16]; // grid index, high 3 bits + grid shift bit (for two groups of 8)
-uint8_t scales[QK_K/32]; // 4-bit block scales
+#if QK_K == 64
+ggml_half d;
+#endif
+uint8_t scales[QK_K/32]; // 3-bit block scales (4-bit if QK_K == 64)
 } block_iq1_m;
+#if QK_K == 64
+static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32 + sizeof(ggml_half), "wrong iq1_m block size/padding");
+#else
 static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32, "wrong iq1_m block size/padding");
+#endif

 // Used by IQ1_M quants
 typedef union {

@@ -440,10 +447,11 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
 #define GGML_COMMON_IMPL
 #elif defined(GGML_COMMON_IMPL_SYCL)

 #include <cstdint>

-#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
-#define GGML_TABLE_END() });
+#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
+#define GGML_TABLE_END() };

 #define GGML_COMMON_IMPL
 #endif
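Note: the updated "1.75 bpw" comment can be sanity-checked from the block layout above. A small sketch (the QK_K values and the 2-byte ggml_half size are assumptions stated here; nothing is imported from ggml):

// Recomputing bits-per-weight of block_iq1_m from the layout above.
#include <cstdio>

int main() {
    const int qk_values[] = {256, 64};
    for (int QK_K : qk_values) {
        int bytes = QK_K/8 + QK_K/16 + QK_K/32;   // qs + qh + scales
        if (QK_K == 64) {
            bytes += 2;                           // extra per-block fp16 d only in the QK_K == 64 layout
        }
        printf("QK_K=%d: %d bytes/block = %.4f bpw\n", QK_K, bytes, 8.0*bytes/QK_K);
    }
    // prints 1.7500 for QK_K=256 (matching the new comment) and 2.0000 for QK_K=64
}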
ggml-cuda.cu (214 changed lines)

@@ -401,10 +401,8 @@ GGML_CALL static void * ggml_backend_cuda_buffer_get_base(ggml_backend_buffer_t
 GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
 ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;

-if (tensor->view_src != NULL && tensor->view_offs == 0) {
+if (tensor->view_src != NULL) {
     assert(tensor->view_src->buffer->buft == buffer->buft);
-    tensor->backend = tensor->view_src->backend;
-    tensor->extra = tensor->view_src->extra;
     return;
 }

@@ -1964,227 +1962,49 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
 }
 }

-#if 0
-template<typename ... Srcs>
-static __global__ void k_compute_batched_ptrs_id(
-        const void ** ptrs_src, void ** ptrs_dst,
-        int ne12, int ne13,
-        int ne23,
-        int nb02, int nb03,
-        int nb12, int nb13,
-        int nb2, int nb3,
-        int r2, int r3,
-        ggml_type src0_type, half * src0_as_f16, int64_t src0_ne,
-        const half * src1_f16, half * dst_f16,
-        const int32_t * ids, const int id,
-        Srcs... src0s) {
-
-    int i = ids[id];
-
-    half * src0_f16;
-    const void * srcs_ar[] = { (const half *) src0s... };
-    if (src0_type == GGML_TYPE_F16) {
-        src0_f16 = (half *) srcs_ar[i];
-    } else {
-        src0_f16 = src0_as_f16;
-        if (threadIdx.x == 0 && threadIdx.y == 0) {
-            const to_fp16_cuda_t to_fp16 = ggml_get_to_fp16_cuda(src0_type);
-            to_fp16(srcs_ar[i], src0_f16, src0_ne, cudaStreamFireAndForget);
-        }
-    }
-
-    int i13 = blockIdx.x * blockDim.x + threadIdx.x;
-    int i12 = blockIdx.y * blockDim.y + threadIdx.y;
-
-    if (i13 >= ne13 || i12 >= ne12) {
-        return;
-    }
-
-    int i03 = i13 / r3;
-    int i02 = i12 / r2;
-
-    ptrs_src[0*ne23 + i12 + i13*ne12] = (const char *) src0_f16 + i02*nb02 + i03*nb03;
-    ptrs_src[1*ne23 + i12 + i13*ne12] = (const char *) src1_f16 + i12*nb12/2 + i13*nb13/2;
-    ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst_f16 + i12* nb2/2 + i13* nb3/2;
-}
-
-static void ggml_cuda_mul_mat_id_cublas(ggml_tensor * dst) {
-    const struct ggml_tensor * ids = dst->src[0];
-    const struct ggml_tensor * src1 = dst->src[1];
-    const struct ggml_tensor * src00 = dst->src[2];
-
-    const int id = dst->op_params[0];
-
-    GGML_ASSERT(!ggml_is_transposed(src00));
-    GGML_ASSERT(!ggml_is_transposed(src1));
-
-    GGML_ASSERT(src00->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-
-    const int64_t ne00 = src00->ne[0]; GGML_UNUSED(ne00);
-    const int64_t ne01 = src00->ne[1];
-    const int64_t ne02 = src00->ne[2];
-    const int64_t ne03 = src00->ne[3];
-
-    //const int64_t nb01 = src00->nb[1];
-    const int64_t nb02 = src00->nb[2]; GGML_UNUSED(nb02);
-    const int64_t nb03 = src00->nb[3]; GGML_UNUSED(nb03);
-
-    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
-    const int64_t ne12 = src1->ne[2];
-    const int64_t ne13 = src1->ne[3];
-
-    //const int64_t nb11 = src1->nb[1];
-    const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
-    const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
-
-    const int64_t ne1 = ggml_nelements(src1);
-    const int64_t ne = ggml_nelements(dst);
-
-    ggml_cuda_set_device(g_main_device);
-    cudaStream_t main_stream = g_cudaStreams[g_main_device][0];
-
-    CUBLAS_CHECK(cublasSetStream(g_cublas_handles[g_main_device], main_stream));
-
-    //ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
-    //void * src0_ddq = src0_extra->data_device[g_main_device];
-    //half * src0_as_f16 = (half *) src0_ddq;
-
-    ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
-    float * src1_ddf = (float *) src1_extra->data_device[g_main_device];
-
-    ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
-    float * dst_ddf = (float *) dst_extra->data_device[g_main_device];
-
-    // convert src1 to fp16
-    const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
-    GGML_ASSERT(to_fp16_cuda != nullptr);
-
-    size_t src1_as = 0;
-    half * src1_as_f16 = (half *) ggml_cuda_pool_malloc(ne1 * sizeof(half), &src1_as);
-    to_fp16_cuda(src1_ddf, src1_as_f16, ne1, main_stream);
-
-    size_t dst_as = 0;
-    half * dst_f16 = (half *) ggml_cuda_pool_malloc(ne * sizeof(half), &dst_as);
-
-    GGML_ASSERT(ne12 % ne02 == 0);
-    GGML_ASSERT(ne13 % ne03 == 0);
-
-    // broadcast factors
-    const int64_t r2 = ne12/ne02;
-    const int64_t r3 = ne13/ne03;
-
-    const half alpha_f16 = 1.0f;
-    const half beta_f16 = 0.0f;
-
-    // use cublasGemmBatchedEx
-    const int ne23 = ne12*ne13;
-
-    const void ** ptrs_src = nullptr;
-    void ** ptrs_dst = nullptr;
-
-    size_t ptrs_src_s = 0;
-    size_t ptrs_dst_s = 0;
-
-    ptrs_src = (const void **) ggml_cuda_pool_malloc(2*ne23*sizeof(void *), &ptrs_src_s);
-    ptrs_dst = ( void **) ggml_cuda_pool_malloc(1*ne23*sizeof(void *), &ptrs_dst_s);
-
-    int64_t src0_ne = ggml_nelements(src00);
-    half * src0_as_f16 = nullptr;
-    size_t src0_as = 0;
-    if (src00->type != GGML_TYPE_F16) {
-        src0_as_f16 = (half *) ggml_cuda_pool_malloc(src0_ne * sizeof(half), &src0_as);
-    }
-
-    static_assert(GGML_MAX_SRC == 6, "GGML_MAX_SRC == 6");
-    dim3 block_dims(ne13, ne12);
-    k_compute_batched_ptrs_id<<<1, block_dims, 0, main_stream>>>(
-            ptrs_src, ptrs_dst,
-            ne12, ne13,
-            ne23,
-            ne00*ne01*sizeof(half), ne00*ne01*ne02*sizeof(half),
-            nb12, nb13,
-            dst->nb[2], dst->nb[3],
-            r2, r3,
-            src00->type, src0_as_f16, src0_ne,
-            src1_as_f16, dst_f16,
-            (const int *)((ggml_tensor_extra_gpu *)ids->extra)->data_device[g_main_device], id,
-            dst->src[2] ? (const half *)((ggml_tensor_extra_gpu *)dst->src[2]->extra)->data_device[g_main_device] : nullptr,
-            dst->src[3] ? (const half *)((ggml_tensor_extra_gpu *)dst->src[3]->extra)->data_device[g_main_device] : nullptr,
-            dst->src[4] ? (const half *)((ggml_tensor_extra_gpu *)dst->src[4]->extra)->data_device[g_main_device] : nullptr,
-            dst->src[5] ? (const half *)((ggml_tensor_extra_gpu *)dst->src[5]->extra)->data_device[g_main_device] : nullptr
-    );
-    CUDA_CHECK(cudaGetLastError());
-
-    CUBLAS_CHECK(
-    cublasGemmBatchedEx(g_cublas_handles[g_main_device], CUBLAS_OP_T, CUBLAS_OP_N,
-            ne01, ne11, ne10,
-            &alpha_f16, (const void **) (ptrs_src + 0*ne23), CUDA_R_16F, ne00,
-                        (const void **) (ptrs_src + 1*ne23), CUDA_R_16F, ne10,
-            &beta_f16,  ( void **) (ptrs_dst + 0*ne23), CUDA_R_16F, ne01,
-            ne23,
-            CUBLAS_COMPUTE_16F,
-            CUBLAS_GEMM_DEFAULT_TENSOR_OP));
-
-    if (src0_as != 0) {
-        ggml_cuda_pool_free(src0_as_f16, src0_as);
-    }
-    if (ptrs_src_s != 0) {
-        ggml_cuda_pool_free(ptrs_src, ptrs_src_s);
-    }
-    if (ptrs_dst_s != 0) {
-        ggml_cuda_pool_free(ptrs_dst, ptrs_dst_s);
-    }
-
-    const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_F16);
-    to_fp32_cuda(dst_f16, dst_ddf, ne, main_stream);
-
-    ggml_cuda_pool_free(src1_as_f16, src1_as);
-    ggml_cuda_pool_free(dst_f16, dst_as);
-}
-#endif
-
 static void ggml_cuda_mul_mat_id(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-#if 0
-    ggml_cuda_mul_mat_id_cublas(dst);
-    // TODO: mmq/mmv support
-#endif
     const ggml_tensor * src0 = dst->src[0];
     const ggml_tensor * src1 = dst->src[1];
+    const ggml_tensor * ids  = dst->src[2];
+
+    GGML_ASSERT(!ggml_backend_buffer_is_cuda_split(src0->buffer) && "mul_mat_id does not support split buffers");

     cudaStream_t stream = ctx.stream();

     const size_t nb11 = src1->nb[1];
     const size_t nb1  = dst->nb[1];

-    const struct ggml_tensor * ids = src0;
     const int32_t id = ((int32_t *) dst->op_params)[0];
-    const int32_t n_as = ((int32_t *) dst->op_params)[1];
+    const int32_t n_as = src0->ne[2];

     std::vector<char> ids_host(ggml_nbytes(ids));
     const char * ids_dev = (const char *) ids->data;
     CUDA_CHECK(cudaMemcpyAsync(ids_host.data(), ids_dev, ggml_nbytes(ids), cudaMemcpyDeviceToHost, stream));
     CUDA_CHECK(cudaStreamSynchronize(stream));

+    ggml_tensor src0_row = *src0;
     ggml_tensor src1_row = *src1;
     ggml_tensor dst_row = *dst;

+    char * src0_original = (char *) src0->data;
     char * src1_original = (char *) src1->data;
     char * dst_original  = (char *) dst->data;

+    src0_row.ne[2] = 1;
+    src0_row.ne[3] = 1;
+    src0_row.nb[3] = src0->nb[2];

     if (src1->ne[1] == 1) {
         for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
             const int32_t row_id = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);

             GGML_ASSERT(row_id >= 0 && row_id < n_as);

-            const struct ggml_tensor * src0_row = dst->src[row_id + 2];
+            src0_row.data = src0_original + row_id*src0->nb[2];

             src1_row.data = src1_original + i01*src1->nb[1];
             dst_row.data  = dst_original  + i01*dst->nb[1];

-            ggml_cuda_mul_mat(ctx, src0_row, &src1_row, &dst_row);
+            ggml_cuda_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
         }
     } else {
         ggml_cuda_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));

@@ -2194,8 +2014,6 @@ static void ggml_cuda_mul_mat_id(ggml_backend_cuda_context & ctx, ggml_tensor *
         dst_row.data = dst_contiguous.get();

         for (int32_t row_id = 0; row_id < n_as; ++row_id) {
-            const struct ggml_tensor * src0_row = dst->src[row_id + 2];
-
             int64_t num_src1_rows = 0;
             for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
                 const int32_t row_id_i = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);

@@ -2215,6 +2033,8 @@ static void ggml_cuda_mul_mat_id(ggml_backend_cuda_context & ctx, ggml_tensor *
                 continue;
             }

+            src0_row.data = src0_original + row_id*src0->nb[2];
+
             src1_row.ne[1] = num_src1_rows;
             dst_row.ne[1]  = num_src1_rows;

@@ -2226,7 +2046,7 @@ static void ggml_cuda_mul_mat_id(ggml_backend_cuda_context & ctx, ggml_tensor *
             dst_row.nb[2] = num_src1_rows*nb1;
             dst_row.nb[3] = num_src1_rows*nb1;

-            ggml_cuda_mul_mat(ctx, src0_row, &src1_row, &dst_row);
+            ggml_cuda_mul_mat(ctx, &src0_row, &src1_row, &dst_row);

             num_src1_rows = 0;
             for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {

@@ -2395,7 +2215,7 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
 cudaError_t err = cudaGetLastError();
 if (err != cudaSuccess) {
     fprintf(stderr, "%s: %s failed\n", __func__, ggml_op_desc(dst));
-    GGML_ASSERT(false);
+    CUDA_CHECK(err);
 }

 return true;
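Note: the CUDA rework above packs all experts into one 3-D src0 tensor and selects an expert with plain pointer arithmetic (src0_original + row_id*src0->nb[2]) instead of a separate dst->src[row_id + 2] tensor per expert. A hedged host-side sketch of that addressing with an invented layout, not the real ggml structs:

// Selecting one expert's weight slice out of a packed 3-D buffer,
// mirroring "src0_original + row_id*src0->nb[2]" above. Layout assumed:
// ne0 x ne1 weights per expert, experts stacked along dim 2 with stride nb2() bytes.
#include <cstdint>
#include <cstdio>
#include <vector>

struct packed_experts {
    std::vector<float> data;   // n_as * ne1 * ne0 floats, expert-major
    int64_t ne0, ne1, n_as;
    size_t nb2() const { return sizeof(float) * ne0 * ne1; } // bytes per expert slice
};

static const float * expert_slice(const packed_experts & w, int32_t row_id) {
    const char * base = reinterpret_cast<const char *>(w.data.data());
    return reinterpret_cast<const float *>(base + row_id * w.nb2());
}

int main() {
    packed_experts w{std::vector<float>(4 * 2 * 3), /*ne0=*/4, /*ne1=*/2, /*n_as=*/3};
    for (size_t i = 0; i < w.data.size(); ++i) w.data[i] = float(i);
    // expert 2 starts 2*nb2() bytes into the buffer, i.e. at element 16
    printf("first weight of expert 2: %.0f\n", expert_slice(w, 2)[0]); // prints 16
}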
ggml-cuda/argsort.cu

@@ -8,32 +8,41 @@ static inline __device__ void ggml_cuda_swap(T & a, T & b) {
 }

 template<ggml_sort_order order>
-static __global__ void k_argsort_f32_i32(const float * x, int * dst, const int ncols) {
+static __global__ void k_argsort_f32_i32(const float * x, int * dst, const int ncols, int ncols_pad) {
     // bitonic sort
     int col = threadIdx.x;
     int row = blockIdx.y;

-    if (col >= ncols) return;
+    if (col >= ncols_pad) {
+        return;
+    }

     const float * x_row = x + row * ncols;
-    int * dst_row = dst + row * ncols;
+    extern __shared__ int dst_row[];

     // initialize indices
-    if (col < ncols) {
-        dst_row[col] = col;
-    }
+    dst_row[col] = col;
     __syncthreads();

-    for (int k = 2; k <= ncols; k *= 2) {
+    for (int k = 2; k <= ncols_pad; k *= 2) {
         for (int j = k / 2; j > 0; j /= 2) {
             int ixj = col ^ j;
             if (ixj > col) {
                 if ((col & k) == 0) {
-                    if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] > x_row[dst_row[ixj]] : x_row[dst_row[col]] < x_row[dst_row[ixj]]) {
+                    if (dst_row[col] >= ncols ||
+                        (dst_row[ixj] < ncols && (order == GGML_SORT_ORDER_ASC ?
+                            x_row[dst_row[col]] > x_row[dst_row[ixj]] :
+                            x_row[dst_row[col]] < x_row[dst_row[ixj]]))
+                    ) {
                         ggml_cuda_swap(dst_row[col], dst_row[ixj]);
                     }
                 } else {
-                    if (order == GGML_SORT_ORDER_ASC ? x_row[dst_row[col]] < x_row[dst_row[ixj]] : x_row[dst_row[col]] > x_row[dst_row[ixj]]) {
+                    if (dst_row[ixj] >= ncols ||
+                        (dst_row[col] < ncols && (order == GGML_SORT_ORDER_ASC ?
+                            x_row[dst_row[col]] < x_row[dst_row[ixj]] :
+                            x_row[dst_row[col]] > x_row[dst_row[ixj]]))
+                    ) {
                         ggml_cuda_swap(dst_row[col], dst_row[ixj]);
                     }
                 }
             }

@@ -41,18 +50,35 @@ static __global__ void k_argsort_f32_i32(const float * x, int * dst, const int n
             __syncthreads();
         }
     }
+
+    // copy the result to dst without the padding
+    if (col < ncols) {
+        dst[row * ncols + col] = dst_row[col];
+    }
+}
+
+static int next_power_of_2(int x) {
+    int n = 1;
+    while (n < x) {
+        n *= 2;
+    }
+    return n;
 }

 static void argsort_f32_i32_cuda(const float * x, int * dst, const int ncols, const int nrows, ggml_sort_order order, cudaStream_t stream) {
     // bitonic sort requires ncols to be power of 2
-    GGML_ASSERT((ncols & (ncols - 1)) == 0);
+    const int ncols_pad = next_power_of_2(ncols);

-    const dim3 block_dims(ncols, 1, 1);
+    const dim3 block_dims(ncols_pad, 1, 1);
     const dim3 block_nums(1, nrows, 1);
+    const size_t shared_mem = ncols_pad * sizeof(int);
+
+    GGML_ASSERT(shared_mem <= ggml_cuda_info().devices[ggml_cuda_get_device()].smpb);

     if (order == GGML_SORT_ORDER_ASC) {
-        k_argsort_f32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+        k_argsort_f32_i32<GGML_SORT_ORDER_ASC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
     } else if (order == GGML_SORT_ORDER_DESC) {
-        k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
+        k_argsort_f32_i32<GGML_SORT_ORDER_DESC><<<block_nums, block_dims, shared_mem, stream>>>(x, dst, ncols, ncols_pad);
     } else {
         GGML_ASSERT(false);
     }
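Note: the padding scheme above (run the bitonic network over next_power_of_2(ncols) indices, make indices >= ncols always sort last, then write back only the first ncols) can be checked on the CPU. A sketch of an ascending argsort under that scheme; this is not the CUDA kernel itself:

// CPU check of the padded bitonic argsort idea used above.
#include <cstdio>
#include <utility>
#include <vector>

static int next_power_of_2(int x) {
    int n = 1;
    while (n < x) n *= 2;
    return n;
}

static std::vector<int> argsort_asc(const std::vector<float> & x) {
    const int ncols     = (int) x.size();
    const int ncols_pad = next_power_of_2(ncols);

    std::vector<int> idx(ncols_pad);
    for (int i = 0; i < ncols_pad; ++i) idx[i] = i;

    // same compare-and-swap schedule as the kernel, executed serially
    for (int k = 2; k <= ncols_pad; k *= 2) {
        for (int j = k / 2; j > 0; j /= 2) {
            for (int col = 0; col < ncols_pad; ++col) {
                int ixj = col ^ j;
                if (ixj <= col) continue;
                // padded indices (>= ncols) are treated as larger than any real value
                auto bigger = [&](int a, int b) {
                    if (idx[a] >= ncols) return true;
                    if (idx[b] >= ncols) return false;
                    return x[idx[a]] > x[idx[b]];
                };
                bool ascending = (col & k) == 0;
                if (ascending ? bigger(col, ixj) : bigger(ixj, col)) {
                    std::swap(idx[col], idx[ixj]);
                }
            }
        }
    }
    idx.resize(ncols); // drop the padding
    return idx;
}

int main() {
    std::vector<float> x = {0.3f, -1.0f, 2.5f, 0.0f, 1.5f}; // ncols = 5, padded to 8
    for (int i : argsort_asc(x)) printf("%d ", i);          // expected: 1 3 0 4 2
    printf("\n");
}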
ggml-cuda/common.cuh

@@ -1,7 +1,8 @@
 #pragma once

-#include "../ggml.h"
-#include "../ggml-cuda.h"
+#include "ggml.h"
+#include "ggml-cuda.h"

 #include <memory>

 #if defined(GGML_USE_HIPBLAS)

@@ -11,7 +12,7 @@
 #define GGML_COMMON_DECL_CUDA
 #define GGML_COMMON_IMPL_CUDA
 #endif
-#include "../ggml-common.h"
+#include "ggml-common.h"

 #include <cstdio>
 #include <array>
ggml-cuda/dmmv.cu

@@ -2,14 +2,6 @@
 #include "dequantize.cuh"
 #include "convert.cuh"

-// dmmv = dequantize_mul_mat_vec
-#ifndef GGML_CUDA_DMMV_X
-#define GGML_CUDA_DMMV_X 32
-#endif
-#ifndef GGML_CUDA_MMV_Y
-#define GGML_CUDA_MMV_Y 1
-#endif
-
 #ifndef K_QUANTS_PER_ITERATION
 #define K_QUANTS_PER_ITERATION 2
 #else

ggml-cuda/dmmv.cuh

@@ -1,5 +1,16 @@
 #include "common.cuh"

+// dmmv = dequantize_mul_mat_vec
+
+// TODO: remove this?
+#ifndef GGML_CUDA_DMMV_X
+#define GGML_CUDA_DMMV_X 32
+#endif
+
+#ifndef GGML_CUDA_MMV_Y
+#define GGML_CUDA_MMV_Y 1
+#endif
+
 void ggml_cuda_op_dequantize_mul_mat_vec(
     ggml_backend_cuda_context & ctx,
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
ggml-metal.m (209 changed lines)

@@ -1685,37 +1685,31 @@ static enum ggml_status ggml_metal_graph_compute(
 {
     //GGML_ASSERT(ne00 == ne10);
     //GGML_ASSERT(ne03 == ne13);
+    const int n_as = src0->ne[2];
-    GGML_ASSERT(src0t == GGML_TYPE_I32);
-
-    const int n_as = ((int32_t *) dst->op_params)[1];
-
-    // TODO: make this more general
-    GGML_ASSERT(n_as <= 8);

     // max size of the src1ids array in the kernel shared buffer
     GGML_ASSERT(ne11 <= 4096);

-    const int64_t ne20 = src2 ? src2->ne[0] : 0;
-    const int64_t ne21 = src2 ? src2->ne[1] : 0;
-    const int64_t ne22 = src2 ? src2->ne[2] : 0;
-    const int64_t ne23 = src2 ? src2->ne[3] : 0; GGML_UNUSED(ne23);
+    // src2 = ids
+    const int64_t ne20 = src2->ne[0]; GGML_UNUSED(ne20);
+    const int64_t ne21 = src2->ne[1];
+    const int64_t ne22 = src2->ne[2]; GGML_UNUSED(ne22);
+    const int64_t ne23 = src2->ne[3]; GGML_UNUSED(ne23);

-    const uint64_t nb20 = src2 ? src2->nb[0] : 0; GGML_UNUSED(nb20);
-    const uint64_t nb21 = src2 ? src2->nb[1] : 0;
-    const uint64_t nb22 = src2 ? src2->nb[2] : 0;
-    const uint64_t nb23 = src2 ? src2->nb[3] : 0; GGML_UNUSED(nb23);
+    const uint64_t nb20 = src2->nb[0]; GGML_UNUSED(nb20);
+    const uint64_t nb21 = src2->nb[1];
+    const uint64_t nb22 = src2->nb[2]; GGML_UNUSED(nb22);
+    const uint64_t nb23 = src2->nb[3]; GGML_UNUSED(nb23);

-    const enum ggml_type src2t = src2 ? src2->type : GGML_TYPE_COUNT; GGML_UNUSED(src2t);
+    const enum ggml_type src2t = src2->type; GGML_UNUSED(src2t);

-    GGML_ASSERT(!ggml_is_transposed(src2));
+    GGML_ASSERT(src2t == GGML_TYPE_I32);
+
+    GGML_ASSERT(!ggml_is_transposed(src0));
     GGML_ASSERT(!ggml_is_transposed(src1));

     GGML_ASSERT(src1t == GGML_TYPE_F32);

-    const uint r2 = ne12/ne22;
-    const uint r3 = ne13/ne23;
-
     // find the break-even point where the matrix-matrix kernel becomes more efficient compared
     // to the matrix-vector kernel
     int ne11_mm_min = n_as;

@@ -1723,7 +1717,10 @@ static enum ggml_status ggml_metal_graph_compute(
     const int idx = ((int32_t *) dst->op_params)[0];

     // batch size
-    GGML_ASSERT(ne01 == ne11);
+    GGML_ASSERT(ne21 == ne11); // ?
+    GGML_ASSERT(ne12 == 1 && ne13 == 1); // no broadcasting
+    const uint r2 = 1;
+    const uint r3 = 1;

     // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
     // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel

@@ -1732,7 +1729,7 @@ static enum ggml_status ggml_metal_graph_compute(
     // indirect matrix multiplication
     // !!!
     if ([ctx->device supportsFamily:MTLGPUFamilyApple7] &&
-        ne20 % 32 == 0 && ne20 >= 64 &&
+        ne00 % 32 == 0 && ne00 >= 64 &&
         ne11 > ne11_mm_min) {

     // some Metal matrix data types require aligned pointers

@@ -1745,7 +1742,7 @@ static enum ggml_status ggml_metal_graph_compute(
     id<MTLComputePipelineState> pipeline = nil;

-    switch (src2->type) {
+    switch (src0->type) {
         case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F32_F32 ].pipeline; break;
         case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_F16_F32 ].pipeline; break;
         case GGML_TYPE_Q4_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MM_ID_Q4_0_F32 ].pipeline; break;

@@ -1774,36 +1771,27 @@ static enum ggml_status ggml_metal_graph_compute(
     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
     [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
     [encoder setBuffer:id_dst offset:offs_dst atIndex:2];
-    [encoder setBytes:&nb01 length:sizeof(nb01) atIndex:3];
-    [encoder setBytes:&ne20 length:sizeof(ne20) atIndex:4];
-    [encoder setBytes:&ne22 length:sizeof(ne22) atIndex:5];
-    [encoder setBytes:&nb21 length:sizeof(nb21) atIndex:6];
-    [encoder setBytes:&nb22 length:sizeof(nb22) atIndex:7];
-    [encoder setBytes:&ne12 length:sizeof(ne12) atIndex:8];
-    [encoder setBytes:&ne13 length:sizeof(ne13) atIndex:9];
-    [encoder setBytes:&nb10 length:sizeof(nb10) atIndex:10];
-    [encoder setBytes:&nb11 length:sizeof(nb11) atIndex:11];
-    [encoder setBytes:&nb12 length:sizeof(nb12) atIndex:12];
-    [encoder setBytes:&ne0 length:sizeof(ne0) atIndex:13];
-    [encoder setBytes:&ne1 length:sizeof(ne1) atIndex:14];
-    [encoder setBytes:&nb1 length:sizeof(nb1) atIndex:15];
-    [encoder setBytes:&r2 length:sizeof(r2) atIndex:16];
-    [encoder setBytes:&r3 length:sizeof(r3) atIndex:17];
-    [encoder setBytes:&idx length:sizeof(idx) atIndex:18];
-    // TODO: how to make this an array? read Metal docs
-    for (int j = 0; j < 8; ++j) {
-        // NOTE: this is done like this to avoid uninitialized kernel arguments when n_as < 8
-        struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
-
-        size_t offs_src_cur = 0;
-        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
-
-        [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
-    }
+    [encoder setBuffer:id_src2 offset:offs_src2 atIndex:3];
+    [encoder setBytes:&nb21 length:sizeof(nb21) atIndex:4];
+    [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:5];
+    [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:6];
+    [encoder setBytes:&nb01 length:sizeof(nb01) atIndex:7];
+    [encoder setBytes:&nb02 length:sizeof(nb02) atIndex:8];
+    [encoder setBytes:&ne12 length:sizeof(ne12) atIndex:9];
+    [encoder setBytes:&ne13 length:sizeof(ne13) atIndex:10];
+    [encoder setBytes:&nb10 length:sizeof(nb10) atIndex:11];
+    [encoder setBytes:&nb11 length:sizeof(nb11) atIndex:12];
+    [encoder setBytes:&nb12 length:sizeof(nb12) atIndex:13];
+    [encoder setBytes:&ne0 length:sizeof(ne0) atIndex:14];
+    [encoder setBytes:&ne1 length:sizeof(ne1) atIndex:15];
+    [encoder setBytes:&nb1 length:sizeof(nb1) atIndex:16];
+    [encoder setBytes:&r2 length:sizeof(r2) atIndex:17];
+    [encoder setBytes:&r3 length:sizeof(r3) atIndex:18];
+    [encoder setBytes:&idx length:sizeof(idx) atIndex:19];

     [encoder setThreadgroupMemoryLength:GGML_PAD(8192 + 2*ne11, 16) atIndex:0];

-    [encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne21 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
+    [encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne01 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
 } else {
     int nth0 = 32;
     int nth1 = 1;

@@ -1813,7 +1801,7 @@ static enum ggml_status ggml_metal_graph_compute(
     id<MTLComputePipelineState> pipeline = nil;

     // use custom matrix x vector kernel
-    switch (src2t) {
+    switch (src0t) {
         case GGML_TYPE_F32:
             {
                 GGML_ASSERT(src1t == GGML_TYPE_F32);

@@ -1947,8 +1935,8 @@ static enum ggml_status ggml_metal_graph_compute(
     }
     };

-    if (ggml_is_quantized(src2t)) {
-        GGML_ASSERT(ne20 >= nth0*nth1);
+    if (ggml_is_quantized(src0t)) {
+        GGML_ASSERT(ne00 >= nth0*nth1);
     }

     const int64_t _ne1 = 1; // kernels needs a reference in constant memory

@@ -1957,75 +1945,66 @@ static enum ggml_status ggml_metal_graph_compute(
     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
     [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
     [encoder setBuffer:id_dst offset:offs_dst atIndex:2];
-    [encoder setBytes:&nb01 length:sizeof(nb01) atIndex:3];
-    [encoder setBytes:&ne20 length:sizeof(ne20) atIndex:4];
-    [encoder setBytes:&ne21 length:sizeof(ne21) atIndex:5];
-    [encoder setBytes:&ne22 length:sizeof(ne22) atIndex:6];
-    [encoder setBytes:&nb20 length:sizeof(nb20) atIndex:7];
-    [encoder setBytes:&nb21 length:sizeof(nb21) atIndex:8];
-    [encoder setBytes:&nb22 length:sizeof(nb22) atIndex:9];
-    [encoder setBytes:&ne10 length:sizeof(ne10) atIndex:10];
-    [encoder setBytes:&_ne1 length:sizeof(_ne1) atIndex:11];
-    [encoder setBytes:&ne12 length:sizeof(ne12) atIndex:12];
-    [encoder setBytes:&ne13 length:sizeof(ne13) atIndex:13];
-    [encoder setBytes:&nb10 length:sizeof(nb10) atIndex:14];
-    [encoder setBytes:&nb11 length:sizeof(nb11) atIndex:15];
-    [encoder setBytes:&nb12 length:sizeof(nb12) atIndex:16];
-    [encoder setBytes:&ne0 length:sizeof(ne0) atIndex:17];
-    [encoder setBytes:&_ne1 length:sizeof(_ne1) atIndex:18];
-    [encoder setBytes:&nb1 length:sizeof(nb1) atIndex:19];
-    [encoder setBytes:&r2 length:sizeof(r2) atIndex:20];
-    [encoder setBytes:&r3 length:sizeof(r3) atIndex:21];
-    [encoder setBytes:&idx length:sizeof(idx) atIndex:22];
-    // TODO: how to make this an array? read Metal docs
-    for (int j = 0; j < 8; ++j) {
-        // NOTE: this is done like this to avoid uninitialized kernel arguments when n_as < 8
-        struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
-
-        size_t offs_src_cur = 0;
-        id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
-
-        [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
-    }
+    [encoder setBuffer:id_src2 offset:offs_src2 atIndex:3];
+    [encoder setBytes:&nb21 length:sizeof(nb21) atIndex:4];
+    [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:5];
+    [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:6];
+    [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:7];
+    [encoder setBytes:&nb00 length:sizeof(nb00) atIndex:8];
+    [encoder setBytes:&nb01 length:sizeof(nb01) atIndex:9];
+    [encoder setBytes:&nb02 length:sizeof(nb02) atIndex:10];
+    [encoder setBytes:&ne10 length:sizeof(ne10) atIndex:11];
+    [encoder setBytes:&_ne1 length:sizeof(_ne1) atIndex:12];
+    [encoder setBytes:&ne12 length:sizeof(ne12) atIndex:13];
+    [encoder setBytes:&ne13 length:sizeof(ne13) atIndex:14];
+    [encoder setBytes:&nb10 length:sizeof(nb10) atIndex:15];
+    [encoder setBytes:&nb11 length:sizeof(nb11) atIndex:16];
+    [encoder setBytes:&nb12 length:sizeof(nb12) atIndex:17];
+    [encoder setBytes:&ne0 length:sizeof(ne0) atIndex:18];
+    [encoder setBytes:&_ne1 length:sizeof(_ne1) atIndex:19];
+    [encoder setBytes:&nb1 length:sizeof(nb1) atIndex:20];
+    [encoder setBytes:&r2 length:sizeof(r2) atIndex:21];
+    [encoder setBytes:&r3 length:sizeof(r3) atIndex:22];
+    [encoder setBytes:&idx length:sizeof(idx) atIndex:23];

-    if (src2t == GGML_TYPE_Q4_0 || src2t == GGML_TYPE_Q4_1 || src2t == GGML_TYPE_Q5_0 ||
-        src2t == GGML_TYPE_Q5_1 || src2t == GGML_TYPE_Q8_0 || src2t == GGML_TYPE_Q2_K ||
-        src2t == GGML_TYPE_IQ1_S || src2t == GGML_TYPE_IQ1_M || src2t == GGML_TYPE_IQ2_S) {
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+    if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 || src0t == GGML_TYPE_Q5_0 ||
+        src0t == GGML_TYPE_Q5_1 || src0t == GGML_TYPE_Q8_0 || src0t == GGML_TYPE_Q2_K ||
+        src0t == GGML_TYPE_IQ1_S || src0t == GGML_TYPE_IQ1_M || src0t == GGML_TYPE_IQ2_S) {
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7)/8, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
-    else if (src2t == GGML_TYPE_IQ2_XXS || src2t == GGML_TYPE_IQ2_XS) {
-        const int mem_size = src2t == GGML_TYPE_IQ2_XXS ? 256*8+128 : 512*8+128;
+    else if (src0t == GGML_TYPE_IQ2_XXS || src0t == GGML_TYPE_IQ2_XS) {
+        const int mem_size = src0t == GGML_TYPE_IQ2_XXS ? 256*8+128 : 512*8+128;
         [encoder setThreadgroupMemoryLength:mem_size atIndex:0];
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7)/8, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
-    else if (src2t == GGML_TYPE_IQ3_XXS || src2t == GGML_TYPE_IQ3_S) {
-        const int mem_size = src2t == GGML_TYPE_IQ3_XXS ? 256*4+128 : 512*4;
+    else if (src0t == GGML_TYPE_IQ3_XXS || src0t == GGML_TYPE_IQ3_S) {
+        const int mem_size = src0t == GGML_TYPE_IQ3_XXS ? 256*4+128 : 512*4;
         [encoder setThreadgroupMemoryLength:mem_size atIndex:0];
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 7)/8, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7)/8, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
-    else if (src2t == GGML_TYPE_IQ4_NL || src2t == GGML_TYPE_IQ4_XS) {
+    else if (src0t == GGML_TYPE_IQ4_NL || src0t == GGML_TYPE_IQ4_XS) {
         const int mem_size = 32*sizeof(float);
         [encoder setThreadgroupMemoryLength:mem_size atIndex:0];
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 3)/4, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3)/4, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
-    else if (src2t == GGML_TYPE_Q4_K) {
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 3)/4, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+    else if (src0t == GGML_TYPE_Q4_K) {
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3)/4, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
-    else if (src2t == GGML_TYPE_Q3_K) {
+    else if (src0t == GGML_TYPE_Q3_K) {
 #ifdef GGML_QKK_64
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 1)/2, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
 #else
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 3)/4, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3)/4, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
 #endif
     }
-    else if (src2t == GGML_TYPE_Q5_K) {
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 3)/4, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+    else if (src0t == GGML_TYPE_Q5_K) {
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 3)/4, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
-    else if (src2t == GGML_TYPE_Q6_K) {
-        [encoder dispatchThreadgroups:MTLSizeMake((ne21 + 1)/2, _ne1, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+    else if (src0t == GGML_TYPE_Q6_K) {
+        [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, _ne1, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     } else {
         const int64_t ny = (_ne1 + nrows - 1)/nrows;
-        [encoder dispatchThreadgroups:MTLSizeMake(ne21, ny, ne01*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+        [encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne21*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
     }
     }
 } break;

@@ -2432,6 +2411,16 @@ static enum ggml_status ggml_metal_graph_compute(
     enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0];

+    // bitonic sort requires the number of elements to be power of 2
+    int64_t ne00_padded = 1;
+    while (ne00_padded < ne00) {
+        ne00_padded *= 2;
+    }
+
+    // Metal kernels require the buffer size to be multiple of 16 bytes
+    // https://developer.apple.com/documentation/metal/mtlcomputecommandencoder/1443142-setthreadgroupmemorylength
+    const int mem_size = GGML_PAD(ne00_padded*sizeof(int32_t), 16);
+
     id<MTLComputePipelineState> pipeline = nil;

     switch (order) {

@@ -2441,11 +2430,13 @@ static enum ggml_status ggml_metal_graph_compute(
     };

     [encoder setComputePipelineState:pipeline];
     [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
     [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
     [encoder setBytes:&ne00 length:sizeof( int64_t) atIndex:2];
+    [encoder setBytes:&ne00_padded length:sizeof( int64_t) atIndex:3];
+
+    [encoder setThreadgroupMemoryLength:mem_size atIndex:0];

-    [encoder dispatchThreadgroups:MTLSizeMake(1, nrows, 1) threadsPerThreadgroup:MTLSizeMake(ne00, 1, 1)];
+    [encoder dispatchThreadgroups:MTLSizeMake(1, nrows, 1) threadsPerThreadgroup:MTLSizeMake(ne00_padded, 1, 1)];
 } break;
 case GGML_OP_LEAKY_RELU:
 {
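Note: the Metal argsort path sizes its threadgroup scratch with GGML_PAD(ne00_padded*sizeof(int32_t), 16) because setThreadgroupMemoryLength expects a multiple of 16 bytes. A quick sketch of that rounding; pad_to below is a stand-in that yields the same result as GGML_PAD for these inputs:

// Round the per-row index buffer up to a 16-byte multiple, as done above.
#include <cstdint>
#include <cstdio>

static int64_t pad_to(int64_t x, int64_t align) {
    return ((x + align - 1) / align) * align;
}

int main() {
    const int64_t test_ncols[] = {1, 3, 100};
    for (int64_t ncols : test_ncols) {
        int64_t padded = 1;
        while (padded < ncols) padded *= 2;                            // next power of two
        int64_t mem = pad_to(padded * (int64_t) sizeof(int32_t), 16);  // threadgroup bytes
        printf("ncols=%lld -> padded=%lld -> %lld bytes\n",
               (long long) ncols, (long long) padded, (long long) mem);
    }
    // 1 -> 1 -> 16, 3 -> 4 -> 16, 100 -> 128 -> 512
}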
ggml-metal.metal (430 changed lines; diff suppressed by the viewer because it is too large)
ggml-quants.c

@@ -3482,19 +3482,30 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in
     float delta[4];
     uint16_t idx[4];

+#if QK_K != 64
     iq1m_scale_t scale;
+#endif

     for (int i = 0; i < nb; i++) {

         const uint16_t * sc = (const uint16_t *)x[i].scales;
+#if QK_K == 64
+        const float d = GGML_FP16_TO_FP32(x[i].d);
+#else
         scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
         const float d = GGML_FP16_TO_FP32(scale.f16);
+#endif
         const uint8_t * qs = x[i].qs;
         const uint8_t * qh = x[i].qh;

         for (int ib = 0; ib < QK_K/32; ++ib) {
+#if QK_K == 64
+            const float dl1 = d * (2*((sc[ib/2] >> (8*(ib%2)+0)) & 0xf) + 1);
+            const float dl2 = d * (2*((sc[ib/2] >> (8*(ib%2)+4)) & 0xf) + 1);
+#else
             const float dl1 = d * (2*((sc[ib/2] >> (6*(ib%2)+0)) & 0x7) + 1);
             const float dl2 = d * (2*((sc[ib/2] >> (6*(ib%2)+3)) & 0x7) + 1);
+#endif
             idx[0] = qs[0] | ((qh[0] << 8) & 0x700);
             idx[1] = qs[1] | ((qh[0] << 4) & 0x700);
             idx[2] = qs[2] | ((qh[1] << 8) & 0x700);

@@ -9757,11 +9768,17 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void

     const int nb = n / QK_K;

+#if QK_K != 64
     iq1m_scale_t scale;
+#endif

 #if defined __ARM_NEON

+#if QK_K == 64
+    const int32x4_t mask = vdupq_n_s32(0xf);
+#else
     const int32x4_t mask = vdupq_n_s32(0x7);
+#endif
     const int32x4_t mone = vdupq_n_s32(1);
     const int32x4_t mzero = vdupq_n_s32(0);

@@ -9785,7 +9802,9 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
     const uint8_t * qh = x[i].qh;
     const uint16_t * sc = (const uint16_t *)x[i].scales;

+#if QK_K != 64
     scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
+#endif

     int32x4_t sumi1 = mzero;
     int32x4_t sumi2 = mzero;

@@ -9814,7 +9833,11 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
     const int32x4_t p4 = vpaddq_s32(ggml_vdotq_s32(mzero, deltas.val[aux8[2]], q8b.val[2]), ggml_vdotq_s32(mzero, deltas.val[aux8[3]], q8b.val[3]));
     const int32x4_t p34 = vpaddq_s32(p3, p4);

+#if QK_K == 64
+    int32x4_t scales_4 = ggml_vld1q_u32(sc[0] >> 0, sc[0] >> 4, sc[0] >> 8, sc[0] >> 12);
+#else
     int32x4_t scales_4 = ggml_vld1q_u32(sc[ib/2] >> 0, sc[ib/2] >> 3, sc[ib/2] >> 6, sc[ib/2] >> 9);
+#endif
     scales_4 = vaddq_s32(vshlq_n_s32(vandq_s32(scales_4, mask), 1), mone);

     sumi1 = vmlaq_s32(sumi1, scales_4, p12);

@@ -9824,14 +9847,22 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
     }

+#if QK_K == 64
+    sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (vaddvq_s32(sumi1) + IQ1M_DELTA * vaddvq_s32(sumi2));
+#else
     sumf += y[i].d * GGML_FP16_TO_FP32(scale.f16) * (vaddvq_s32(sumi1) + IQ1M_DELTA * vaddvq_s32(sumi2));
+#endif
     }

     *s = sumf;

 #elif defined __AVX2__

+#if QK_K == 64
+    const __m256i mask = _mm256_set1_epi16(0xf);
+#else
     const __m256i mask = _mm256_set1_epi16(0x7);
+#endif
     const __m256i mone = _mm256_set1_epi16(1);

     __m256 accum1 = _mm256_setzero_ps();

@@ -9843,7 +9874,9 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
     const uint8_t * qh = x[i].qh;
     const uint16_t * sc = (const uint16_t *)x[i].scales;

+#if QK_K != 64
     scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
+#endif

     __m256i sumi1 = _mm256_setzero_si256();
     __m256i sumi2 = _mm256_setzero_si256();

@@ -9873,8 +9906,13 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void

     const __m256i dot3 = mul_add_epi8(delta1, q8b_1);
     const __m256i dot4 = mul_add_epi8(delta2, q8b_2);
+#if QK_K == 64
+    __m256i scale1 = MM256_SET_M128I(_mm_set1_epi16(sc[0] >> 4), _mm_set1_epi16(sc[0] >> 0));
+    __m256i scale2 = MM256_SET_M128I(_mm_set1_epi16(sc[0] >> 12), _mm_set1_epi16(sc[0] >> 8));
+#else
     __m256i scale1 = MM256_SET_M128I(_mm_set1_epi16(sc[ib/2] >> 3), _mm_set1_epi16(sc[ib/2] >> 0));
     __m256i scale2 = MM256_SET_M128I(_mm_set1_epi16(sc[ib/2] >> 9), _mm_set1_epi16(sc[ib/2] >> 6));
+#endif
     scale1 = _mm256_add_epi16(_mm256_slli_epi16(_mm256_and_si256(scale1, mask), 1), mone);
     scale2 = _mm256_add_epi16(_mm256_slli_epi16(_mm256_and_si256(scale2, mask), 1), mone);
     const __m256i p1 = _mm256_madd_epi16(dot1, scale1);

@@ -9888,7 +9926,11 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
     qs += 8; qh += 4;
     }

+#if QK_K == 64
+    const __m256 d = _mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d));
+#else
     const __m256 d = _mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(scale.f16));
+#endif
     accum1 = _mm256_fmadd_ps(d, _mm256_cvtepi32_ps(sumi1), accum1);
     accum2 = _mm256_fmadd_ps(d, _mm256_cvtepi32_ps(sumi2), accum2);

@@ -9908,7 +9950,9 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
     const uint8_t * qh = x[i].qh;
     const uint16_t * sc = (const uint16_t *)x[i].scales;
|
||||||
|
|
||||||
|
#if QK_K != 64
|
||||||
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
|
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
|
||||||
|
#endif
|
||||||
|
|
||||||
int sumi1 = 0, sumi2 = 0;
|
int sumi1 = 0, sumi2 = 0;
|
||||||
for (int ib = 0; ib < QK_K/32; ++ib) {
|
for (int ib = 0; ib < QK_K/32; ++ib) {
|
||||||
|
@ -9928,15 +9972,24 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
|
||||||
sum1[l/2] += lsum1;
|
sum1[l/2] += lsum1;
|
||||||
sum2[l/2] += lsum2*delta[l];
|
sum2[l/2] += lsum2*delta[l];
|
||||||
}
|
}
|
||||||
|
#if QK_K == 64
|
||||||
|
const int ls1 = 2*((sc[0] >> (8*(ib%2)+0)) & 0xf) + 1;
|
||||||
|
const int ls2 = 2*((sc[0] >> (8*(ib%2)+4)) & 0xf) + 1;
|
||||||
|
#else
|
||||||
const int ls1 = 2*((sc[ib/2] >> (6*(ib%2)+0)) & 0x7) + 1;
|
const int ls1 = 2*((sc[ib/2] >> (6*(ib%2)+0)) & 0x7) + 1;
|
||||||
const int ls2 = 2*((sc[ib/2] >> (6*(ib%2)+3)) & 0x7) + 1;
|
const int ls2 = 2*((sc[ib/2] >> (6*(ib%2)+3)) & 0x7) + 1;
|
||||||
|
#endif
|
||||||
sumi1 += sum1[0] * ls1 + sum1[1] * ls2;
|
sumi1 += sum1[0] * ls1 + sum1[1] * ls2;
|
||||||
sumi2 += sum2[0] * ls1 + sum2[1] * ls2;
|
sumi2 += sum2[0] * ls1 + sum2[1] * ls2;
|
||||||
qs += 4;
|
qs += 4;
|
||||||
qh += 2;
|
qh += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if QK_K == 64
|
||||||
|
sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2);
|
||||||
|
#else
|
||||||
sumf += GGML_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2);
|
sumf += GGML_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
*s = sumf;
|
*s = sumf;
|
||||||
|
@ -11987,7 +12040,9 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
||||||
|
|
||||||
for (int ibl = 0; ibl < nbl; ++ibl) {
|
for (int ibl = 0; ibl < nbl; ++ibl) {
|
||||||
|
|
||||||
//y[ibl].d = GGML_FP32_TO_FP16(0.f);
|
#if QK_K == 64
|
||||||
|
y[ibl].d = GGML_FP32_TO_FP16(0.f);
|
||||||
|
#endif
|
||||||
memset(y[ibl].qs, 0, QK_K/8);
|
memset(y[ibl].qs, 0, QK_K/8);
|
||||||
memset(y[ibl].qh, 0, QK_K/16);
|
memset(y[ibl].qh, 0, QK_K/16);
|
||||||
memset(y[ibl].scales, 0, QK_K/32);
|
memset(y[ibl].scales, 0, QK_K/32);
|
||||||
|
@ -12162,13 +12217,22 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t * sc = (uint16_t *)y[ibl].scales;
|
uint16_t * sc = (uint16_t *)y[ibl].scales;
|
||||||
|
#if QK_K == 64
|
||||||
|
float d = max_scale/31;
|
||||||
|
#else
|
||||||
float d = max_scale/15;
|
float d = max_scale/15;
|
||||||
|
#endif
|
||||||
float id = 1/d;
|
float id = 1/d;
|
||||||
float sumqx_f = 0, sumq2_f = 0;
|
float sumqx_f = 0, sumq2_f = 0;
|
||||||
for (int ib = 0; ib < QK_K/block_size; ++ib) {
|
for (int ib = 0; ib < QK_K/block_size; ++ib) {
|
||||||
int l = nearest_int(0.5f*(id*scales[ib+0]-1));
|
int l = nearest_int(0.5f*(id*scales[ib+0]-1));
|
||||||
|
#if QK_K == 64
|
||||||
|
l = MAX(0, MIN(15, l));
|
||||||
|
sc[ib/4] |= (l << 4*(ib%4));
|
||||||
|
#else
|
||||||
l = MAX(0, MIN(7, l));
|
l = MAX(0, MIN(7, l));
|
||||||
sc[ib/4] |= (l << 3*(ib%4));
|
sc[ib/4] |= (l << 3*(ib%4));
|
||||||
|
#endif
|
||||||
y[ibl].qh[ib] |= masks[shifts[ib]];
|
y[ibl].qh[ib] |= masks[shifts[ib]];
|
||||||
const float * xb = xbl + block_size*ib;
|
const float * xb = xbl + block_size*ib;
|
||||||
if (quant_weights) {
|
if (quant_weights) {
|
||||||
|
@ -12191,10 +12255,14 @@ static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy
|
||||||
}
|
}
|
||||||
if (sumq2_f > 0) d = sumqx_f/sumq2_f;
|
if (sumq2_f > 0) d = sumqx_f/sumq2_f;
|
||||||
s.f16 = GGML_FP32_TO_FP16(d*1.1125f); // 1.1125f is another fudge factor. Don't ask me why it is needed.
|
s.f16 = GGML_FP32_TO_FP16(d*1.1125f); // 1.1125f is another fudge factor. Don't ask me why it is needed.
|
||||||
|
#if QK_K == 64
|
||||||
|
y[ibl].d = s.f16;
|
||||||
|
#else
|
||||||
sc[0] |= ((s.u16 & 0x000f) << 12);
|
sc[0] |= ((s.u16 & 0x000f) << 12);
|
||||||
sc[1] |= ((s.u16 & 0x00f0) << 8);
|
sc[1] |= ((s.u16 & 0x00f0) << 8);
|
||||||
sc[2] |= ((s.u16 & 0x0f00) << 4);
|
sc[2] |= ((s.u16 & 0x0f00) << 4);
|
||||||
sc[3] |= ((s.u16 & 0xf000) << 0);
|
sc[3] |= ((s.u16 & 0xf000) << 0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
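The QK_K != 64 path above splits the superblock's fp16 scale, nibble by nibble, across the top bits of the four 16-bit scale words while the low bits keep the per-32 block scales. A minimal stand-alone sketch of that round trip, with hypothetical helper names (pack_scale, unpack_scale) and the fp16 value handled only as raw bits; this is not ggml's own API.

// Sketch of the iq1_m super-scale packing/unpacking shown in the diff above (QK_K == 256 case).
#include <stdint.h>
#include <assert.h>

typedef union { uint16_t u16; } iq1m_scale_t; // fp16 bits only; the real union also has an fp16 member

static void pack_scale(uint16_t sc[4], iq1m_scale_t s) {
    sc[0] |= (uint16_t)((s.u16 & 0x000f) << 12); // nibble 0 -> top nibble of sc[0]
    sc[1] |= (uint16_t)((s.u16 & 0x00f0) <<  8); // nibble 1 -> top nibble of sc[1]
    sc[2] |= (uint16_t)((s.u16 & 0x0f00) <<  4); // nibble 2 -> top nibble of sc[2]
    sc[3] |= (uint16_t)((s.u16 & 0xf000) <<  0); // nibble 3 -> top nibble of sc[3]
}

static iq1m_scale_t unpack_scale(const uint16_t sc[4]) {
    iq1m_scale_t s;
    s.u16 = (uint16_t)((sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000));
    return s;
}

int main(void) {
    uint16_t sc[4] = {0, 0, 0, 0};       // low bits would hold the per-block scales
    iq1m_scale_t s = { .u16 = 0x3c00 };  // assumed raw fp16 bit pattern, value irrelevant to the round trip
    pack_scale(sc, s);
    assert(unpack_scale(sc).u16 == s.u16);
    return 0;
}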
1275
ggml-sycl.cpp
File diff suppressed because it is too large
27814
ggml-vulkan-shaders.hpp
File diff suppressed because it is too large
633
ggml-vulkan.cpp
File diff suppressed because it is too large
@@ -11,17 +11,6 @@ extern "C" {
 #define GGML_VK_MAX_DEVICES 16

 GGML_API void ggml_vk_instance_init(void);
-GGML_API void ggml_vk_init_cpu_assist(void);
-
-GGML_API void ggml_vk_preallocate_buffers_graph_cpu_assist(struct ggml_tensor * node);
-GGML_API void ggml_vk_preallocate_buffers_cpu_assist(void);
-GGML_API void ggml_vk_build_graph_cpu_assist(struct ggml_tensor * node, bool last_node);
-GGML_API bool ggml_vk_compute_forward_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
-#ifdef GGML_VULKAN_CHECK_RESULTS
-void ggml_vk_check_results_1_cpu_assist(struct ggml_compute_params * params, struct ggml_tensor * tensor);
-#endif
-GGML_API void ggml_vk_graph_cleanup_cpu_assist(void);
-GGML_API void ggml_vk_free_cpu_assist(void);
-
 // backend API
 GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num);
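With the cpu-assist entry points removed, the Vulkan path is reached only through the backend API kept in this header. A rough usage sketch under assumptions: device 0 is used, and ggml-backend.h provides the generic teardown call.

// Minimal sketch: bring up and tear down the Vulkan backend via the remaining public API.
#include "ggml-vulkan.h"
#include "ggml-backend.h"
#include <stdio.h>

int main(void) {
    ggml_backend_t backend = ggml_backend_vk_init(0); // first Vulkan device (assumed)
    if (backend == NULL) {
        fprintf(stderr, "ggml_backend_vk_init() failed\n");
        return 1;
    }
    ggml_backend_free(backend); // generic backend teardown
    return 0;
}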
94
ggml.c
@@ -278,8 +278,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #include <Accelerate/Accelerate.h>
 #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
 #include "ggml-opencl.h"
-#elif defined(GGML_USE_VULKAN)
-#include "ggml-vulkan.h"
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #if defined(GGML_BLAS_USE_MKL)

@@ -289,8 +287,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #endif
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
-#elif defined(GGML_USE_VULKAN)
-#include "ggml-vulkan.h"
 #endif

 // floating point type used to accumulate sums

@@ -2718,8 +2714,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

 #if defined(GGML_USE_CLBLAST)
     ggml_cl_init();
-#elif defined(GGML_USE_VULKAN)
-    ggml_vk_init_cpu_assist();
 #endif

     ggml_setup_op_has_task_pass();

@@ -2939,7 +2933,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         data_size *= ne[i];
     }

-    GGML_ASSERT(view_src == NULL || data_size + view_offs <= ggml_nbytes(view_src));
+    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));

     void * data = view_src != NULL ? view_src->data : NULL;
     if (data != NULL) {

@@ -4580,45 +4574,38 @@ void ggml_mul_mat_set_prec(

 // ggml_mul_mat_id

+// NOTE: id will be removed in the future and instead all the experts listed in ids will be computed
+//       this will allow computing all the used experts in a single matrix multiplication
 struct ggml_tensor * ggml_mul_mat_id(
         struct ggml_context * ctx,
-        struct ggml_tensor * const as[],
-        int                   n_as,
+        struct ggml_tensor  * as,
         struct ggml_tensor  * ids,
         int                   id,
         struct ggml_tensor  * b) {

     GGML_ASSERT(ids->type == GGML_TYPE_I32);
-    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1);
-    GGML_ASSERT(ids->ne[1] == b->ne[1]);
+    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
+    GGML_ASSERT(ids->ne[1] == b->ne[1]); // must have an expert per b row
     GGML_ASSERT(ids->ne[2] == b->ne[2] && ids->ne[3] == b->ne[3]);
-    GGML_ASSERT(n_as > 0 && n_as <= GGML_MAX_SRC - 2);
-    GGML_ASSERT(id >= 0 && id < ids->ne[0]);
+    GGML_ASSERT(id >= 0 && id < ids->ne[0]); // valid id
+    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat

     bool is_node = false;

-    if (as[0]->grad || b->grad) {
+    if (as->grad || b->grad) {
         is_node = true;
     }

-    const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
+    const int64_t ne[4] = { as->ne[1], b->ne[1], b->ne[2], b->ne[3] };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

     ggml_set_op_params_i32(result, 0, id);
-    ggml_set_op_params_i32(result, 1, n_as);

     result->op   = GGML_OP_MUL_MAT_ID;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = ids;
+    result->src[0] = as;
     result->src[1] = b;
+    result->src[2] = ids;

-    for (int i = 0; i < n_as; i++) {
-        struct ggml_tensor * a = as[i];
-        GGML_ASSERT(ggml_are_same_shape(as[0], a));
-        GGML_ASSERT(ggml_can_mul_mat(a, b));
-        GGML_ASSERT(!ggml_is_transposed(a));
-        result->src[i + 2] = a;
-    }

     return result;
 }

@@ -10955,10 +10942,9 @@ static void ggml_compute_forward_mul_mat_id(
         const struct ggml_compute_params * params,
               struct ggml_tensor * dst) {

-    const struct ggml_tensor * ids = dst->src[0];
+    const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
+    const struct ggml_tensor * ids  = dst->src[2];
-    const struct ggml_tensor * src0 = dst->src[2]; // only for GGML_TENSOR_BINARY_OP_LOCALS

     GGML_TENSOR_BINARY_OP_LOCALS

@@ -10988,13 +10974,13 @@ static void ggml_compute_forward_mul_mat_id(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);

-    // broadcast factors
-    const int64_t r2 = ne12/ne02;
-    const int64_t r3 = ne13/ne03;
+    // broadcast is not supported with mmid
+    assert(ne12 == 1);
+    assert(ne13 == 1);

     // row groups
     const int id   = ggml_get_op_params_i32(dst, 0);
-    const int n_as = ggml_get_op_params_i32(dst, 1);
+    const int n_as = src0->ne[2];

     char * wdata_src1_end = (src1->type == vec_dot_type) ?
             (char *) params->wdata :

@@ -11054,7 +11040,7 @@ static void ggml_compute_forward_mul_mat_id(
             continue;
         }

-        const struct ggml_tensor * src0_cur = dst->src[cur_a + 2];
+        size_t src0_offset = cur_a*src0->nb[2];

         const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
         const size_t row_size = ggml_row_size(vec_dot_type, ne10);

@@ -11089,9 +11075,6 @@ static void ggml_compute_forward_mul_mat_id(
             continue;
         }

-        assert(ne12 % ne02 == 0);
-        assert(ne13 % ne03 == 0);
-
         // block-tiling attempt
         const int64_t blck_0 = 16;
         const int64_t blck_1 = 16;

@@ -11108,14 +11091,14 @@ static void ggml_compute_forward_mul_mat_id(
                 const int64_t i11 = MMID_MATRIX_ROW(cur_a, _i11);

                 // broadcast src0 into src1
-                const int64_t i03 = i13/r3;
-                const int64_t i02 = i12/r2;
+                //const int64_t i03 = i13/r3;
+                //const int64_t i02 = i12/r2;

                 const int64_t i1 = i11;
                 const int64_t i2 = i12;
                 const int64_t i3 = i13;

-                const char * src0_row = (const char *) src0_cur->data + (0 + i02*nb02 + i03*nb03);
+                const char * src0_row = (const char *) src0->data + src0_offset;

                 // desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
                 //       if it is, then we have either copied the data to params->wdata and made it contiguous or we are using

@@ -16129,20 +16112,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         return;
     }

-#if defined(GGML_USE_VULKAN)
-    const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
-#ifdef GGML_VULKAN_CHECK_RESULTS
-    if (skip_cpu) {
-        ggml_vk_check_results_1_cpu_assist(params, tensor);
-    }
-#endif
-    if (skip_cpu) {
-        return;
-    }
-    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
-    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
-#endif // GGML_USE_VULKAN
-
     switch (tensor->op) {
         case GGML_OP_DUP:
             {

@@ -18485,13 +18454,13 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
             case GGML_OP_MUL_MAT_ID:
                 {
                     cur = 0;
-                    const struct ggml_tensor * src0 = node->src[2];
+                    const struct ggml_tensor * src0 = node->src[0];
                     const struct ggml_tensor * src1 = node->src[1];
                     const enum ggml_type vec_dot_type = type_traits[src0->type].vec_dot_type;
                     if (src1->type != vec_dot_type) {
                         cur += ggml_row_size(vec_dot_type, ggml_nelements(src1));
                     }
-                    const int n_as = ggml_get_op_params_i32(node, 1);
+                    const int n_as = src0->ne[2];
                     cur += GGML_PAD(cur, sizeof(int64_t));       // align
                     cur += n_as * sizeof(int64_t);               // matrix_row_counts
                     cur += n_as * src1->ne[1] * sizeof(int64_t); // matrix_rows

@@ -18618,17 +18587,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
             }
         }

-#ifdef GGML_USE_VULKAN
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]);
-    }
-    ggml_vk_preallocate_buffers_cpu_assist();
-
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1);
-    }
-#endif
-
     const int n_threads = cplan->n_threads;

     struct ggml_compute_state_shared state_shared = {

@@ -18685,10 +18643,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
         }
     }

-#ifdef GGML_USE_VULKAN
-    ggml_vk_graph_cleanup_cpu_assist();
-#endif
-
     // performance stats (graph)
     {
         int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;
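After this change the experts live in one stacked tensor, and a slice is selected with a byte offset (cur_a*src0->nb[2]) instead of picking another src tensor. A small stand-alone sketch of that offset arithmetic, with made-up shapes and plain arrays rather than real ggml tensors.

// Illustrative only: addressing one expert's weight slice via byte strides, as in the diff above.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    // a stacked expert tensor with assumed shape ne = { n_embd, n_ff, n_expert, 1 }
    const int64_t ne[4] = { 8, 16, 4, 1 };
    size_t nb[4];
    nb[0] = sizeof(float);
    for (int i = 1; i < 4; ++i) {
        nb[i] = nb[i - 1] * (size_t) ne[i - 1]; // ggml-style byte strides
    }

    const int    cur_a       = 2;              // expert index picked from the ids tensor
    const size_t src0_offset = cur_a * nb[2];  // same formula as src0_offset in the diff
    printf("expert %d starts at byte offset %zu\n", cur_a, src0_offset);
    return 0;
}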
3
ggml.h
@@ -1171,8 +1171,7 @@ extern "C" {
     // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
     GGML_API struct ggml_tensor * ggml_mul_mat_id(
             struct ggml_context * ctx,
-            struct ggml_tensor * const as[],
-            int                   n_as,
+            struct ggml_tensor  * as,
             struct ggml_tensor  * ids,
             int                   id,
             struct ggml_tensor  * b);
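A caller-side sketch of the updated declaration, with assumed tensor shapes (it is not taken from llama.cpp): the experts are passed as a single 3-d tensor instead of an array plus a count.

// Hypothetical caller of the new ggml_mul_mat_id signature.
#include "ggml.h"

struct ggml_tensor * route_one_expert(
        struct ggml_context * ctx,
        struct ggml_tensor  * ffn_up_exps, // assumed shape [n_embd, n_ff, n_expert]
        struct ggml_tensor  * ids,         // GGML_TYPE_I32, one expert id per row of cur (assumed)
        struct ggml_tensor  * cur) {       // assumed shape [n_embd, n_tokens]
    // old form: ggml_mul_mat_id(ctx, as[], n_as, ids, id, b)
    // new form: the expert list is one stacked tensor
    return ggml_mul_mat_id(ctx, ffn_up_exps, ids, /*id =*/ 0, cur);
}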
@@ -18,6 +18,12 @@ shader_int8_ext = """
 """

 # Type-specific defines
+shader_f32_defines = """
+#define QUANT_K 1
+#define QUANT_R 1
+
+#define A_TYPE float
+"""
 shader_f16_defines = """
 #define QUANT_K 1
 #define QUANT_R 1

@@ -157,8 +163,8 @@ struct block_q6_K
 """

 # Dequant functions
-shader_f16_dequant_func = """
-#define DEQUANT_FUNC vec2 v = vec2(data_a[ib + 0], data_a[ib + 1]);
+shader_float_dequant_func = """
+#define DEQUANT_FUNC vec2 v = vec2(ib, ib); // data_a[ib], data_a[ib + 1]);
 """

 shader_q4_0_dequant_func = """

@@ -410,6 +416,133 @@ mulmat_load_q8_0 = """
        buf_a[buf_idx    ] = FLOAT_TYPE(v.x);
        buf_a[buf_idx + 1] = FLOAT_TYPE(v.y);"""

+
+mulmat_load_q2_K = """
+        const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
+        const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
+
+        const uint ib = idx / 128;                         // 2 values per idx
+        const uint iqs = idx % 128;                        // 0..127
+
+        const uint qsi = (iqs / 64) * 32 + (iqs % 16) * 2; // 0,2,4..30
+        const uint scalesi = iqs / 8;                      // 0..15
+        const uint qsshift = ((iqs % 64) / 16) * 2;        // 0,2,4,6
+
+        const uvec2 qs = uvec2(data_a[ib].qs[qsi], data_a[ib].qs[qsi + 1]);
+        const uint scales = data_a[ib].scales[scalesi];
+        const vec2 d = vec2(data_a[ib].d);
+
+        const vec2 v = d.x * float(scales & 0xF) * vec2((qs >> qsshift) & 3) - d.y * float(scales >> 4);
+
+        buf_a[buf_idx    ] = FLOAT_TYPE(v.x);
+        buf_a[buf_idx + 1] = FLOAT_TYPE(v.y);"""
+
+mulmat_load_q3_K = """
+        const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
+        const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
+
+        const uint ib = idx / 128;                // 2 values per idx
+        const uint iqs = idx % 128;               // 0..127
+
+        const uint n = iqs / 64;                  // 0,1
+        const uint qsi = n * 32 + (iqs % 16) * 2; // 0,2,4..62
+        const uint hmi =          (iqs % 16) * 2; // 0,2,4..30
+        const uint j = (iqs % 64) / 4;            // 0..3
+        const uint is = iqs / 8;                  // 0..15
+        const uint halfsplit = ((iqs % 64) / 16); // 0,1,2,3
+        const uint qsshift = halfsplit * 2;       // 0,2,4,6
+        const uint m = 1 << (4 * n + halfsplit);  // 1,2,4,8,16,32,64,128
+
+        const int8_t us = int8_t(is <  4 ? (data_a[ib].scales[is-0] & 0xF) | (((data_a[ib].scales[is+8] >> 0) & 3) << 4) :
+                                 is <  8 ? (data_a[ib].scales[is-0] & 0xF) | (((data_a[ib].scales[is+4] >> 2) & 3) << 4) :
+                                 is < 12 ? (data_a[ib].scales[is-8] >>  4) | (((data_a[ib].scales[is+0] >> 4) & 3) << 4) :
+                                           (data_a[ib].scales[is-8] >>  4) | (((data_a[ib].scales[is-4] >> 6) & 3) << 4));
+        const float dl = float(data_a[ib].d) * float(us - 32);
+
+        buf_a[buf_idx    ] = FLOAT_TYPE(dl * float(int8_t((data_a[ib].qs[qsi    ] >> qsshift) & 3) - (((data_a[ib].hmask[hmi    ] & m) != 0) ? 0 : 4)));
+        buf_a[buf_idx + 1] = FLOAT_TYPE(dl * float(int8_t((data_a[ib].qs[qsi + 1] >> qsshift) & 3) - (((data_a[ib].hmask[hmi + 1] & m) != 0) ? 0 : 4)));"""
+
+mulmat_load_q4_K = """
+        const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
+        const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
+
+        const uint ib = idx / 128;                // 2 values per idx
+        const uint iqs = idx % 128;               // 0..127
+
+        const uint n = iqs / 32;                  // 0,1,2,3
+        const uint b = (iqs % 32) / 16;           // 0,1
+        const uint is = 2 * n + b;                // 0..7
+        const uint qsi = n * 32 + (iqs % 16) * 2; // 0,2,4..126
+
+        const vec2 loadd = vec2(data_a[ib].d);
+
+        uint8_t sc;
+        uint8_t mbyte;
+        if (is < 4) {
+            sc    = uint8_t(data_a[ib].scales[is    ] & 63);
+            mbyte = uint8_t(data_a[ib].scales[is + 4] & 63);
+        } else {
+            sc    = uint8_t((data_a[ib].scales[is + 4] & 0xF) | ((data_a[ib].scales[is - 4] >> 6) << 4));
+            mbyte = uint8_t((data_a[ib].scales[is + 4] >>  4) | ((data_a[ib].scales[is    ] >> 6) << 4));
+        }
+        const float d = loadd.x * sc;
+        const float m = loadd.y * mbyte;
+
+        buf_a[buf_idx    ] = FLOAT_TYPE(d * float((data_a[ib].qs[qsi    ] >> (b * 4)) & 0xF) - m);
+        buf_a[buf_idx + 1] = FLOAT_TYPE(d * float((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) - m);"""
+
+mulmat_load_q5_K = """
+        const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
+        const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
+
+        const uint ib = idx / 128;                // 2 values per idx
+        const uint iqs = idx % 128;               // 0..127
+
+        const uint n = iqs / 32;                  // 0,1,2,3
+        const uint b = (iqs % 32) / 16;           // 0,1
+        const uint is = 2 * n + b;                // 0..7
+        const uint qsi = n * 32 + (iqs % 16) * 2; // 0,2,4..126
+        const uint qhi = (iqs % 16) * 2;          // 0,2,4..30
+
+        const uint8_t hm = uint8_t(1 << (iqs / 16));
+
+        const vec2 loadd = vec2(data_a[ib].d);
+
+        uint8_t sc;
+        uint8_t mbyte;
+        if (is < 4) {
+            sc    = uint8_t(data_a[ib].scales[is    ] & 63);
+            mbyte = uint8_t(data_a[ib].scales[is + 4] & 63);
+        } else {
+            sc    = uint8_t((data_a[ib].scales[is + 4] & 0xF) | ((data_a[ib].scales[is - 4] >> 6) << 4));
+            mbyte = uint8_t((data_a[ib].scales[is + 4] >>  4) | ((data_a[ib].scales[is    ] >> 6) << 4));
+        }
+        const float d = loadd.x * sc;
+        const float m = loadd.y * mbyte;
+
+        buf_a[buf_idx    ] = FLOAT_TYPE(d * (float((data_a[ib].qs[qsi    ] >> (b * 4)) & 0xF) + float((data_a[ib].qh[qhi    ] & hm) != 0 ? 16 : 0)) - m);
+        buf_a[buf_idx + 1] = FLOAT_TYPE(d * (float((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) + float((data_a[ib].qh[qhi + 1] & hm) != 0 ? 16 : 0)) - m);"""
+
+mulmat_load_q6_K = """
+        const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
+        const uint buf_idx = (loadc_a + l) * (BK+1) + loadr_a * LOAD_VEC_A;
+
+        const uint ib = idx / 128;                  // 2 values per idx
+        const uint iqs = idx % 128;                 // 0..127
+
+        const uint n = iqs / 64;                    // 0,1
+        const uint b = (iqs % 64) / 32;             // 0,1
+        const uint is_b = (iqs % 16) / 8;           // 0,1
+        const uint qhshift = ((iqs % 64) / 16) * 2; // 0,2,4,6
+        const uint is = 8 * n + qhshift + is_b;     // 0..15
+        const uint qsi = n * 64 + (iqs % 32) * 2;   // 0,2,4..126
+        const uint qhi = n * 32 + (iqs % 16) * 2;   // 0,2,4..62
+
+        const float dscale = float(data_a[ib].d) * float(data_a[ib].scales[is]);
+
+        buf_a[buf_idx    ] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi    ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi    ] >> qhshift) & 3) << 4)) - 32));
+        buf_a[buf_idx + 1] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));"""
+
 mulmat_body2 = """
     }
     [[unroll]] for (uint l = 0; l < BN; l += loadstride_b) {

@@ -1611,8 +1744,9 @@ layout (push_constant) uniform parameter
     uint ne10; uint ne11; uint ne12; uint ne13; uint nb10; uint nb11; uint nb12; uint nb13;
     uint d_offset;
     float param1; float param2;
-} p;
+} p;"""
+
+generic_unary_op_funcs = """
 layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

 layout (binding = 0) readonly buffer A {A_TYPE data_a[];};

@@ -1636,14 +1770,17 @@ uint dst_idx(uint idx) {
     const uint i11 = (idx - i13_offset - i12_offset) / p.ne10;
     const uint i10 = idx - i13_offset - i12_offset - i11*p.ne10;
     return i13*p.nb13 + i12*p.nb12 + i11*p.nb11 + i10*p.nb10;
-}
+}"""
+
+generic_unary_op_main = """
 void main() {
     if (gl_GlobalInvocationID.x >= p.ne) {
         return;
     }
 """

+generic_unary_op_combined = f"{generic_unary_op_head}\n{generic_unary_op_funcs}\n{generic_unary_op_main}"
+
 generic_binary_op_head = """#version 450

 #extension GL_EXT_shader_16bit_storage : require

@@ -1655,13 +1792,14 @@ layout (push_constant) uniform parameter
     uint ne10; uint ne11; uint ne12; uint ne13; uint nb10; uint nb11; uint nb12; uint nb13;
     uint ne20; uint ne21; uint ne22; uint ne23; uint nb20; uint nb21; uint nb22; uint nb23;
     uint d_offset;
-    uint param1; uint param2;
-} p;
+    float param1; float param2;
+} p;"""
+
+generic_binary_op_funcs = """
 layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;

 layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
-layout (binding = 1) readonly buffer B {A_TYPE data_b[];};
+layout (binding = 1) readonly buffer B {B_TYPE data_b[];};
 layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};

 uint src0_idx(uint idx) {

@@ -1693,14 +1831,17 @@ uint dst_idx(uint idx) {
     const uint i21 = (idx - i23_offset - i22_offset) / p.ne20;
     const uint i20 = idx - i23_offset - i22_offset - i21*p.ne20;
     return i23*p.nb23 + i22*p.nb22 + i21*p.nb21 + i20*p.nb20;
-}
+}"""
+
+generic_binary_op_main = """
 void main() {
     if (gl_GlobalInvocationID.x >= p.ne) {
         return;
     }
 """

+generic_binary_op_combined = f"{generic_binary_op_head}\n{generic_binary_op_funcs}\n{generic_binary_op_main}"
+
 # MUL F32
 mul_body = """
     data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]) * FLOAT_TYPE(data_b[src1_idx(gl_GlobalInvocationID.x)]));

@@ -1745,39 +1886,55 @@ cpy_f16_f16_end = """
 """

 # GET_ROWS
-get_rows_body = """
-#extension GL_EXT_control_flow_attributes : enable
-#extension GL_EXT_shader_8bit_storage : require
-
-layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
-
-layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
-layout (binding = 1) readonly buffer Y {int data_b[];};
-layout (binding = 2) writeonly buffer D {D_TYPE dst[];};
-
+get_rows_float_body = """
 void main() {
-    const uint col = int(gl_GlobalInvocationID.x) * 2;
-    const uint row = int(gl_GlobalInvocationID.y);
+    const uint i00 = gl_GlobalInvocationID.x;
+    const uint i10 = gl_GlobalInvocationID.y;
+    const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
+    const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;

-    if (col >= p.KY) {
+    if (i00 >= p.ne00) {
         return;
     }

-    const uint r = uint(data_b[row]);
+    const uint i01 = data_b[i10*p.nb10 + i11*p.nb11 + i12*p.nb12];

-    // copy data_a[r*p.KY + col] to dst[row*p.KX + col]
-    const uint xi = r*p.KY + col;
-    const uint di = row*p.KY + col;
+    const uint a_offset = i01*p.nb01 + i11*p.nb02 + i12*p.nb03;
+    const uint d_offset = i10*p.nb21 + i11*p.nb22 + i12*p.nb23;

-    const uint ib = xi/QUANT_K; // block index
-    const uint iqs = (xi%QUANT_K)/QUANT_R; // quant index
-    const uint iybs = di - di%QUANT_K; // y block start index
+#ifndef OPTIMIZATION_ERROR_WORKAROUND
+    data_d[d_offset + i00] = D_TYPE(data_a[a_offset + i00]);
+#else
+    data_d[d_offset + i00] = data_a[a_offset + i00];
+#endif
+}
+"""
+
+get_rows_body = """
+void main() {
+    const uint i00 = (gl_GlobalInvocationID.x)*2;
+    const uint i10 = gl_GlobalInvocationID.y;
+    const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
+    const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
+
+    if (i00 >= p.ne00) {
+        return;
+    }
+
+    const uint i01 = data_b[i10*p.nb10 + i11*p.nb11 + i12*p.nb12];
+
+    const uint a_offset = i01*p.nb01 + i11*p.nb02 + i12*p.nb03;
+    const uint d_offset = i10*p.nb21 + i11*p.nb22 + i12*p.nb23;
+
+    const uint ib = a_offset + i00/QUANT_K; // block index
+    const uint iqs = (i00%QUANT_K)/QUANT_R; // quant index
+    const uint iybs = i00 - i00%QUANT_K; // dst block start index
     const uint y_offset = QUANT_R == 1 ? 1 : QUANT_K/2;

     DEQUANT_FUNC

-    dst[iybs + iqs + 0]        = D_TYPE(v.x);
-    dst[iybs + iqs + y_offset] = D_TYPE(v.y);
+    data_d[d_offset + iybs + iqs           ] = D_TYPE(v.x);
+    data_d[d_offset + iybs + iqs + y_offset] = D_TYPE(v.y);
 }
 """

@@ -2418,6 +2575,31 @@ async def main():
         tasks.append(string_to_spv("matmul_q8_0_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q8_0", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
         tasks.append(string_to_spv("matmul_q8_0_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q8_0", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))

+        stream.clear()
+        stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q2_K_defines, mulmat_body1, mulmat_load_q2_K, mulmat_body2))
+        tasks.append(string_to_spv("matmul_q2_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q2_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
+        tasks.append(string_to_spv("matmul_q2_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q2_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
+
+        stream.clear()
+        stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q3_K_defines, mulmat_body1, mulmat_load_q3_K, mulmat_body2))
+        tasks.append(string_to_spv("matmul_q3_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q3_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
+        tasks.append(string_to_spv("matmul_q3_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q3_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
+
+        stream.clear()
+        stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q4_K_defines, mulmat_body1, mulmat_load_q4_K, mulmat_body2))
+        tasks.append(string_to_spv("matmul_q4_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q4_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
+        tasks.append(string_to_spv("matmul_q4_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q4_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
+
+        stream.clear()
+        stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q5_K_defines, mulmat_body1, mulmat_load_q5_K, mulmat_body2))
+        tasks.append(string_to_spv("matmul_q5_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q5_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
+        tasks.append(string_to_spv("matmul_q5_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q5_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
+
+        stream.clear()
+        stream.extend((mulmat_head, shader_int8_ext, shader_float_type, shader_q6_K_defines, mulmat_body1, mulmat_load_q6_K, mulmat_body2))
+        tasks.append(string_to_spv("matmul_q6_k_f32", "".join(stream), {"LOAD_VEC_A": 2, "A_TYPE": "block_q6_K", "B_TYPE": "float", "D_TYPE": "float"}, fp16))
+        tasks.append(string_to_spv("matmul_q6_k_f32_aligned", "".join(stream), {"LOAD_VEC_A": 2, "LOAD_VEC_B": load_vec, "A_TYPE": "block_q6_K", "B_TYPE": vec_type, "D_TYPE": "float"}, fp16))
+
     # Shaders where precision is needed, so no fp16 version

     # mul mat vec

@@ -2426,7 +2608,7 @@ async def main():
         stream.extend((mul_mat_vec_head, shader_int8_ext, shader_f32))

         if i == GGML_TYPE_F16:
-            stream.extend((shader_f16_defines, shader_f16_dequant_func, mul_mat_vec_body))
+            stream.extend((shader_f16_defines, shader_float_dequant_func, mul_mat_vec_body))
         elif i == GGML_TYPE_Q4_0:
             stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, mul_mat_vec_body))
         elif i == GGML_TYPE_Q4_1:

@@ -2488,25 +2670,32 @@ async def main():
     # get_rows
     for i in range(0, VK_NUM_TYPES):
         stream.clear()
-        stream.extend((generic_head, shader_int8_ext, shader_f32))
+        stream.extend((generic_binary_op_head, shader_int8_ext, shader_f32))
+        optimization_workaround = False

-        if i == GGML_TYPE_F16:
-            stream.extend((shader_f16_defines, shader_f16_dequant_func, get_rows_body))
+        if i == GGML_TYPE_F32:
+            stream.extend((shader_f32_defines, generic_binary_op_funcs, get_rows_float_body))
+        elif i == GGML_TYPE_F16:
+            stream.extend((shader_f16_defines, generic_binary_op_funcs, get_rows_float_body))
+            optimization_workaround = True
         elif i == GGML_TYPE_Q4_0:
-            stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, get_rows_body))
+            stream.extend((shader_q4_0_defines, shader_q4_0_dequant_func, generic_binary_op_funcs, get_rows_body))
         elif i == GGML_TYPE_Q4_1:
-            stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, get_rows_body))
+            stream.extend((shader_q4_1_defines, shader_q4_1_dequant_func, generic_binary_op_funcs, get_rows_body))
         elif i == GGML_TYPE_Q5_0:
-            stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, get_rows_body))
+            stream.extend((shader_q5_0_defines, shader_q5_0_dequant_func, generic_binary_op_funcs, get_rows_body))
         elif i == GGML_TYPE_Q5_1:
-            stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, get_rows_body))
+            stream.extend((shader_q5_1_defines, shader_q5_1_dequant_func, generic_binary_op_funcs, get_rows_body))
         elif i == GGML_TYPE_Q8_0:
-            stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, get_rows_body))
+            stream.extend((shader_q8_0_defines, shader_q8_0_dequant_func, generic_binary_op_funcs, get_rows_body))
         else:
            continue

-        tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float16_t"}))
-        tasks.append(string_to_spv(f"get_rows_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "float", "D_TYPE": "float"}))
+        if optimization_workaround:
+            tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "int", "D_TYPE": "float16_t", "OPTIMIZATION_ERROR_WORKAROUND": "1"}))
+        else:
+            tasks.append(string_to_spv(f"get_rows_{type_names[i]}", "".join(stream), {"B_TYPE": "int", "D_TYPE": "float16_t"}))
+        tasks.append(string_to_spv(f"get_rows_{type_names[i]}_f32", "".join(stream), {"B_TYPE": "int", "D_TYPE": "float"}))

     tasks.append(string_to_spv("mul_mat_vec_p021_f16_f32", mul_mat_p021_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))
     tasks.append(string_to_spv("mul_mat_vec_nc_f16_f32", mul_mat_nc_src, {"A_TYPE": "float16_t", "B_TYPE": "float", "D_TYPE": "float"}))

@@ -2515,20 +2704,20 @@ async def main():
     tasks.append(string_to_spv("norm_f32", f"{generic_head}\n{shader_f32}\n{norm_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
     tasks.append(string_to_spv("rms_norm_f32", f"{generic_head}\n{shader_f32}\n{rms_norm_body}", {"A_TYPE": "float", "D_TYPE": "float"}))

-    tasks.append(string_to_spv("cpy_f32_f32", f"{generic_unary_op_head}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float"}))
-    tasks.append(string_to_spv("cpy_f32_f16", f"{generic_unary_op_head}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
-    tasks.append(string_to_spv("cpy_f16_f16", f"{generic_unary_op_head}\n{cpy_f16_f16_end}", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))
+    tasks.append(string_to_spv("cpy_f32_f32", f"{generic_unary_op_combined}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float"}))
+    tasks.append(string_to_spv("cpy_f32_f16", f"{generic_unary_op_combined}\n{cpy_end}", {"A_TYPE": "float", "D_TYPE": "float16_t"}))
+    tasks.append(string_to_spv("cpy_f16_f16", f"{generic_unary_op_combined}\n{cpy_f16_f16_end}", {"A_TYPE": "float16_t", "D_TYPE": "float16_t"}))

-    tasks.append(string_to_spv("add_f32", f"{generic_binary_op_head}\n{add_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
+    tasks.append(string_to_spv("add_f32", f"{generic_binary_op_combined}\n{add_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))

     tasks.append(string_to_spv("split_k_reduce", mulmat_split_k_reduce_src, {}))
-    tasks.append(string_to_spv("mul_f32", f"{generic_binary_op_head}\n{mul_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
+    tasks.append(string_to_spv("mul_f32", f"{generic_binary_op_combined}\n{mul_body}", {"A_TYPE": "float", "B_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))

-    tasks.append(string_to_spv("scale_f32", f"{generic_unary_op_head}\n{scale_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
+    tasks.append(string_to_spv("scale_f32", f"{generic_unary_op_combined}\n{scale_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))

-    tasks.append(string_to_spv("sqr_f32", f"{generic_unary_op_head}\n{sqr_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
+    tasks.append(string_to_spv("sqr_f32", f"{generic_unary_op_combined}\n{sqr_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))

-    tasks.append(string_to_spv("clamp_f32", f"{generic_unary_op_head}\n{clamp_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))
+    tasks.append(string_to_spv("clamp_f32", f"{generic_unary_op_combined}\n{clamp_body}", {"A_TYPE": "float", "D_TYPE": "float", "FLOAT_TYPE": "float"}))

     tasks.append(string_to_spv("gelu_f32", f"{generic_head}\n{shader_f32}\n{gelu_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
     tasks.append(string_to_spv("silu_f32", f"{generic_head}\n{shader_f32}\n{silu_body}", {"A_TYPE": "float", "D_TYPE": "float"}))
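The new get_rows bodies gather source rows through the index buffer with explicit strides. A CPU-side sketch of the same gather on plain C arrays (shapes invented for the demo); this is not the GLSL itself and ignores the quantized and broadcast cases.

// Row gather: dst[i10][:] = src[ids[i10]][:], the indexing pattern used by get_rows_float_body.
#include <stdio.h>

#define NE00 4   // row length
#define NE01 3   // rows in the source
#define NE10 5   // rows to gather

int main(void) {
    float src[NE01][NE00];
    int   ids[NE10] = { 2, 0, 1, 2, 0 };
    float dst[NE10][NE00];

    for (int r = 0; r < NE01; ++r)
        for (int c = 0; c < NE00; ++c)
            src[r][c] = (float)(r * 10 + c);

    for (int i10 = 0; i10 < NE10; ++i10) {
        const int i01 = ids[i10];               // row index read from the ids buffer
        for (int i00 = 0; i00 < NE00; ++i00) {
            dst[i10][i00] = src[i01][i00];      // copy one element of the selected row
        }
    }

    printf("dst[0][0] = %g (row %d of src)\n", dst[0][0], ids[0]);
    return 0;
}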
@@ -24,6 +24,7 @@ class Keys:
         ALIGNMENT            = "general.alignment"
         NAME                 = "general.name"
         AUTHOR               = "general.author"
+        VERSION              = "general.version"
         URL                  = "general.url"
         DESCRIPTION          = "general.description"
         LICENSE              = "general.license"

@@ -123,6 +124,7 @@ class MODEL_ARCH(IntEnum):
     GEMMA      = auto()
     STARCODER2 = auto()
     MAMBA      = auto()
+    XVERSE     = auto()
     COMMAND_R  = auto()

@@ -191,6 +193,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.GEMMA:          "gemma",
     MODEL_ARCH.STARCODER2:     "starcoder2",
     MODEL_ARCH.MAMBA:          "mamba",
+    MODEL_ARCH.XVERSE:         "xverse",
     MODEL_ARCH.COMMAND_R:      "command-r",
 }

@@ -219,9 +222,9 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_DOWN:       "blk.{bid}.ffn_down",
     MODEL_TENSOR.FFN_UP:         "blk.{bid}.ffn_up",
     MODEL_TENSOR.FFN_ACT:        "blk.{bid}.ffn",
-    MODEL_TENSOR.FFN_GATE_EXP:   "blk.{bid}.ffn_gate.{xid}",
-    MODEL_TENSOR.FFN_DOWN_EXP:   "blk.{bid}.ffn_down.{xid}",
-    MODEL_TENSOR.FFN_UP_EXP:     "blk.{bid}.ffn_up.{xid}",
+    MODEL_TENSOR.FFN_GATE_EXP:   "blk.{bid}.ffn_gate_exps",
+    MODEL_TENSOR.FFN_DOWN_EXP:   "blk.{bid}.ffn_down_exps",
+    MODEL_TENSOR.FFN_UP_EXP:     "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN:         "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:     "blk.{bid}.ssm_conv1d",

@@ -365,6 +368,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_ACT,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.POS_EMBD,
     ],
     MODEL_ARCH.GPTJ: [
         MODEL_TENSOR.TOKEN_EMBD,

@@ -606,6 +612,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.SSM_D,
         MODEL_TENSOR.SSM_OUT,
     ],
+    MODEL_ARCH.XVERSE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.COMMAND_R: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

@@ -650,6 +672,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_ROT_EMBD,
     ],
+    MODEL_ARCH.XVERSE: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
 }

 #
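The merged-expert names above drop the per-expert {xid} suffix, so one name per block covers all experts. A trivial sketch of producing such names for a few blocks with plain string formatting; this stands in for what gguf-py does and is not its code.

// Format the merged expert tensor names introduced above for a couple of block indices.
#include <stdio.h>

int main(void) {
    char name[64];
    for (int bid = 0; bid < 2; ++bid) {
        snprintf(name, sizeof(name), "blk.%d.ffn_up_exps", bid);
        printf("%s\n", name); // e.g. blk.0.ffn_up_exps
    }
    return 0;
}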
@@ -296,6 +296,9 @@ class GGUFWriter:
     def add_author(self, author: str) -> None:
         self.add_string(Keys.General.AUTHOR, author)

+    def add_version(self, version: str) -> None:
+        self.add_string(Keys.General.VERSION, version)
+
     def add_tensor_data_layout(self, layout: str) -> None:
         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)

@@ -305,6 +308,9 @@ class GGUFWriter:
     def add_description(self, description: str) -> None:
         self.add_string(Keys.General.DESCRIPTION, description)

+    def add_licence(self, licence: str) -> None:
+        self.add_string(Keys.General.LICENSE, licence)
+
     def add_source_url(self, url: str) -> None:
         self.add_string(Keys.General.SOURCE_URL, url)
@@ -231,9 +231,8 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.{xid}.w3",           # mixtral
-            "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3", # mixtral
-            "transformer.decoder_layer.{bid}.moe.{xid}.linear_v",   # Grok
+            "layers.{bid}.feed_forward.experts.w3",         # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
         ),

         # AWQ-activation gate
@@ -252,9 +251,8 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.{xid}.w1",           # mixtral
-            "model.layers.{bid}.block_sparse_moe.experts.{xid}.w1", # mixtral
-            "transformer.decoder_layer.{bid}.moe.{xid}.linear"      # Grok
+            "layers.{bid}.feed_forward.experts.w1",      # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear" # Grok (merged)
         ),

         # Feed-forward down
@@ -280,20 +278,20 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
-            "layers.{bid}.feed_forward.experts.{xid}.w2",           # mixtral
-            "model.layers.{bid}.block_sparse_moe.experts.{xid}.w2", # mixtral
-            "transformer.decoder_layer.{bid}.moe.{xid}.linear_1",   # Grok
+            "layers.{bid}.feed_forward.experts.w2",         # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm", # persimmon
+            "transformer.blocks.{bid}.attn.q_ln",       # sea-lion
         ),

         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm", # persimmon
+            "transformer.blocks.{bid}.attn.k_ln",       # sea-lion
         ),

         MODEL_TENSOR.ROPE_FREQS: (
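As the "(merged)" comments indicate, the per-expert MoE weights are now expected as a single stacked tensor per layer, so the name templates drop the `{xid}` expert index. A tiny sketch of the resulting names — the block and expert indices here are placeholders for illustration only:

# Hedged sketch: shows the naming change only; bid/xid are hypothetical indices.
bid, xid = 0, 3  # layer (block) index and expert index

# before this change: one tensor name per expert
old_name = "layers.{bid}.feed_forward.experts.{xid}.w3".format(bid=bid, xid=xid)
# -> "layers.0.feed_forward.experts.3.w3"

# after this change: experts are merged into one stacked tensor per layer,
# so the template no longer carries an expert index
new_name = "layers.{bid}.feed_forward.experts.w3".format(bid=bid)
# -> "layers.0.feed_forward.experts.w3"

print(old_name, new_name)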
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.8.0"
+version = "0.9.0"
 description = "Read and write ML models in GGUF for GGML"
 authors = ["GGML <ggml@ggml.ai>"]
 packages = [
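The gguf-py package version moves to 0.9.0 with the merged-expert mapping above. If a script needs to check which release is installed, one possible sketch (assuming the package was installed via pip so its metadata is visible):

# Hedged sketch: relies only on standard-library package metadata.
from importlib.metadata import version

print(version("gguf"))  # expected to report 0.9.0 after this change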
13 klite.embd
@@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
 Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 127
+Current version: 128
 -Concedo
 -->
@@ -198,9 +198,14 @@ Current version: 127
 }

 #actionmenuitems button,#actionmenuitems2 button {
-	width: 60px;
-	padding: 4px 4px;
-	font-size: 12px;
+	width: 76px;
+	}
+	@media (max-width: 720px) {
+		#actionmenuitems button,#actionmenuitems2 button {
+		width: 60px;
+		padding: 4px 4px;
+		font-size: 12px;
+	}
 }

 #messagefield {
38 llama.h
@@ -60,9 +60,9 @@ extern "C" {

     enum llama_vocab_type {
         LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
-        LLAMA_VOCAB_TYPE_SPM  = 1, // SentencePiece
-        LLAMA_VOCAB_TYPE_BPE  = 2, // Byte Pair Encoding
-        LLAMA_VOCAB_TYPE_WPM  = 3, // WordPiece
+        LLAMA_VOCAB_TYPE_SPM  = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
+        LLAMA_VOCAB_TYPE_BPE  = 2, // GPT-2 tokenizer based on byte-level BPE
+        LLAMA_VOCAB_TYPE_WPM  = 3, // BERT tokenizer based on WordPiece
     };

     // note: these values should be synchronized with ggml_rope
@@ -1004,17 +1004,45 @@ extern "C" {
 #endif

 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
-#ifdef LLAMA_API_INTERNAL
+//#ifdef LLAMA_API_INTERNAL

 #include <vector>
 #include <string>

 struct ggml_tensor;

+struct llama_partial_utf8 {
+    uint32_t value;    // bit value so far (unshifted)
+    int      n_remain; // num bytes remaining; -1 indicates invalid sequence
+};
+
+struct llama_grammar {
+    const std::vector<std::vector<llama_grammar_element>>   rules;
+    std::vector<std::vector<const llama_grammar_element *>> stacks;
+
+    // buffer for partially generated UTF-8 sequence from accepted tokens
+    llama_partial_utf8 partial_utf8;
+};
+
+struct llama_grammar_candidate {
+    size_t               index;
+    const uint32_t     * code_points;
+    llama_partial_utf8   partial_utf8;
+};
+
 const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map(
     struct llama_context * ctx
 );

-#endif // LLAMA_API_INTERNAL
+std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+        const std::vector<std::vector<llama_grammar_element>>         & rules,
+        const std::vector<std::vector<const llama_grammar_element *>> & stacks,
+        const uint32_t                                                   chr);
+
+std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+        const std::string & src,
+        llama_partial_utf8  partial_start);
+
+//#endif // LLAMA_API_INTERNAL

 #endif // LLAMA_H
243 tests/test-grammar-integration.cpp Normal file
@@ -0,0 +1,243 @@
#ifdef NDEBUG
#undef NDEBUG
#endif

#define LLAMA_API_INTERNAL

#include "ggml.h"
#include "llama.h"
#include "grammar-parser.h"
#include "unicode.h"
#include <cassert>
#include <string>

static void test_simple_grammar() {
    // Test case for a simple grammar
    const std::string grammar_str = R"""(root ::= expr
expr ::= term ("+" term)*
term ::= number
number ::= [0-9]+)""";

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // Ensure we parsed correctly
    assert(!parsed_grammar.rules.empty());

    // Ensure we have a root node
    assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));

    std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
    llama_grammar* grammar = llama_grammar_init(
        grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));

    std::string input = "123+456";

    auto decoded = decode_utf8(input, {});

    const auto & code_points = decoded.first;

    for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
        auto prev_stacks = grammar->stacks;
        grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
        assert(!grammar->stacks.empty());
    }

    bool completed_grammar = false;

    for (const auto & stack : grammar->stacks) {
        if (stack.empty()) {
            completed_grammar = true;
            break;
        }
    }

    assert(completed_grammar);

    // Clean up allocated memory
    llama_grammar_free(grammar);
}

static void test_complex_grammar() {
    // Test case for a more complex grammar, with both failure strings and success strings
    const std::string grammar_str = R"""(root ::= expression
expression ::= term ws (("+"|"-") ws term)*
term ::= factor ws (("*"|"/") ws factor)*
factor ::= number | variable | "(" expression ")" | function-call
number ::= [0-9]+
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
ws ::= [ \t\n\r]?)""";

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // Ensure we parsed correctly
    assert(!parsed_grammar.rules.empty());

    // Ensure we have a root node
    assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));

    std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
    llama_grammar* grammar = llama_grammar_init(
        grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));

    // Save the original grammar stacks so that we can reset after every new string we want to test
    auto original_stacks = grammar->stacks;

    // Test a few strings
    std::vector<std::string> test_strings_pass = {
        "42",
        "1*2*3*4*5",
        "x",
        "x+10",
        "x1+y2",
        "(a+b)*(c-d)",
        "func()",
        "func(x,y+2)",
        "a*(b+c)-d/e",
        "f(g(x),h(y,z))",
        "x + 10",
        "x1 + y2",
        "(a + b) * (c - d)",
        "func()",
        "func(x, y + 2)",
        "a * (b + c) - d / e",
        "f(g(x), h(y, z))",
        "123+456",
        "123*456*789-123/456+789*123",
        "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
    };

    std::vector<std::string> test_strings_fail = {
        "+",
        "/ 3x",
        "x + + y",
        "a * / b",
        "func(,)",
        "func(x y)",
        "(a + b",
        "x + y)",
        "a + b * (c - d",
        "42 +",
        "x +",
        "x + 10 +",
        "(a + b) * (c - d",
        "func(",
        "func(x, y + 2",
        "a * (b + c) - d /",
        "f(g(x), h(y, z)",
        "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
    };

    // Passing strings
    for (const auto & test_string : test_strings_pass) {
        auto decoded = decode_utf8(test_string, {});

        const auto & code_points = decoded.first;

        int pos = 0;
        for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
            ++pos;
            auto prev_stacks = grammar->stacks;
            grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);

            // Expect that each code point will not cause the grammar to fail
            if (grammar->stacks.empty()) {
                fprintf(stdout, "Error at position %d\n", pos);
                fprintf(stderr, "Unexpected character '%s'\n", unicode_cpt_to_utf8(*it).c_str());
                fprintf(stderr, "Input string is %s:\n", test_string.c_str());
            }
            assert(!grammar->stacks.empty());
        }

        bool completed_grammar = false;

        for (const auto & stack : grammar->stacks) {
            if (stack.empty()) {
                completed_grammar = true;
                break;
            }
        }

        assert(completed_grammar);

        // Reset the grammar stacks
        grammar->stacks = original_stacks;
    }

    // Failing strings
    for (const auto & test_string : test_strings_fail) {
        auto decoded = decode_utf8(test_string, {});

        const auto & code_points = decoded.first;
        bool parse_failed = false;

        for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
            auto prev_stacks = grammar->stacks;
            grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
            if (grammar->stacks.empty()) {
                parse_failed = true;
                break;
            }
            assert(!grammar->stacks.empty());
        }

        bool completed_grammar = false;

        for (const auto & stack : grammar->stacks) {
            if (stack.empty()) {
                completed_grammar = true;
                break;
            }
        }

        // Ensure that the grammar is not completed, or that each string failed to match as-expected
        assert((!completed_grammar) || parse_failed);

        // Reset the grammar stacks
        grammar->stacks = original_stacks;
    }

    // Clean up allocated memory
    llama_grammar_free(grammar);
}

static void test_failure_missing_root() {
    // Test case for a grammar that is missing a root rule
    const std::string grammar_str = R"""(rot ::= expr
expr ::= term ("+" term)*
term ::= number
number ::= [0-9]+)""";

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // Ensure we parsed correctly
    assert(!parsed_grammar.rules.empty());

    // Ensure we do NOT have a root node
    assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
}

static void test_failure_missing_reference() {
    // Test case for a grammar that is missing a referenced rule
    const std::string grammar_str = R"""(root ::= expr
expr ::= term ("+" term)*
term ::= numero
number ::= [0-9]+)""";

    fprintf(stderr, "Expected error: ");

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // Ensure we did NOT parsed correctly
    assert(parsed_grammar.rules.empty());

    fprintf(stderr, "End of expected error. Test successful.\n");
}

int main() {
    test_simple_grammar();
    test_complex_grammar();
    test_failure_missing_root();
    test_failure_missing_reference();
    return 0;
}