Merge commit 'c05e8c9934' into concedo_experimental

This commit is contained in:
Concedo 2025-01-16 15:20:43 +08:00
commit f5cf054335
9 changed files with 112 additions and 12 deletions

View file

@ -478,6 +478,11 @@ class Model:
return modelcls
return func
@classmethod
def print_registered_models(cls):
    """Log the name of every registered model class, one per line, sorted.

    Names are read from the keys of ``cls._model_classes`` and written
    through the module-level ``logger``.
    """
    registered = cls._model_classes.keys()
    # NOTE(review): emitted at ERROR level, presumably so the list is shown
    # even before logging has been configured -- confirm against the caller.
    for arch_name in sorted(registered):
        logger.error(f"- {arch_name}")
@classmethod
def from_model_architecture(cls, arch: str) -> type[Model]:
try:
@ -4929,6 +4934,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"model", type=Path,
help="directory containing model file",
nargs="?",
)
parser.add_argument(
"--use-temp-file", action="store_true",
@ -4966,8 +4972,15 @@ def parse_args() -> argparse.Namespace:
"--metadata", type=Path,
help="Specify the path for an authorship metadata override file"
)
parser.add_argument(
"--print-supported-models", action="store_true",
help="Print the supported models"
)
return parser.parse_args()
args = parser.parse_args()
if not args.print_supported_models and args.model is None:
parser.error("the following arguments are required: model")
return args
def split_str_to_n_bytes(split_str: str) -> int:
@ -4991,6 +5004,11 @@ def split_str_to_n_bytes(split_str: str) -> int:
def main() -> None:
args = parse_args()
if args.print_supported_models:
logger.error("Supported models:")
Model.print_registered_models()
sys.exit(0)
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:

80
examples/tts/README.md Normal file
View file

@ -0,0 +1,80 @@
# llama.cpp/examples/tts
This example demonstrates the Text To Speech feature. It uses a
[model](https://www.outeai.com/blog/outetts-0.2-500m) from
[outeai](https://www.outeai.com/).
## Quickstart
If you have built llama.cpp with `-DLLAMA_CURL=ON` you can simply run the
following command and the required models will be downloaded automatically:
```console
$ build/bin/llama-tts --tts-oute-default -p "Hello world" && aplay output.wav
```
For details about the models and how to convert them to the required format
see the following sections.
### Model conversion
Check out or download the repository that contains the LLM model:
```console
$ pushd models
$ git clone --branch main --single-branch --depth 1 https://huggingface.co/OuteAI/OuteTTS-0.2-500M
$ cd OuteTTS-0.2-500M && git lfs install && git lfs pull
$ popd
```
Convert the model to .gguf format:
```console
(venv) python convert_hf_to_gguf.py models/OuteTTS-0.2-500M \
--outfile models/outetts-0.2-0.5B-f16.gguf --outtype f16
```
The generated model will be `models/outetts-0.2-0.5B-f16.gguf`.
We can optionally quantize this to Q8_0 using the following command:
```console
$ build/bin/llama-quantize models/outetts-0.2-0.5B-f16.gguf \
models/outetts-0.2-0.5B-q8_0.gguf q8_0
```
The quantized model will be `models/outetts-0.2-0.5B-q8_0.gguf`.
Next we do something similar for the audio decoder. First download or check out
the model for the voice decoder:
```console
$ pushd models
$ git clone --branch main --single-branch --depth 1 https://huggingface.co/novateur/WavTokenizer-large-speech-75token
$ cd WavTokenizer-large-speech-75token && git lfs install && git lfs pull
$ popd
```
This model file is a PyTorch checkpoint (.ckpt), so we first need to convert it to
Hugging Face format:
```console
(venv) python examples/tts/convert_pt_to_hf.py \
models/WavTokenizer-large-speech-75token/wavtokenizer_large_speech_320_24k.ckpt
...
Model has been successfully converted and saved to models/WavTokenizer-large-speech-75token/model.safetensors
Metadata has been saved to models/WavTokenizer-large-speech-75token/index.json
Config has been saved to models/WavTokenizer-large-speech-75tokenconfig.json
```
Then we can convert the Hugging Face format to gguf:
```console
(venv) python convert_hf_to_gguf.py models/WavTokenizer-large-speech-75token \
--outfile models/wavtokenizer-large-75-f16.gguf --outtype f16
...
INFO:hf-to-gguf:Model successfully exported to models/wavtokenizer-large-75-f16.gguf
```
### Running the example
With both of the models generated, the LLM model and the voice decoder model,
we can run the example:
```console
$ build/bin/llama-tts -m ./models/outetts-0.2-0.5B-q8_0.gguf \
-mv ./models/wavtokenizer-large-75-f16.gguf \
-p "Hello world"
...
main: audio written to file 'output.wav'
```
The output.wav file will contain the audio of the prompt. This can be heard
by playing the file with a media player. On Linux the following command will
play the audio:
```console
$ aplay output.wav
```

View file

@ -15,6 +15,8 @@ pip install gguf
[examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.
[examples/reader.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/reader.py) — Extracts and displays key-value pairs and tensor details from a GGUF file in a readable format.
[gguf/scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.
[gguf/scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.

View file

@ -11,8 +11,8 @@ from pathlib import Path
import numpy as np
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import gguf

View file

@ -12,8 +12,8 @@ from typing import Any
import numpy as np
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from gguf import GGUFReader, GGUFValueType, ReaderTensor # noqa: E402

View file

@ -13,8 +13,8 @@ from pathlib import Path
from tqdm import tqdm
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from gguf import GGUFReader # noqa: E402

View file

@ -13,8 +13,8 @@ from tqdm import tqdm
from typing import Any, Sequence, NamedTuple
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import gguf

View file

@ -6,8 +6,8 @@ import sys
from pathlib import Path
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent))
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from gguf import GGUFReader # noqa: E402

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "gguf"
version = "0.14.0"
version = "0.15.0"
description = "Read and write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"]
packages = [