From ff3fcabc727b2dd0c477d23a258217b27cc639fb Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Fri, 10 Jan 2025 11:30:53 +0100
Subject: [PATCH 1/4] convert : add --print-supported-models option (#11172)

* convert : add --print-supported-models option

This commit adds a new option to the convert_hf_to_gguf.py script to
print the supported models.

The motivation for this is that it can be useful to know which models
are supported by the script without having to look at the code.

Example usage:
```console
$ ./convert_hf_to_gguf.py --print-supported-models
Supported models:
- GPTNeoXForCausalLM
- BloomForCausalLM
- BloomModel
- MPTForCausalLM
- OrionForCausalLM
- BaichuanForCausalLM
- BaiChuanForCausalLM
- XverseForCausalLM
- FalconForCausalLM
- RWForCausalLM
- GPTBigCodeForCausalLM
- GPTRefactForCausalLM
- StableLmForCausalLM
- StableLMEpochForCausalLM
- LlavaStableLMEpochForCausalLM
- LLaMAForCausalLM
- LlamaForCausalLM
- MistralForCausalLM
- MixtralForCausalLM
- DeciLMForCausalLM
- BitnetForCausalLM
- GrokForCausalLM
- DbrxForCausalLM
- MiniCPMForCausalLM
- MiniCPM3ForCausalLM
- QWenLMHeadModel
- Qwen2ForCausalLM
- Qwen2VLForConditionalGeneration
- WavTokenizerDec
- Qwen2MoeForCausalLM
- GPT2LMHeadModel
- PhiForCausalLM
- Phi3ForCausalLM
- PhiMoEForCausalLM
- PlamoForCausalLM
- CodeShellForCausalLM
- InternLM2ForCausalLM
- BertModel
- BertForMaskedLM
- CamembertModel
- RobertaModel
- NomicBertModel
- XLMRobertaModel
- XLMRobertaForSequenceClassification
- GemmaForCausalLM
- Gemma2ForCausalLM
- Starcoder2ForCausalLM
- Rwkv6ForCausalLM
- RWKV6Qwen2ForCausalLM
- MambaForCausalLM
- MambaLMHeadModel
- FalconMambaForCausalLM
- CohereForCausalLM
- Cohere2ForCausalLM
- OLMoForCausalLM
- OlmoForCausalLM
- Olmo2ForCausalLM
- OlmoeForCausalLM
- JinaBertModel
- JinaBertForMaskedLM
- OpenELMForCausalLM
- ArcticForCausalLM
- DeepseekForCausalLM
- DeepseekV3ForCausalLM
- DeepseekV2ForCausalLM
- UMT5ForConditionalGeneration
- MT5ForConditionalGeneration
- T5ForConditionalGeneration
- T5WithLMHeadModel
- T5EncoderModel
- JAISLMHeadModel
- ChatGLMModel
- ChatGLMForConditionalGeneration
- NemotronForCausalLM
- ExaoneForCausalLM
- GraniteForCausalLM
- GraniteMoeForCausalLM
- ChameleonForCausalLM
- ChameleonForConditionalGeneration
```

* squash! convert : add --print-supported-models option

Fix flake8 error.
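As background, the script keeps a registry mapping Hugging Face architecture
names to converter classes, and the new option simply walks that registry.
A minimal, runnable sketch of the pattern (simplified for illustration; the
real script registers each `Model` subclass via the decorator shown in the
diff below and reports through its logger rather than `print`):

```python
# Sketch of the architecture-name registry that --print-supported-models
# iterates over. Simplified: the real Model class does much more.
class Model:
    _model_classes: dict[str, type] = {}

    @classmethod
    def register(cls, *names: str):
        # Decorator mapping one or more architecture names to a subclass.
        def func(modelcls):
            for name in names:
                cls._model_classes[name] = modelcls
            return modelcls
        return func

    @classmethod
    def print_registered_models(cls):
        for name in cls._model_classes.keys():
            print(f"- {name}")


@Model.register("LlamaForCausalLM", "MistralForCausalLM")
class LlamaModel(Model):
    pass


Model.print_registered_models()
# - LlamaForCausalLM
# - MistralForCausalLM
```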
---
 convert_hf_to_gguf.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index cf317eeae..81f19bf37 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -478,6 +478,11 @@ class Model:
             return modelcls
         return func
 
+    @classmethod
+    def print_registered_models(cls):
+        for name in cls._model_classes.keys():
+            logger.error(f"- {name}")
+
     @classmethod
     def from_model_architecture(cls, arch: str) -> type[Model]:
         try:
@@ -4929,6 +4934,7 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "model", type=Path,
         help="directory containing model file",
+        nargs="?",
     )
     parser.add_argument(
         "--use-temp-file", action="store_true",
@@ -4966,8 +4972,15 @@ def parse_args() -> argparse.Namespace:
         "--metadata", type=Path,
         help="Specify the path for an authorship metadata override file"
     )
+    parser.add_argument(
+        "--print-supported-models", action="store_true",
+        help="Print the supported models"
+    )
 
-    return parser.parse_args()
+    args = parser.parse_args()
+    if not args.print_supported_models and args.model is None:
+        parser.error("the following arguments are required: model")
+    return args
 
 
 def split_str_to_n_bytes(split_str: str) -> int:
@@ -4991,6 +5004,11 @@ def split_str_to_n_bytes(split_str: str) -> int:
 def main() -> None:
     args = parse_args()
 
+    if args.print_supported_models:
+        logger.error("Supported models:")
+        Model.print_registered_models()
+        sys.exit(0)
+
     if args.verbose:
         logging.basicConfig(level=logging.DEBUG)
     else:

From ba8a1f9c5b675459c55a83e3f97f10df3a66c788 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Fri, 10 Jan 2025 13:16:16 +0100
Subject: [PATCH 2/4] examples : add README.md to tts example [no ci] (#11155)

* examples : add README.md to tts example [no ci]

* squash! examples : add README.md to tts example [no ci]

Fix heading to be consistent with other examples, and add a quickstart
section to README.md.

* squash! examples : add README.md to tts example [no ci]

Fix spelling mistake.
---
 examples/tts/README.md | 80 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 examples/tts/README.md

diff --git a/examples/tts/README.md b/examples/tts/README.md
new file mode 100644
index 000000000..b0d20111a
--- /dev/null
+++ b/examples/tts/README.md
@@ -0,0 +1,80 @@
# llama.cpp/example/tts
This example demonstrates the Text To Speech feature. It uses a
[model](https://www.outeai.com/blog/outetts-0.2-500m) from
[outeai](https://www.outeai.com/).

## Quickstart
If you have built llama.cpp with `-DLLAMA_CURL=ON`, you can simply run the
following command and the required models will be downloaded automatically:
```console
$ build/bin/llama-tts --tts-oute-default -p "Hello world" && aplay output.wav
```
For details about the models and how to convert them to the required format,
see the following sections.

### Model conversion
Check out or download the LLM model:
```console
$ pushd models
$ git clone --branch main --single-branch --depth 1 https://huggingface.co/OuteAI/OuteTTS-0.2-500M
$ cd OuteTTS-0.2-500M && git lfs install && git lfs pull
$ popd
```
Convert the model to .gguf format:
```console
(venv) python convert_hf_to_gguf.py models/OuteTTS-0.2-500M \
    --outfile models/outetts-0.2-0.5B-f16.gguf --outtype f16
```
The generated model will be `models/outetts-0.2-0.5B-f16.gguf`.
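As an optional sanity check, the converted file can be opened with the `gguf`
Python package (the same package patched later in this series). A minimal
sketch, assuming `pip install gguf` (or a local gguf-py checkout) and that
the conversion above succeeded:

```python
# Inspect the converted model's metadata; this step is optional and not
# required for running the example.
from gguf import GGUFReader

reader = GGUFReader("models/outetts-0.2-0.5B-f16.gguf")
print(f"metadata fields: {len(reader.fields)}, tensors: {len(reader.tensors)}")
for name in list(reader.fields)[:5]:  # print the first few metadata keys
    print("-", name)
```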
We can optionally quantize this to Q8_0 using the following command:
```console
$ build/bin/llama-quantize models/outetts-0.2-0.5B-f16.gguf \
    models/outetts-0.2-0.5B-q8_0.gguf q8_0
```
The quantized model will be `models/outetts-0.2-0.5B-q8_0.gguf`.

Next we do something similar for the audio decoder. First download or check
out the voice decoder model:
```console
$ pushd models
$ git clone --branch main --single-branch --depth 1 https://huggingface.co/novateur/WavTokenizer-large-speech-75token
$ cd WavTokenizer-large-speech-75token && git lfs install && git lfs pull
$ popd
```
This model file is a PyTorch checkpoint (.ckpt), so we first need to convert
it to huggingface format:
```console
(venv) python examples/tts/convert_pt_to_hf.py \
    models/WavTokenizer-large-speech-75token/wavtokenizer_large_speech_320_24k.ckpt
...
Model has been successfully converted and saved to models/WavTokenizer-large-speech-75token/model.safetensors
Metadata has been saved to models/WavTokenizer-large-speech-75token/index.json
Config has been saved to models/WavTokenizer-large-speech-75tokenconfig.json
```
Then we can convert the huggingface format to gguf:
```console
(venv) python convert_hf_to_gguf.py models/WavTokenizer-large-speech-75token \
    --outfile models/wavtokenizer-large-75-f16.gguf --outtype f16
...
INFO:hf-to-gguf:Model successfully exported to models/wavtokenizer-large-75-f16.gguf
```

### Running the example

With both models generated, the LLM model and the voice decoder model, we
can run the example:
```console
$ build/bin/llama-tts -m ./models/outetts-0.2-0.5B-q8_0.gguf \
    -mv ./models/wavtokenizer-large-75-f16.gguf \
    -p "Hello world"
...
main: audio written to file 'output.wav'
```
The output.wav file will contain the audio of the prompt, which can be
played with any media player. On Linux, the following command will play it:
```console
$ aplay output.wav
```

From 2739a71e4b88474833b64aa974ca4515574fd3c4 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Sat, 11 Jan 2025 05:50:33 +0100
Subject: [PATCH 3/4] convert : sort print supported models [no ci] (#11179)

This commit sorts the list of supported models when printing them out.

The motivation for this change is to make it easier to find a specific
model in the list of supported models.
For example:
```console
$ ./convert_hf_to_gguf.py --print-supported-models
Supported models:
- ArcticForCausalLM
- BaiChuanForCausalLM
- BaichuanForCausalLM
- BertForMaskedLM
- BertModel
- BitnetForCausalLM
- BloomForCausalLM
- BloomModel
- CamembertModel
- ChameleonForCausalLM
- ChameleonForConditionalGeneration
- ChatGLMForConditionalGeneration
- ChatGLMModel
- CodeShellForCausalLM
- Cohere2ForCausalLM
- CohereForCausalLM
- DbrxForCausalLM
- DeciLMForCausalLM
- DeepseekForCausalLM
- DeepseekV2ForCausalLM
- DeepseekV3ForCausalLM
- ExaoneForCausalLM
- FalconForCausalLM
- FalconMambaForCausalLM
- GPT2LMHeadModel
- GPTBigCodeForCausalLM
- GPTNeoXForCausalLM
- GPTRefactForCausalLM
- Gemma2ForCausalLM
- GemmaForCausalLM
- GraniteForCausalLM
- GraniteMoeForCausalLM
- GrokForCausalLM
- InternLM2ForCausalLM
- JAISLMHeadModel
- JinaBertForMaskedLM
- JinaBertModel
- LLaMAForCausalLM
- LlamaForCausalLM
- LlavaStableLMEpochForCausalLM
- MPTForCausalLM
- MT5ForConditionalGeneration
- MambaForCausalLM
- MambaLMHeadModel
- MiniCPM3ForCausalLM
- MiniCPMForCausalLM
- MistralForCausalLM
- MixtralForCausalLM
- NemotronForCausalLM
- NomicBertModel
- OLMoForCausalLM
- Olmo2ForCausalLM
- OlmoForCausalLM
- OlmoeForCausalLM
- OpenELMForCausalLM
- OrionForCausalLM
- Phi3ForCausalLM
- PhiForCausalLM
- PhiMoEForCausalLM
- PlamoForCausalLM
- QWenLMHeadModel
- Qwen2ForCausalLM
- Qwen2MoeForCausalLM
- Qwen2VLForConditionalGeneration
- RWForCausalLM
- RWKV6Qwen2ForCausalLM
- RobertaModel
- Rwkv6ForCausalLM
- StableLMEpochForCausalLM
- StableLmForCausalLM
- Starcoder2ForCausalLM
- T5EncoderModel
- T5ForConditionalGeneration
- T5WithLMHeadModel
- UMT5ForConditionalGeneration
- WavTokenizerDec
- XLMRobertaForSequenceClassification
- XLMRobertaModel
- XverseForCausalLM
```
---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 81f19bf37..4dc9837ab 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -480,7 +480,7 @@ class Model:
 
     @classmethod
     def print_registered_models(cls):
-        for name in cls._model_classes.keys():
+        for name in sorted(cls._model_classes.keys()):
             logger.error(f"- {name}")
 
     @classmethod

From c05e8c9934f94fde49bc1bc9dc51eed282605150 Mon Sep 17 00:00:00 2001
From: Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com>
Date: Sat, 11 Jan 2025 03:42:31 -0600
Subject: [PATCH 4/4] gguf-py: fixed local detection of gguf package (#11180)

* updated path to gguf package for non-installed setups
* added reader.py to readme
* Bumped gguf version to 0.15.0
---
 gguf-py/README.md                           | 2 ++
 gguf-py/gguf/scripts/gguf_convert_endian.py | 4 ++--
 gguf-py/gguf/scripts/gguf_dump.py           | 4 ++--
 gguf-py/gguf/scripts/gguf_hash.py           | 4 ++--
 gguf-py/gguf/scripts/gguf_new_metadata.py   | 4 ++--
 gguf-py/gguf/scripts/gguf_set_metadata.py   | 4 ++--
 gguf-py/pyproject.toml                      | 2 +-
 7 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/gguf-py/README.md b/gguf-py/README.md
index 37a75923b..2e513633d 100644
--- a/gguf-py/README.md
+++ b/gguf-py/README.md
@@ -15,6 +15,8 @@ pip install gguf
 
 [examples/writer.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/writer.py) — Generates `example.gguf` in the current directory to demonstrate generating a GGUF file. Note that this file cannot be used as a model.
 
+[examples/reader.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/examples/reader.py) — Extracts and displays key-value pairs and tensor details from a GGUF file in a readable format.
+
 [gguf/scripts/gguf_dump.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_dump.py) — Dumps a GGUF file's metadata to the console.
 
 [gguf/scripts/gguf_set_metadata.py](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/scripts/gguf_set_metadata.py) — Allows changing simple metadata values in a GGUF file by key.

diff --git a/gguf-py/gguf/scripts/gguf_convert_endian.py b/gguf-py/gguf/scripts/gguf_convert_endian.py
index b698af0fe..f97e91bd4 100755
--- a/gguf-py/gguf/scripts/gguf_convert_endian.py
+++ b/gguf-py/gguf/scripts/gguf_convert_endian.py
@@ -11,8 +11,8 @@ from pathlib import Path
 import numpy as np
 
 # Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
-    sys.path.insert(0, str(Path(__file__).parent.parent))
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
 import gguf

diff --git a/gguf-py/gguf/scripts/gguf_dump.py b/gguf-py/gguf/scripts/gguf_dump.py
index 1b6546541..f95b4fd48 100755
--- a/gguf-py/gguf/scripts/gguf_dump.py
+++ b/gguf-py/gguf/scripts/gguf_dump.py
@@ -12,8 +12,8 @@ from typing import Any
 import numpy as np
 
 # Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
-    sys.path.insert(0, str(Path(__file__).parent.parent))
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
 from gguf import GGUFReader, GGUFValueType, ReaderTensor  # noqa: E402

diff --git a/gguf-py/gguf/scripts/gguf_hash.py b/gguf-py/gguf/scripts/gguf_hash.py
index ee34d09bf..3ef989921 100755
--- a/gguf-py/gguf/scripts/gguf_hash.py
+++ b/gguf-py/gguf/scripts/gguf_hash.py
@@ -13,8 +13,8 @@ from pathlib import Path
 from tqdm import tqdm
 
 # Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
-    sys.path.insert(0, str(Path(__file__).parent.parent))
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
 from gguf import GGUFReader  # noqa: E402

diff --git a/gguf-py/gguf/scripts/gguf_new_metadata.py b/gguf-py/gguf/scripts/gguf_new_metadata.py
index fce52a8c1..a8cfc9d58 100755
--- a/gguf-py/gguf/scripts/gguf_new_metadata.py
+++ b/gguf-py/gguf/scripts/gguf_new_metadata.py
@@ -13,8 +13,8 @@ from tqdm import tqdm
 from typing import Any, Sequence, NamedTuple
 
 # Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
-    sys.path.insert(0, str(Path(__file__).parent.parent))
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
 import gguf

diff --git a/gguf-py/gguf/scripts/gguf_set_metadata.py b/gguf-py/gguf/scripts/gguf_set_metadata.py
index e35b651b8..f5809c35c 100755
--- a/gguf-py/gguf/scripts/gguf_set_metadata.py
+++ b/gguf-py/gguf/scripts/gguf_set_metadata.py
@@ -6,8 +6,8 @@ import sys
 from pathlib import Path
 
 # Necessary to load the local gguf package
-if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
-    sys.path.insert(0, str(Path(__file__).parent.parent))
+if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent.parent / 'gguf-py').exists():
+    sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
 from gguf import GGUFReader  # noqa: E402

diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index 92d7f22ec..78c6baa64 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gguf"
-version = "0.14.0"
+version = "0.15.0"
 description = "Read and write ML models in GGUF for GGML"
 authors = ["GGML "]
 packages = [
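The reason every script in this patch gains one more `.parent` is the extra
directory level: the scripts now live in `gguf-py/gguf/scripts/` rather than
one level higher, so going up two parents lands inside the package instead
of at `gguf-py`. A small illustration of the path arithmetic (the checkout
path below is hypothetical):

```python
from pathlib import Path

# Hypothetical location of a script inside a source checkout:
script = Path("/repo/gguf-py/gguf/scripts/gguf_dump.py")

print(script.parent.parent)         # /repo/gguf-py/gguf  -> old logic; `import gguf` fails
print(script.parent.parent.parent)  # /repo/gguf-py       -> new logic; contains the gguf package
```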