added a gguf file analyzer

This commit is contained in:
Concedo 2025-01-10 16:27:48 +08:00
parent 91b6e29af3
commit 0305841dd5

View file

@ -27,6 +27,7 @@ import html
import urllib.parse as urlparse import urllib.parse as urlparse
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path
# constants # constants
sampler_order_max = 7 sampler_order_max = 7
@ -3604,6 +3605,8 @@ def show_gui():
ctk.CTkButton(extra_tab , text = "Unpack KoboldCpp To Folder", command = unpack_to_dir ).grid(row=3,column=0, stick="w", padx= 8, pady=2) ctk.CTkButton(extra_tab , text = "Unpack KoboldCpp To Folder", command = unpack_to_dir ).grid(row=3,column=0, stick="w", padx= 8, pady=2)
makelabel(extra_tab, "Export as launcher .kcppt template (Expert Only)", 4, 0,tooltiptxt="Creates a KoboldCpp launch template for others to use.\nEmbeds JSON files directly into exported file when saving.\nWhen loaded, forces the backend to be automatically determined.\nWarning! Not recommended for beginners!") makelabel(extra_tab, "Export as launcher .kcppt template (Expert Only)", 4, 0,tooltiptxt="Creates a KoboldCpp launch template for others to use.\nEmbeds JSON files directly into exported file when saving.\nWhen loaded, forces the backend to be automatically determined.\nWarning! Not recommended for beginners!")
ctk.CTkButton(extra_tab , text = "Generate LaunchTemplate", command = kcpp_export_template ).grid(row=5,column=0, stick="w", padx= 8, pady=2) ctk.CTkButton(extra_tab , text = "Generate LaunchTemplate", command = kcpp_export_template ).grid(row=5,column=0, stick="w", padx= 8, pady=2)
makelabel(extra_tab, "Analyze GGUF Metadata", 6, 0,tooltiptxt="Reads the metadata, weight types and tensor names in any GGUF file.")
ctk.CTkButton(extra_tab , text = "Analyze GGUF", command = analyze_gguf_model_wrapper ).grid(row=7,column=0, stick="w", padx= 8, pady=2)
# launch # launch
def guilaunch(): def guilaunch():
@ -4526,6 +4529,33 @@ def download_model_from_url(url,permitted_types=[".gguf",".safetensors"]):
return dlfile return dlfile
return None return None
def analyze_gguf_model(args,filename):
try:
stime = datetime.now()
from gguf.scripts.gguf_dump import dump_metadata
from gguf import GGUFReader
reader = GGUFReader(filename, 'r')
ns = argparse.Namespace()
ns.no_tensors = False
dump_metadata(reader, ns)
atime = (datetime.now() - stime).total_seconds()
print(f"---\nAnalyzing completed in {atime:.2f}s.\n---",flush=True)
except Exception as e:
print(f"Cannot Analyze File: {e}")
return
def analyze_gguf_model_wrapper(filename=""):
if not filename or filename=="":
from tkinter.filedialog import askopenfilename
filename = askopenfilename(title="Select GGUF to analyze")
if not filename or filename=="" or not os.path.exists(filename):
print("Selected GGUF file not found. Please select a valid GGUF file to analyze.")
return
print("---")
print(f"Analyzing {filename}, please wait...\n---",flush=True)
dumpthread = threading.Thread(target=analyze_gguf_model, args=(args,filename))
dumpthread.start()
def main(launch_args,start_server=True): def main(launch_args,start_server=True):
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath
@ -4539,6 +4569,13 @@ def main(launch_args,start_server=True):
print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually print(f"***\nWelcome to KoboldCpp - Version {KcppVersion}") # just update version manually
# print("Python version: " + sys.version) # print("Python version: " + sys.version)
# connect path
try:
if (Path(__file__).parent / "gguf-py").exists():
ggufpy_path = str(Path(__file__).parent / "gguf-py")
sys.path.append(ggufpy_path)
except Exception as e:
print(f"Cannot import gguf-py path: {e}")
#perform some basic cleanup of old temporary directories #perform some basic cleanup of old temporary directories
try: try:
@ -4550,6 +4587,10 @@ def main(launch_args,start_server=True):
unpack_to_dir(args.unpack) unpack_to_dir(args.unpack)
return return
if args.analyze:
analyze_gguf_model_wrapper(args.analyze)
return
if args.config and len(args.config)==1: if args.config and len(args.config)==1:
cfgname = args.config[0] cfgname = args.config[0]
if isinstance(cfgname, str): if isinstance(cfgname, str):
@ -5182,6 +5223,7 @@ if __name__ == '__main__':
#more advanced params #more advanced params
advparser = parser.add_argument_group('Advanced Commands') advparser = parser.add_argument_group('Advanced Commands')
advparser.add_argument("--version", help="Prints version and exits.", action='store_true') advparser.add_argument("--version", help="Prints version and exits.", action='store_true')
advparser.add_argument("--analyze", metavar=('[filename]'), help="Reads the metadata, weight types and tensor names in any GGUF file.", default="")
advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+') advparser.add_argument("--ropeconfig", help="If set, uses customized RoPE scaling from configured frequency scale and frequency base (e.g. --ropeconfig 0.25 10000). Otherwise, uses NTK-Aware scaling set automatically based on context size. For linear rope, simply set the freq-scale and ignore the freq-base",metavar=('[rope-freq-scale]', '[rope-freq-base]'), default=[0.0, 10000.0], type=float, nargs='+')
advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512) advparser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512). Setting it to -1 disables BLAS mode, but keeps other benefits like GPU offload.", type=int,choices=[-1,32,64,128,256,512,1024,2048], default=512)
advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0) advparser.add_argument("--blasthreads", help="Use a different number of threads during BLAS if specified. Otherwise, has the same value as --threads",metavar=('[threads]'), type=int, default=0)