mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
kv snapshots save and load last logits for correctness. added some text for musicui, updated docs
This commit is contained in:
parent
54cf43ae64
commit
4f1b22c415
4 changed files with 178 additions and 14 deletions
|
|
@ -1644,6 +1644,144 @@
|
|||
},
|
||||
}
|
||||
},
|
||||
"/api/extra/music/prepare": {
|
||||
"post": {
|
||||
"summary": "Creates song generation parameters such as caption, lyrics, BPM and duration",
|
||||
"description": "Creates song generation parameters such as caption, lyrics, BPM and duration. This should be called to produce the generation input for /api/extra/music/generate",
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"example": {
|
||||
"caption": "An emotional rap song about the kobold war.",
|
||||
},
|
||||
"schema": {
|
||||
"properties": {
|
||||
"caption": {
|
||||
"type": "string",
|
||||
"description": "A short description of the song to create"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"tags": [
|
||||
"api/extra"
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"example":
|
||||
{
|
||||
"caption": "A melancholic and narrative-driven rap track built around a heavy beat",
|
||||
"lyrics": "\n[Verse 1]\nKobold is love, yeah, kobold is life\nKobold is the only way we survive\nShe said okay, yo, keep it that way\nIn kobold we trust, just kobold today\n\n[Chorus]\nKobold, kobold, we carry on now\nKobold, kobold, each way and how\nKobold, kobold, right here we go\nKobold, kobold, all that I know.\n\n[Outro]",
|
||||
"bpm": 120,
|
||||
"duration": 64.0,
|
||||
"keyscale": "G minor",
|
||||
"timesignature": "2",
|
||||
"vocal_language": "en",
|
||||
"task_type": "text2music",
|
||||
"seed": 622315,
|
||||
"thinking": false,
|
||||
"lm_temperature": 0.85,
|
||||
"lm_cfg_scale": 2.0,
|
||||
"lm_top_p": 0.9,
|
||||
"lm_negative_prompt": "",
|
||||
"inference_steps": 8,
|
||||
"guidance_scale": 1.0,
|
||||
"shift": 3.0,
|
||||
"audio_codes": ""
|
||||
},
|
||||
"schema": {
|
||||
"properties": {},
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Successful request"
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
"/api/extra/music/generate": {
|
||||
"post": {
|
||||
"summary": "Generates music based on provided captions, lyrics and configurations",
|
||||
"description": "Generates music based on provided captions, lyrics and configurations. The config can be generated using /api/extra/music/prepare or crafted manually",
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"example": {
|
||||
"caption": "A melancholic and narrative-driven rap track built around a heavy beat",
|
||||
"lyrics": "\n[Verse 1]\nKobold is love, yeah, kobold is life\nKobold is the only way we survive\nShe said okay, yo, keep it that way\nIn kobold we trust, just kobold today\n\n[Chorus]\nKobold, kobold, we carry on now\nKobold, kobold, each way and how\nKobold, kobold, right here we go\nKobold, kobold, all that I know.\n\n[Outro]",
|
||||
"bpm": 120,
|
||||
"duration": 64.0,
|
||||
"keyscale": "G minor",
|
||||
"timesignature": "2",
|
||||
"vocal_language": "en",
|
||||
"inference_steps": 8
|
||||
},
|
||||
"schema": {
|
||||
"properties": {
|
||||
"caption": {
|
||||
"type": "string",
|
||||
"description": "A short description of the song to create"
|
||||
},
|
||||
"lyrics": {
|
||||
"type": "string",
|
||||
"description": "The full lyrics of the song to generate"
|
||||
},
|
||||
"bpm": {
|
||||
"type": "number",
|
||||
"description": "The song Beats Per Minute"
|
||||
},
|
||||
"duration": {
|
||||
"type": "number",
|
||||
"description": "The length of the song, in seconds."
|
||||
},
|
||||
"keyscale": {
|
||||
"type": "string",
|
||||
"description": "The musical key of the song."
|
||||
},
|
||||
"timesignature": {
|
||||
"type": "string",
|
||||
"description": "The musical time signature of the song."
|
||||
},
|
||||
"vocal_language": {
|
||||
"type": "string",
|
||||
"description": "The language of the song lyrics."
|
||||
},
|
||||
"inference_steps": {
|
||||
"type": "number",
|
||||
"description": "How many diffusion steps to use."
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"tags": [
|
||||
"api/extra"
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"audio/wav": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"format": "binary"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Successful request"
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
"/api/extra/json_to_grammar": {
|
||||
"post": {
|
||||
"summary": "Converts a provided JSON schema into GBNF grammar.",
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ body{
|
|||
color:var(--text);
|
||||
}
|
||||
header{
|
||||
padding:16px 20px;
|
||||
padding:16px 12px;
|
||||
font-size:20px;
|
||||
font-weight:600;
|
||||
background:rgba(0,0,0,0.3);
|
||||
|
|
@ -32,8 +32,8 @@ header{
|
|||
.wrapper{
|
||||
display:grid;
|
||||
grid-template-columns:minmax(340px,500px) 1fr;
|
||||
gap:20px;
|
||||
padding:20px;
|
||||
gap:14px;
|
||||
padding:10px;
|
||||
}
|
||||
@media(max-width:1100px){
|
||||
.wrapper{grid-template-columns:1fr;}
|
||||
|
|
@ -45,7 +45,7 @@ header{
|
|||
box-shadow:0 10px 40px rgba(0,0,0,.4);
|
||||
}
|
||||
h2{
|
||||
margin:0 0 14px 0;
|
||||
margin:0 0 10px 0;
|
||||
font-size:16px;
|
||||
color:var(--accent2);
|
||||
}
|
||||
|
|
@ -112,7 +112,7 @@ button{
|
|||
audio{width:100%;margin-top:6px;}
|
||||
.advanced-toggle{
|
||||
margin-top:8px;
|
||||
font-size:12px;
|
||||
font-size:14px;
|
||||
cursor:pointer;
|
||||
color:var(--accent2);
|
||||
}
|
||||
|
|
@ -174,11 +174,11 @@ input[type="checkbox"] {
|
|||
<h2>Song Setup</h2>
|
||||
|
||||
<label>Caption</label>
|
||||
<input id="caption">
|
||||
<input id="caption" placeholder="Describe the song">
|
||||
|
||||
<div style="margin-top:10px">
|
||||
<label>Lyrics</label>
|
||||
<textarea id="lyrics"></textarea>
|
||||
<textarea id="lyrics" placeholder="Enter song lyrics, or press 'Plan' to generate them."></textarea>
|
||||
</div>
|
||||
|
||||
<div class="form-grid" style="margin-top:12px">
|
||||
|
|
@ -217,7 +217,7 @@ input[type="checkbox"] {
|
|||
|
||||
<div class="actions" id="actionContainer">
|
||||
<div id="normalActions" style="display:flex; gap:10px; flex-wrap:wrap;">
|
||||
<button class="secondary" onclick="planSong()">Plan</button>
|
||||
<button class="primary" onclick="planSong()">Plan</button>
|
||||
<button class="primary" onclick="generateSong()">Generate</button>
|
||||
<button class="danger" onclick="clearFields()">Clear</button>
|
||||
<button onclick="exportPlan()">Export JSON</button>
|
||||
|
|
@ -229,6 +229,10 @@ input[type="checkbox"] {
|
|||
|
||||
<input type="file" id="importFile" hidden accept="application/json" onchange="importPlan(event)">
|
||||
</div>
|
||||
<div>
|
||||
<p style="font-size:14px">Click 'Plan' first to generate lyrics, BPM and duration. Edit as needed.
|
||||
<br>When satisfied, click 'Generate' to make the music</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -127,6 +127,7 @@ static int debugmode = 0; //-1 = hide all, 0 = normal, 1 = showall
|
|||
static bool is_quiet = false;
|
||||
static std::vector<gpt_vocab::id> last_n_tokens;
|
||||
static std::vector<gpt_vocab::id> current_context_tokens;
|
||||
static std::vector<float> loaded_latest_logits; //do not use normally, this is only required when loading state happens and we need to override logits
|
||||
static size_t mem_per_token = 0;
|
||||
static std::vector<float> logits;
|
||||
static std::vector<int> smartcontext;
|
||||
|
|
@ -4668,12 +4669,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
}
|
||||
while(logits_sampled<logits_to_sample && remaining_tokens>0 && !abort_draft && !early_abort)
|
||||
{
|
||||
if(!firstdecodedone && current_context_tokens.size()>0)
|
||||
{
|
||||
embd.clear();
|
||||
embd.push_back(current_context_tokens[current_context_tokens.size()-1]);
|
||||
break;
|
||||
}
|
||||
if(logits_sampled>0)
|
||||
{
|
||||
//this is not the first loop, so we need to increment some things
|
||||
|
|
@ -4708,6 +4703,28 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
lowestLogit = LowestLogit(logits);
|
||||
}
|
||||
|
||||
if(!firstdecodedone && current_context_tokens.size()>0)
|
||||
{
|
||||
if(loaded_latest_logits.size()>0)
|
||||
{
|
||||
if(debugmode==1 && !is_quiet)
|
||||
{
|
||||
//vector::size() returns size_t, so it must be printed with %zu; passing it to %d is a format/argument mismatch (undefined behaviour)
printf("\nLoading %zu saved logits...\n",loaded_latest_logits.size());
|
||||
}
|
||||
//first decode was not done. this can happen when reloading from a perfectly matched state.
|
||||
//to prevent a catastrophic failure, we must prepare emergency logits for usage
|
||||
logitsPtr = loaded_latest_logits.data();
|
||||
lowestLogit = LowestLogit(logitsPtr,n_vocab);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("\nNo cached logits and we need them, emergency fallback with degraded quality...\n");
|
||||
embd.clear();
|
||||
embd.push_back(current_context_tokens[current_context_tokens.size()-1]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//if adaptive p sampling is used, we need to cache the original probabilities
|
||||
std::vector<llama_token_data> original_candidates;
|
||||
if(adaptive_target > 0.0f)
|
||||
|
|
@ -5237,6 +5254,7 @@ size_t gpttype_save_state_kv(int slot)
|
|||
savestates[slot].current_savestate_buffer.clear();
|
||||
savestates[slot].current_draft_savestate_buffer.clear();
|
||||
savestates[slot].savestate_context_tokens.clear();
|
||||
savestates[slot].latest_logits.clear();
|
||||
savestates[slot].current_savestate_size = 0;
|
||||
savestates[slot].current_draft_savestate_size = 0;
|
||||
savestates[slot].media_signature = "";
|
||||
|
|
@ -5258,6 +5276,8 @@ size_t gpttype_save_state_kv(int slot)
|
|||
savestates[slot].current_savestate_size = newsize;
|
||||
savestates[slot].savestate_context_tokens = current_context_tokens;
|
||||
savestates[slot].media_signature = media_composite_image_signature;
|
||||
float * lgptr = llama_get_logits(llama_ctx_v4);
|
||||
savestates[slot].latest_logits.assign(lgptr,lgptr+n_vocab);
|
||||
int maxedpos = llama_memory_seq_pos_max(llama_get_memory(llama_ctx_v4),0);
|
||||
//kcpp: so maxedpos appears to always be equal to ctx tokens - 2, if savestate_ctx_tokens > maxedpos + 2 then trim excess
|
||||
if(maxedpos > 0 && savestates[slot].savestate_context_tokens.size() > maxedpos + 2)
|
||||
|
|
@ -5316,6 +5336,7 @@ bool gpttype_load_state_kv(int slot)
|
|||
if(res > 0)
|
||||
{
|
||||
current_context_tokens = savestates[slot].savestate_context_tokens;
|
||||
loaded_latest_logits = savestates[slot].latest_logits;
|
||||
printf("\nKV Load SaveState %d: Restored KV with %zu tokens.\n", slot,current_context_tokens.size());
|
||||
if(draft_ctx && savestates[slot].current_draft_savestate_size>0)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -537,6 +537,7 @@ struct savestate_data
|
|||
size_t current_draft_savestate_size = 0;
|
||||
std::vector<uint8_t> current_draft_savestate_buffer;
|
||||
std::vector<gpt_vocab::id> savestate_context_tokens; //for context clones
|
||||
std::vector<float> latest_logits;
|
||||
int64_t last_used = 0; //unix timestamp, updated on save or load
|
||||
std::string media_signature = "";
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue