exploration of alternative wavtokenizer

This commit is contained in:
Concedo 2025-01-20 23:02:50 +08:00
parent 02d5bb5b05
commit 2c0239fcf2
2 changed files with 4 additions and 4 deletions

View file

@@ -154,7 +154,7 @@ config = {
"architectures": [ "architectures": [
"WavTokenizerDec" "WavTokenizerDec"
], ],
"hidden_size": 1282, "hidden_size": 1282, # or 2402 for 40t/s
"n_embd_features": 512, "n_embd_features": 512,
"n_ff": 2304, "n_ff": 2304,
"vocab_size": 4096, "vocab_size": 4096,

View file

@@ -152,9 +152,9 @@ static std::vector<float> embd_to_audio(
const int n_codes, const int n_codes,
const int n_embd, const int n_embd,
const int n_thread) { const int n_thread) {
const int n_fft = 1280; const int n_hop = 600;
const int n_hop = 320; const int n_fft = n_hop*4; //its 1280 at 320, or 2400 at 600
const int n_win = 1280; const int n_win = n_hop*4;
const int n_pad = (n_win - n_hop)/2; const int n_pad = (n_win - n_hop)/2;
const int n_out = (n_codes - 1)*n_hop + n_win; const int n_out = (n_codes - 1)*n_hop + n_win;