exploration of alternative wavtokenizer

This commit is contained in:
Concedo 2025-01-20 23:02:50 +08:00
parent 02d5bb5b05
commit 2c0239fcf2
2 changed files with 4 additions and 4 deletions

View file

@ -154,7 +154,7 @@ config = {
"architectures": [
"WavTokenizerDec"
],
"hidden_size": 1282,
"hidden_size": 1282, # or 2402 for 40t/s
"n_embd_features": 512,
"n_ff": 2304,
"vocab_size": 4096,

View file

@ -152,9 +152,9 @@ static std::vector<float> embd_to_audio(
const int n_codes,
const int n_embd,
const int n_thread) {
const int n_fft = 1280;
const int n_hop = 320;
const int n_win = 1280;
const int n_hop = 600;
const int n_fft = n_hop*4; //its 1280 at 320, or 2400 at 600
const int n_win = n_hop*4;
const int n_pad = (n_win - n_hop)/2;
const int n_out = (n_codes - 1)*n_hop + n_win;