diff --git a/examples/outetts/readme.md b/examples/outetts/readme.md
new file mode 100644
index 000000000..45b9d63cd
--- /dev/null
+++ b/examples/outetts/readme.md
@@ -0,0 +1,23 @@
+## KoboldCpp now supports OuteTTS Voice Cloning
+
+However, it can be slightly challenging to set it up.
+- The easiest way to get started is to use already pre-cloned voices, which you can find within the [speakers directory](speakers).
+- Simply download the .json speaker file, then open KoboldCpp with an OuteTTS model and WavTokenizer loaded.
+- Paste the voice JSON into Kobold Lite UI's Settings > Media > TTS > voiceclone option.
+
+
+
+
+- If you don't know how to get the OuteTTS and WavTokenizer models, [please read the wiki](https://github.com/LostRuins/koboldcpp/wiki#getting-an-ai-model-file).
+- Once configured, you are good to go. Narrations will be done with that selected cloned speaker.
+
+### Cloning your own speaker
+You can also create your own cloned speaker voice.
+To do that, you will need Python 3.10+ with the `outetts` package installed. You can install it with these commands:
+```
+pip install outetts --no-deps
+pip install scipy einops pyyaml huggingface-hub encodec matplotlib transformers pytorch-lightning tensorboardX soundfile numpy jsonargparse torchcrepe librosa inflect loguru polars natsort tqdm sounddevice mecab-python3 unidic-lite uroman openai-whisper pygame
+```
+After that, obtain a WAV file containing a sample of the voice you want to clone.
+Modify the [voice_cloning.py](voice_cloning.py) script with your input audio file, and let it generate the JSON.
+Finally, use the generated JSON file in the same way as the pre-cloned voices above.
\ No newline at end of file
diff --git a/examples/outetts/speakers/david_attenborough.json b/examples/outetts/speakers/david_attenborough.json
new file mode 100644
index 000000000..b6e4880b7
--- /dev/null
+++ b/examples/outetts/speakers/david_attenborough.json
@@ -0,0 +1,1093 @@
+{
+ "text": "to a surprising degree nearly all the major events in this history can be told using living animals to represent the ancestral creatures which were the actual protagonists",
+ "words": [
+ {
+ "word": "to",
+ "duration": 0.19,
+ "codes": [
+ 561,
+ 585,
+ 1813,
+ 852,
+ 285,
+ 23,
+ 1408,
+ 366,
+ 231,
+ 759,
+ 51,
+ 1593,
+ 1332,
+ 1233
+ ]
+ },
+ {
+ "word": "a",
+ "duration": 0.25,
+ "codes": [
+ 113,
+ 505,
+ 787,
+ 343,
+ 29,
+ 761,
+ 1588,
+ 1368,
+ 1415,
+ 1663,
+ 58,
+ 1735,
+ 504,
+ 567,
+ 740,
+ 1213,
+ 1358,
+ 1666,
+ 966
+ ]
+ },
+ {
+ "word": "surprising",
+ "duration": 0.64,
+ "codes": [
+ 146,
+ 807,
+ 286,
+ 471,
+ 1441,
+ 245,
+ 767,
+ 1337,
+ 596,
+ 1310,
+ 1526,
+ 537,
+ 1026,
+ 1612,
+ 1628,
+ 1676,
+ 1040,
+ 1511,
+ 1761,
+ 1005,
+ 127,
+ 1420,
+ 1795,
+ 755,
+ 1152,
+ 1263,
+ 1097,
+ 1483,
+ 1047,
+ 687,
+ 1404,
+ 809,
+ 842,
+ 1247,
+ 1831,
+ 556,
+ 1752,
+ 328,
+ 796,
+ 1800,
+ 1785,
+ 546,
+ 1022,
+ 1507,
+ 328,
+ 1409,
+ 920,
+ 1048
+ ]
+ },
+ {
+ "word": "degree",
+ "duration": 0.55,
+ "codes": [
+ 1319,
+ 1176,
+ 150,
+ 505,
+ 1772,
+ 769,
+ 1281,
+ 865,
+ 319,
+ 1493,
+ 1527,
+ 1440,
+ 1689,
+ 345,
+ 1236,
+ 866,
+ 1319,
+ 1037,
+ 1153,
+ 1644,
+ 347,
+ 1794,
+ 901,
+ 127,
+ 1515,
+ 514,
+ 568,
+ 788,
+ 716,
+ 1343,
+ 319,
+ 1545,
+ 428,
+ 1251,
+ 197,
+ 1451,
+ 1813,
+ 1527,
+ 1360,
+ 1755,
+ 1580
+ ]
+ },
+ {
+ "word": "nearly",
+ "duration": 0.88,
+ "codes": [
+ 585,
+ 1759,
+ 34,
+ 1525,
+ 1563,
+ 1543,
+ 1680,
+ 731,
+ 1465,
+ 1756,
+ 1580,
+ 681,
+ 416,
+ 1338,
+ 1806,
+ 1379,
+ 762,
+ 1489,
+ 423,
+ 241,
+ 559,
+ 68,
+ 1705,
+ 964,
+ 331,
+ 495,
+ 762,
+ 627,
+ 1665,
+ 612,
+ 1754,
+ 866,
+ 538,
+ 1730,
+ 168,
+ 555,
+ 1359,
+ 612,
+ 686,
+ 288,
+ 227,
+ 1242,
+ 765,
+ 258,
+ 464,
+ 1789,
+ 738,
+ 540,
+ 50,
+ 1604,
+ 1452,
+ 1296,
+ 952,
+ 1518,
+ 1563,
+ 1243,
+ 1732,
+ 1722,
+ 62,
+ 1535,
+ 1530,
+ 319,
+ 184,
+ 92,
+ 286,
+ 1534
+ ]
+ },
+ {
+ "word": "all",
+ "duration": 0.41,
+ "codes": [
+ 1646,
+ 968,
+ 1037,
+ 1279,
+ 585,
+ 54,
+ 417,
+ 288,
+ 44,
+ 1274,
+ 1218,
+ 1558,
+ 321,
+ 532,
+ 1461,
+ 1296,
+ 492,
+ 1514,
+ 110,
+ 1060,
+ 1710,
+ 545,
+ 1570,
+ 107,
+ 1102,
+ 1536,
+ 700,
+ 545,
+ 377,
+ 1460,
+ 718
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.16,
+ "codes": [
+ 1233,
+ 1354,
+ 361,
+ 1409,
+ 1769,
+ 1597,
+ 1516,
+ 653,
+ 1304,
+ 931,
+ 1532,
+ 1217
+ ]
+ },
+ {
+ "word": "major",
+ "duration": 0.39,
+ "codes": [
+ 935,
+ 1386,
+ 1345,
+ 78,
+ 1240,
+ 853,
+ 579,
+ 926,
+ 430,
+ 1014,
+ 440,
+ 1295,
+ 1711,
+ 1368,
+ 1061,
+ 807,
+ 1740,
+ 1362,
+ 1440,
+ 1500,
+ 980,
+ 1313,
+ 858,
+ 307,
+ 966,
+ 333,
+ 662,
+ 813,
+ 1416
+ ]
+ },
+ {
+ "word": "events",
+ "duration": 0.48,
+ "codes": [
+ 1267,
+ 1638,
+ 1585,
+ 1079,
+ 1215,
+ 1722,
+ 1351,
+ 1312,
+ 1263,
+ 1470,
+ 1773,
+ 1811,
+ 1612,
+ 1773,
+ 1758,
+ 512,
+ 1344,
+ 688,
+ 1078,
+ 302,
+ 1393,
+ 608,
+ 1554,
+ 831,
+ 1791,
+ 1820,
+ 1815,
+ 1558,
+ 804,
+ 686,
+ 995,
+ 1821,
+ 1034,
+ 1621,
+ 870,
+ 1034
+ ]
+ },
+ {
+ "word": "in",
+ "duration": 0.13,
+ "codes": [
+ 1797,
+ 178,
+ 1824,
+ 1257,
+ 1353,
+ 1810,
+ 1733,
+ 0,
+ 1768,
+ 1729
+ ]
+ },
+ {
+ "word": "this",
+ "duration": 0.2,
+ "codes": [
+ 1554,
+ 593,
+ 501,
+ 1679,
+ 1323,
+ 1565,
+ 1027,
+ 1416,
+ 679,
+ 909,
+ 1685,
+ 947,
+ 1744,
+ 471,
+ 1410
+ ]
+ },
+ {
+ "word": "history",
+ "duration": 0.4,
+ "codes": [
+ 1496,
+ 979,
+ 1681,
+ 1801,
+ 1656,
+ 895,
+ 1353,
+ 1444,
+ 719,
+ 1721,
+ 1340,
+ 796,
+ 1532,
+ 471,
+ 989,
+ 1669,
+ 1364,
+ 1016,
+ 1397,
+ 989,
+ 1667,
+ 1074,
+ 1700,
+ 1060,
+ 1215,
+ 917,
+ 1738,
+ 1542,
+ 1525,
+ 1078
+ ]
+ },
+ {
+ "word": "can",
+ "duration": 0.48,
+ "codes": [
+ 1301,
+ 750,
+ 1750,
+ 1068,
+ 1681,
+ 694,
+ 761,
+ 257,
+ 235,
+ 458,
+ 976,
+ 538,
+ 1486,
+ 1754,
+ 469,
+ 1360,
+ 509,
+ 538,
+ 694,
+ 559,
+ 851,
+ 445,
+ 21,
+ 20,
+ 187,
+ 288,
+ 457,
+ 1797,
+ 1027,
+ 1753,
+ 1002,
+ 996,
+ 480,
+ 1008,
+ 1659,
+ 1545
+ ]
+ },
+ {
+ "word": "be",
+ "duration": 0.11,
+ "codes": [
+ 1734,
+ 1684,
+ 264,
+ 1637,
+ 1078,
+ 1053,
+ 1445,
+ 629
+ ]
+ },
+ {
+ "word": "told",
+ "duration": 0.48,
+ "codes": [
+ 634,
+ 1512,
+ 559,
+ 551,
+ 603,
+ 639,
+ 570,
+ 117,
+ 599,
+ 1663,
+ 1280,
+ 658,
+ 427,
+ 679,
+ 1161,
+ 600,
+ 917,
+ 98,
+ 1506,
+ 1633,
+ 1660,
+ 1700,
+ 877,
+ 615,
+ 98,
+ 310,
+ 143,
+ 1706,
+ 1326,
+ 1439,
+ 1731,
+ 1410,
+ 1014,
+ 444,
+ 1016,
+ 1759
+ ]
+ },
+ {
+ "word": "using",
+ "duration": 0.73,
+ "codes": [
+ 1259,
+ 979,
+ 1381,
+ 1205,
+ 257,
+ 464,
+ 1011,
+ 824,
+ 1108,
+ 1230,
+ 1163,
+ 1429,
+ 1823,
+ 1096,
+ 1432,
+ 1479,
+ 751,
+ 1495,
+ 673,
+ 1705,
+ 1665,
+ 624,
+ 1322,
+ 561,
+ 445,
+ 551,
+ 1791,
+ 1545,
+ 198,
+ 1530,
+ 1698,
+ 11,
+ 1715,
+ 1019,
+ 202,
+ 875,
+ 844,
+ 1249,
+ 882,
+ 1718,
+ 1445,
+ 889,
+ 1035,
+ 858,
+ 1700,
+ 556,
+ 1093,
+ 356,
+ 1715,
+ 1008,
+ 1518,
+ 1528,
+ 1804,
+ 1737,
+ 629
+ ]
+ },
+ {
+ "word": "living",
+ "duration": 0.48,
+ "codes": [
+ 1756,
+ 1715,
+ 1501,
+ 1798,
+ 1362,
+ 1211,
+ 1798,
+ 1776,
+ 1323,
+ 1521,
+ 1518,
+ 1463,
+ 1605,
+ 1009,
+ 1661,
+ 127,
+ 1472,
+ 69,
+ 1460,
+ 158,
+ 1697,
+ 1777,
+ 1465,
+ 233,
+ 1778,
+ 1722,
+ 1217,
+ 395,
+ 1332,
+ 344,
+ 372,
+ 1357,
+ 143,
+ 546,
+ 22,
+ 123
+ ]
+ },
+ {
+ "word": "animals",
+ "duration": 0.6,
+ "codes": [
+ 1787,
+ 605,
+ 813,
+ 1682,
+ 1110,
+ 1768,
+ 797,
+ 1393,
+ 1326,
+ 1126,
+ 1775,
+ 1437,
+ 975,
+ 1312,
+ 1751,
+ 892,
+ 1697,
+ 1342,
+ 804,
+ 1634,
+ 1019,
+ 1440,
+ 1644,
+ 1490,
+ 1463,
+ 1321,
+ 1321,
+ 1696,
+ 1620,
+ 662,
+ 1572,
+ 1704,
+ 1780,
+ 1729,
+ 1341,
+ 503,
+ 1019,
+ 1379,
+ 1742,
+ 617,
+ 1804,
+ 1537,
+ 736,
+ 833,
+ 1797
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.27,
+ "codes": [
+ 1002,
+ 1349,
+ 1790,
+ 1147,
+ 1683,
+ 1385,
+ 820,
+ 567,
+ 668,
+ 551,
+ 670,
+ 1770,
+ 437,
+ 559,
+ 420,
+ 1178,
+ 24,
+ 1583,
+ 1423,
+ 903
+ ]
+ },
+ {
+ "word": "represent",
+ "duration": 0.67,
+ "codes": [
+ 385,
+ 1312,
+ 160,
+ 1734,
+ 1717,
+ 1768,
+ 1496,
+ 1449,
+ 803,
+ 1498,
+ 178,
+ 1326,
+ 225,
+ 1498,
+ 1269,
+ 859,
+ 1570,
+ 68,
+ 194,
+ 1684,
+ 886,
+ 886,
+ 1472,
+ 926,
+ 1439,
+ 1673,
+ 1445,
+ 802,
+ 1735,
+ 519,
+ 382,
+ 22,
+ 863,
+ 1083,
+ 1487,
+ 1511,
+ 1443,
+ 1036,
+ 1345,
+ 607,
+ 1785,
+ 1627,
+ 1598,
+ 1676,
+ 1219,
+ 1152,
+ 786,
+ 1288,
+ 544,
+ 17
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.24,
+ "codes": [
+ 382,
+ 1088,
+ 1478,
+ 1813,
+ 1635,
+ 1333,
+ 1587,
+ 1792,
+ 1319,
+ 5,
+ 160,
+ 1681,
+ 368,
+ 1698,
+ 1394,
+ 952,
+ 836,
+ 175
+ ]
+ },
+ {
+ "word": "ancestral",
+ "duration": 0.69,
+ "codes": [
+ 1761,
+ 398,
+ 1640,
+ 1758,
+ 1532,
+ 1526,
+ 1399,
+ 1426,
+ 203,
+ 1526,
+ 947,
+ 473,
+ 783,
+ 1647,
+ 1757,
+ 835,
+ 1607,
+ 1388,
+ 1558,
+ 746,
+ 1748,
+ 372,
+ 1379,
+ 1088,
+ 581,
+ 1565,
+ 1382,
+ 1124,
+ 621,
+ 1161,
+ 1633,
+ 1008,
+ 1355,
+ 1711,
+ 1633,
+ 499,
+ 1104,
+ 581,
+ 980,
+ 795,
+ 1244,
+ 343,
+ 1170,
+ 1332,
+ 1666,
+ 1690,
+ 583,
+ 438,
+ 1019,
+ 1666,
+ 1348,
+ 1491
+ ]
+ },
+ {
+ "word": "creatures",
+ "duration": 0.55,
+ "codes": [
+ 1205,
+ 704,
+ 559,
+ 762,
+ 72,
+ 680,
+ 1397,
+ 1583,
+ 948,
+ 1440,
+ 973,
+ 890,
+ 1243,
+ 1278,
+ 1375,
+ 335,
+ 1249,
+ 1554,
+ 365,
+ 509,
+ 1465,
+ 1009,
+ 1611,
+ 522,
+ 919,
+ 719,
+ 1351,
+ 1392,
+ 1521,
+ 1563,
+ 55,
+ 1630,
+ 1349,
+ 1825,
+ 1345,
+ 361,
+ 1243,
+ 767,
+ 1422,
+ 848,
+ 318
+ ]
+ },
+ {
+ "word": "which",
+ "duration": 0.61,
+ "codes": [
+ 1677,
+ 927,
+ 1360,
+ 534,
+ 1448,
+ 1763,
+ 1668,
+ 1074,
+ 1741,
+ 1331,
+ 1278,
+ 1814,
+ 1412,
+ 1476,
+ 1820,
+ 1036,
+ 1830,
+ 1404,
+ 1411,
+ 1704,
+ 1355,
+ 1637,
+ 1809,
+ 1395,
+ 1779,
+ 1489,
+ 1393,
+ 1233,
+ 1391,
+ 1668,
+ 986,
+ 54,
+ 311,
+ 464,
+ 987,
+ 1635,
+ 844,
+ 576,
+ 850,
+ 870,
+ 925,
+ 1491,
+ 889,
+ 1583,
+ 595,
+ 292
+ ]
+ },
+ {
+ "word": "were",
+ "duration": 0.16,
+ "codes": [
+ 1649,
+ 1005,
+ 1019,
+ 1502,
+ 482,
+ 1056,
+ 1287,
+ 545,
+ 1423,
+ 1492,
+ 1483,
+ 1603
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.11,
+ "codes": [
+ 332,
+ 1659,
+ 767,
+ 1415,
+ 856,
+ 92,
+ 1368,
+ 848
+ ]
+ },
+ {
+ "word": "actual",
+ "duration": 0.41,
+ "codes": [
+ 1061,
+ 1141,
+ 1235,
+ 721,
+ 1309,
+ 800,
+ 1469,
+ 1086,
+ 1601,
+ 1407,
+ 987,
+ 1170,
+ 1772,
+ 32,
+ 930,
+ 257,
+ 1404,
+ 947,
+ 1677,
+ 535,
+ 782,
+ 1701,
+ 1466,
+ 1069,
+ 1388,
+ 1372,
+ 1572,
+ 614,
+ 1675,
+ 861,
+ 1537
+ ]
+ },
+ {
+ "word": "protagonists",
+ "duration": 0.99,
+ "codes": [
+ 501,
+ 641,
+ 765,
+ 1525,
+ 711,
+ 1247,
+ 1736,
+ 1675,
+ 1008,
+ 1174,
+ 1717,
+ 1629,
+ 888,
+ 704,
+ 1233,
+ 715,
+ 1036,
+ 1494,
+ 1511,
+ 587,
+ 935,
+ 1229,
+ 901,
+ 1258,
+ 1110,
+ 1332,
+ 1758,
+ 50,
+ 1575,
+ 702,
+ 608,
+ 1634,
+ 1370,
+ 1280,
+ 1707,
+ 775,
+ 1787,
+ 1570,
+ 1219,
+ 1805,
+ 1775,
+ 1492,
+ 0,
+ 1497,
+ 1500,
+ 1833,
+ 1473,
+ 1459,
+ 1652,
+ 1364,
+ 715,
+ 1817,
+ 656,
+ 1355,
+ 1439,
+ 382,
+ 1632,
+ 870,
+ 1329,
+ 1669,
+ 1139,
+ 1322,
+ 1568,
+ 1053,
+ 325,
+ 1139,
+ 793,
+ 1763,
+ 767,
+ 976,
+ 1422,
+ 1329,
+ 1682,
+ 1410
+ ]
+ }
+ ],
+ "language": "en"
+}
\ No newline at end of file
diff --git a/examples/outetts/speakers/en_female_1.json b/examples/outetts/speakers/en_female_1.json
new file mode 100644
index 000000000..b4fe19459
--- /dev/null
+++ b/examples/outetts/speakers/en_female_1.json
@@ -0,0 +1,277 @@
+{
+ "text": " Uhm, now being the one to say, I know the worst of you and I've been directly affected by people like you, but it's a clean slate with me, buddy. You know, like that's really powerful in and of itself.",
+ "words": [
+ {
+ "word": "uhm",
+ "duration": 0.36,
+ "codes": [
+ 447, 223, 967, 301, 965, 827, 393, 908, 764, 1167, 711, 1222, 324, 1318, 806, 498, 1198, 1127, 1178,
+ 916, 1234, 1411, 1428, 706, 427, 1605, 1578
+ ]
+ },
+ {
+ "word": "now",
+ "duration": 0.36,
+ "codes": [
+ 1049, 327, 385, 1070, 732, 1480, 450, 1025, 1469, 174, 1013, 1710, 1674, 775, 771, 251, 778, 1400, 897,
+ 1487, 366, 441, 1000, 393, 271, 1000, 768
+ ]
+ },
+ {
+ "word": "being",
+ "duration": 0.27,
+ "codes": [
+ 926, 406, 1457, 437, 1231, 672, 1785, 521, 1179, 1559, 198, 1086, 733, 122, 1344, 845, 348, 1389, 470,
+ 1773
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.08,
+ "codes": [1775, 562, 768, 1222, 768, 963]
+ },
+ {
+ "word": "one",
+ "duration": 0.21,
+ "codes": [1757, 744, 144, 1610, 655, 616, 1317, 225, 1325, 913, 1342, 992, 1018, 80, 1777, 883]
+ },
+ {
+ "word": "to",
+ "duration": 0.08,
+ "codes": [487, 1363, 1682, 1426, 655, 1483]
+ },
+ {
+ "word": "say",
+ "duration": 0.27,
+ "codes": [
+ 1644, 1804, 731, 273, 1592, 731, 1523, 1404, 984, 1207, 430, 1132, 1123, 768, 1116, 829, 1082, 1095,
+ 440, 1162
+ ]
+ },
+ {
+ "word": "i",
+ "duration": 0.33,
+ "codes": [
+ 1330, 335, 1162, 1155, 308, 1162, 1150, 1481, 612, 674, 712, 1745, 1188, 1787, 1135, 1275, 1237, 1143,
+ 408, 1063, 393, 927, 1298, 132, 1686
+ ]
+ },
+ {
+ "word": "know",
+ "duration": 0.27,
+ "codes": [
+ 983, 1677, 586, 1528, 1435, 835, 1396, 706, 987, 22, 1172, 218, 1404, 1001, 521, 1389, 775, 1416, 877,
+ 120
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.16,
+ "codes": [916, 1756, 513, 1245, 1392, 89, 1266, 12, 1045, 1075, 904, 35]
+ },
+ {
+ "word": "worst",
+ "duration": 0.32,
+ "codes": [
+ 1607, 174, 1231, 144, 932, 490, 771, 1504, 798, 674, 364, 80, 1314, 1636, 449, 1704, 713, 1795, 968,
+ 1527, 1302, 1529, 1176, 795
+ ]
+ },
+ {
+ "word": "of",
+ "duration": 0.12,
+ "codes": [1193, 1205, 390, 1128, 1091, 883, 322, 377, 1070]
+ },
+ {
+ "word": "you",
+ "duration": 0.17,
+ "codes": [1016, 1332, 926, 281, 927, 1368, 1687, 918, 67, 1638, 1317, 1265, 1770]
+ },
+ {
+ "word": "and",
+ "duration": 0.28,
+ "codes": [
+ 1129, 1633, 1373, 1207, 405, 879, 1030, 1253, 1071, 612, 724, 1770, 665, 1046, 1351, 1450, 1541, 1384,
+ 111, 1477, 284
+ ]
+ },
+ {
+ "word": "ive",
+ "duration": 0.35,
+ "codes": [
+ 674, 266, 89, 1333, 1183, 1526, 1143, 883, 1135, 732, 827, 1119, 594, 1261, 1024, 1347, 92, 1392, 825,
+ 1710, 1289, 1598, 1070, 1525, 1442, 555
+ ]
+ },
+ {
+ "word": "been",
+ "duration": 0.17,
+ "codes": [1461, 194, 337, 1128, 188, 892, 848, 1280, 959, 754, 231, 649, 1304]
+ },
+ {
+ "word": "directly",
+ "duration": 0.87,
+ "codes": [
+ 1030, 353, 570, 1331, 470, 1832, 1362, 1809, 1383, 101, 325, 1557, 1242, 1512, 180, 227, 1242, 643, 209,
+ 464, 171, 1219, 174, 1723, 734, 118, 1269, 643, 209, 187, 612, 1231, 68, 567, 1242, 505, 319, 1268, 794,
+ 678, 40, 1286, 470, 1454, 199, 965, 188, 300, 1234, 1125, 794, 1289, 1224, 257, 469, 1121, 101, 823,
+ 1769, 1683, 95, 255, 59, 67, 832
+ ]
+ },
+ {
+ "word": "affected",
+ "duration": 0.44,
+ "codes": [
+ 510, 873, 787, 1228, 771, 1428, 501, 751, 696, 258, 845, 1818, 1112, 498, 1111, 985, 1073, 832, 1427,
+ 168, 163, 447, 119, 567, 1626, 1820, 903, 635, 1060, 10, 1632, 35, 1635
+ ]
+ },
+ {
+ "word": "by",
+ "duration": 0.19,
+ "codes": [144, 144, 460, 185, 1112, 1044, 498, 1192, 656, 1333, 1001, 1186, 1186, 454]
+ },
+ {
+ "word": "people",
+ "duration": 0.48,
+ "codes": [
+ 1260, 747, 351, 526, 612, 1151, 1262, 1791, 344, 1752, 1547, 930, 1302, 1703, 1289, 92, 1407, 1482, 508,
+ 1431, 355, 1696, 337, 199, 1157, 223, 464, 568, 845, 411, 826, 718, 1786, 545, 712, 580
+ ]
+ },
+ {
+ "word": "like",
+ "duration": 0.32,
+ "codes": [
+ 630, 532, 526, 607, 526, 839, 1305, 660, 459, 339, 717, 1178, 1148, 687, 149, 1390, 229, 199, 513, 712,
+ 1451, 731, 582, 1551
+ ]
+ },
+ {
+ "word": "you",
+ "duration": 0.21,
+ "codes": [1389, 954, 1781, 1047, 1236, 930, 809, 1621, 1268, 384, 242, 587, 869, 816, 1680, 405]
+ },
+ {
+ "word": "but",
+ "duration": 0.59,
+ "codes": [
+ 1089, 1590, 908, 80, 594, 1046, 1706, 1025, 1150, 405, 548, 893, 1285, 464, 301, 939, 643, 23, 285, 161,
+ 209, 453, 72, 167, 417, 244, 151, 643, 391, 199, 651, 1023, 337, 1010, 54, 331, 1167, 756, 388, 934,
+ 1060, 18, 1624, 1060
+ ]
+ },
+ {
+ "word": "its",
+ "duration": 0.16,
+ "codes": [1102, 183, 1199, 1258, 1285, 35, 659, 180, 426, 1587, 1733, 942]
+ },
+ {
+ "word": "a",
+ "duration": 0.04,
+ "codes": [791, 1012, 818]
+ },
+ {
+ "word": "clean",
+ "duration": 0.61,
+ "codes": [
+ 1819, 976, 163, 447, 316, 223, 763, 457, 1208, 1808, 1697, 1162, 1660, 1833, 1054, 1734, 1121, 1309,
+ 1643, 924, 1677, 1548, 869, 1268, 223, 674, 111, 792, 1670, 912, 174, 1554, 90, 80, 1563, 1621, 1698,
+ 1544, 992, 988, 175, 793, 1661, 1026, 80, 1761
+ ]
+ },
+ {
+ "word": "slate",
+ "duration": 0.4,
+ "codes": [
+ 1802, 322, 1689, 1577, 1302, 1552, 1529, 1722, 1580, 582, 1642, 1529, 1020, 582, 1538, 970, 437, 1141,
+ 1477, 988, 335, 1611, 922, 1558, 1120, 1189, 423, 188, 171, 562
+ ]
+ },
+ {
+ "word": "with",
+ "duration": 0.15,
+ "codes": [963, 1347, 1274, 747, 1230, 712, 1408, 1290, 957, 1279, 258]
+ },
+ {
+ "word": "me",
+ "duration": 0.09,
+ "codes": [638, 1058, 174, 1452, 1038, 894, 1571]
+ },
+ {
+ "word": "buddy",
+ "duration": 0.32,
+ "codes": [
+ 1003, 130, 1341, 938, 40, 804, 167, 89, 1456, 1189, 1155, 1171, 1434, 1077, 1029, 1455, 1622, 1037, 163,
+ 1411, 1165, 1463, 837, 1202
+ ]
+ },
+ {
+ "word": "you",
+ "duration": 0.36,
+ "codes": [
+ 1354, 1165, 615, 1588, 1192, 1445, 1033, 982, 401, 1079, 684, 1570, 266, 31, 420, 163, 893, 845, 905,
+ 1827, 1804, 153, 627, 243, 1179, 298, 1147
+ ]
+ },
+ {
+ "word": "know",
+ "duration": 0.19,
+ "codes": [163, 1542, 1366, 698, 1753, 206, 916, 1499, 245, 665, 600, 894, 587, 1741]
+ },
+ {
+ "word": "like",
+ "duration": 0.24,
+ "codes": [1106, 1280, 1062, 1304, 945, 809, 598, 104, 1001, 822, 965, 189, 693, 1810, 1293, 199, 1277, 44]
+ },
+ {
+ "word": "thats",
+ "duration": 0.24,
+ "codes": [
+ 121, 1789, 1443, 370, 1154, 393, 1178, 1200, 1264, 424, 1391, 381, 978, 1346, 704, 1808, 1579, 1492
+ ]
+ },
+ {
+ "word": "really",
+ "duration": 0.56,
+ "codes": [
+ 1177, 1761, 1723, 1360, 1413, 830, 551, 193, 59, 332, 598, 734, 1684, 1802, 60, 1590, 353, 89, 1636,
+ 1396, 893, 143, 455, 1501, 435, 1082, 621, 1593, 677, 474, 971, 1513, 913, 828, 1381, 1148, 1798, 1186,
+ 1443, 38, 335, 883
+ ]
+ },
+ {
+ "word": "powerful",
+ "duration": 0.63,
+ "codes": [
+ 1773, 458, 1070, 964, 826, 1220, 1012, 1738, 1125, 669, 490, 1169, 922, 958, 1204, 489, 1001, 886, 1045,
+ 675, 1471, 1652, 732, 698, 1124, 480, 897, 1484, 1028, 35, 594, 1465, 505, 1669, 436, 851, 1288, 31,
+ 1501, 1187, 394, 909, 1541, 1793, 1720, 922, 840
+ ]
+ },
+ {
+ "word": "in",
+ "duration": 0.16,
+ "codes": [1317, 523, 630, 1343, 1187, 719, 907, 636, 111, 1524, 188, 1382]
+ },
+ {
+ "word": "and",
+ "duration": 0.13,
+ "codes": [1074, 922, 1280, 1496, 1050, 832, 133, 1435, 1049, 1774]
+ },
+ {
+ "word": "of",
+ "duration": 0.12,
+ "codes": [960, 1052, 1192, 1303, 1112, 970, 417, 60, 1155]
+ },
+ {
+ "word": "itself",
+ "duration": 0.47,
+ "codes": [
+ 1682, 1209, 1410, 513, 1222, 861, 167, 406, 1551, 582, 634, 1529, 786, 1363, 1578, 1739, 873, 424, 1041,
+ 1328, 955, 1110, 1490, 1424, 1199, 988, 1162, 1133, 1193, 978, 470, 832, 963, 1251, 733
+ ]
+ }
+ ],
+ "language": "en"
+}
diff --git a/examples/outetts/speakers/en_female_2.json b/examples/outetts/speakers/en_female_2.json
new file mode 100644
index 000000000..db67aa1c8
--- /dev/null
+++ b/examples/outetts/speakers/en_female_2.json
@@ -0,0 +1,210 @@
+{
+ "text": "So we have five words here, um, all to do with a plumber and water pipes. Now the first word they talked about that there was water leaking everywhere.",
+ "words": [
+ {
+ "word": "so",
+ "duration": 0.15,
+ "codes": [391, 1319, 1478, 895, 1580, 533, 166, 1015, 1169, 1186, 380]
+ },
+ {
+ "word": "we",
+ "duration": 0.12,
+ "codes": [1403, 1150, 80, 1187, 1230, 529, 182, 398, 853]
+ },
+ {
+ "word": "have",
+ "duration": 0.17,
+ "codes": [243, 1049, 759, 1477, 371, 1158, 422, 1516, 203, 449, 1351, 1546, 1130]
+ },
+ {
+ "word": "five",
+ "duration": 0.51,
+ "codes": [
+ 1241, 719, 239, 886, 1795, 1160, 1412, 1638, 943, 1588, 497, 1162, 106, 1185, 1183, 477, 840, 164, 581,
+ 775, 474, 518, 361, 764, 628, 153, 234, 125, 584, 1029, 149, 276, 1243, 1800, 1468, 1728, 1439, 1772
+ ]
+ },
+ {
+ "word": "words",
+ "duration": 0.45,
+ "codes": [
+ 1455, 1366, 1734, 920, 703, 111, 1497, 1521, 302, 111, 128, 747, 687, 747, 104, 66, 208, 1727, 545, 69,
+ 684, 380, 1666, 1368, 183, 145, 1771, 1613, 1807, 1238, 1549, 1393, 1027, 736
+ ]
+ },
+ {
+ "word": "here",
+ "duration": 0.36,
+ "codes": [
+ 117, 399, 1325, 1447, 1226, 1761, 778, 1120, 1764, 47, 348, 1416, 921, 50, 372, 437, 104, 771, 218, 982,
+ 162, 754, 997, 162, 912, 907, 245
+ ]
+ },
+ {
+ "word": "um",
+ "duration": 0.39,
+ "codes": [
+ 1234, 511, 81, 1270, 1475, 841, 595, 64, 1268, 1461, 680, 1320, 573, 818, 764, 1068, 637, 684, 402,
+ 1222, 877, 537, 1103, 779, 300, 1629, 177, 529, 1672
+ ]
+ },
+ {
+ "word": "all",
+ "duration": 0.41,
+ "codes": [
+ 653, 1811, 131, 516, 1525, 727, 1338, 1749, 758, 1097, 173, 116, 102, 660, 693, 1177, 159, 799, 221,
+ 1187, 984, 152, 95, 942, 1046, 236, 756, 1042, 1159, 1159, 1297
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.24,
+ "codes": [577, 1542, 1356, 1122, 58, 47, 1759, 566, 1478, 1814, 599, 276, 1403, 62, 822, 1568, 95, 691]
+ },
+ {
+ "word": "do",
+ "duration": 0.23,
+ "codes": [65, 1619, 1606, 1584, 251, 85, 1802, 1250, 350, 714, 1814, 631, 510, 1463, 307, 202, 50]
+ },
+ {
+ "word": "with",
+ "duration": 0.45,
+ "codes": [
+ 853, 893, 213, 4, 143, 120, 1141, 38, 1637, 182, 389, 747, 890, 1637, 1618, 843, 145, 1445, 381, 234,
+ 1274, 228, 1484, 374, 245, 1131, 254, 1062, 844, 1635, 1491, 1795, 345, 891
+ ]
+ },
+ {
+ "word": "a",
+ "duration": 0.19,
+ "codes": [1693, 1462, 874, 1511, 1408, 1284, 450, 274, 1466, 1682, 1173, 1062, 1073, 1675]
+ },
+ {
+ "word": "plumber",
+ "duration": 0.6,
+ "codes": [
+ 935, 1389, 1178, 1803, 1006, 1419, 1614, 1495, 1711, 1779, 1423, 874, 1407, 1626, 1351, 1748, 1145,
+ 1768, 555, 1389, 252, 1274, 1356, 717, 2, 1147, 1076, 1333, 234, 23, 1453, 704, 143, 1243, 1174, 1483,
+ 1247, 157, 37, 393, 988, 1141, 206, 1052, 818
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.43,
+ "codes": [
+ 329, 1801, 926, 756, 1469, 1040, 1313, 516, 1166, 1129, 1369, 263, 1144, 1026, 318, 1809, 803, 364,
+ 1646, 383, 1074, 109, 113, 775, 1415, 863, 1352, 1369, 1796, 1385, 1765, 1618
+ ]
+ },
+ {
+ "word": "water",
+ "duration": 0.48,
+ "codes": [
+ 315, 1075, 1675, 86, 970, 679, 1491, 1635, 1833, 60, 1619, 1595, 414, 126, 908, 425, 277, 1159, 798,
+ 186, 1179, 321, 349, 497, 1611, 157, 1480, 1502, 1075, 1162, 993, 74, 520, 113, 1731, 1314
+ ]
+ },
+ {
+ "word": "pipes",
+ "duration": 0.53,
+ "codes": [
+ 1415, 150, 1705, 1813, 401, 1456, 1325, 1671, 1820, 1170, 1489, 1328, 931, 410, 359, 836, 1253, 1074,
+ 1192, 1283, 1055, 298, 1711, 937, 1375, 1144, 1321, 1672, 630, 1623, 1828, 1041, 230, 519, 1139, 749,
+ 1638, 1139, 1641, 1393
+ ]
+ },
+ {
+ "word": "now",
+ "duration": 0.63,
+ "codes": [
+ 231, 1688, 788, 333, 1830, 938, 1461, 1668, 1355, 1666, 1412, 1701, 1705, 1088, 1746, 1397, 1002, 1793,
+ 1259, 1459, 1803, 927, 1790, 1720, 985, 1803, 1259, 1400, 1803, 927, 1638, 1438, 1411, 1010, 933, 851,
+ 1764, 1493, 485, 74, 1794, 1181, 208, 1492, 1046, 1193, 774
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.08,
+ "codes": [1657, 1022, 76, 1116, 1272, 95]
+ },
+ {
+ "word": "first",
+ "duration": 0.49,
+ "codes": [
+ 1730, 1823, 1447, 1217, 1751, 1098, 1287, 1795, 1342, 1711, 1735, 788, 1383, 1532, 1252, 173, 1173, 661,
+ 695, 412, 1139, 913, 113, 1668, 1437, 848, 1364, 962, 132, 719, 1018, 863, 1379, 1535, 696, 1486, 715
+ ]
+ },
+ {
+ "word": "word",
+ "duration": 0.43,
+ "codes": [
+ 1364, 1287, 1698, 1100, 526, 443, 199, 1644, 302, 98, 1625, 1025, 682, 629, 307, 170, 671, 254, 1218,
+ 574, 565, 788, 993, 427, 626, 1398, 738, 587, 1755, 1453, 1617, 406
+ ]
+ },
+ {
+ "word": "they",
+ "duration": 0.2,
+ "codes": [969, 1400, 1491, 486, 1693, 426, 1287, 569, 1168, 550, 1563, 278, 79, 1718, 1321]
+ },
+ {
+ "word": "talked",
+ "duration": 0.4,
+ "codes": [
+ 286, 1023, 673, 1419, 1776, 599, 1041, 1629, 1125, 1671, 1687, 1320, 1473, 1260, 1254, 297, 1004, 1611,
+ 840, 969, 1759, 772, 1556, 1809, 869, 1704, 986, 1450, 335, 1477
+ ]
+ },
+ {
+ "word": "about",
+ "duration": 0.24,
+ "codes": [839, 376, 1553, 301, 426, 1346, 1671, 215, 1194, 971, 25, 838, 152, 1501, 1665, 1757, 1092, 710]
+ },
+ {
+ "word": "that",
+ "duration": 0.12,
+ "codes": [436, 1166, 53, 1153, 324, 576, 1689, 1717, 566]
+ },
+ {
+ "word": "there",
+ "duration": 0.16,
+ "codes": [1108, 1338, 1634, 903, 1355, 228, 813, 881, 102, 396, 1415, 518]
+ },
+ {
+ "word": "was",
+ "duration": 0.12,
+ "codes": [793, 87, 1252, 911, 83, 1793, 634, 183, 124]
+ },
+ {
+ "word": "water",
+ "duration": 0.59,
+ "codes": [
+ 110, 1301, 1639, 520, 1219, 509, 1662, 748, 363, 224, 1753, 877, 1802, 1185, 500, 1148, 1435, 128, 951,
+ 942, 531, 1075, 324, 1402, 1159, 1202, 711, 218, 901, 441, 571, 1317, 1753, 1722, 1001, 157, 1137, 254,
+ 607, 808, 125, 661, 51, 798
+ ]
+ },
+ {
+ "word": "leaking",
+ "duration": 0.96,
+ "codes": [
+ 1171, 1743, 1260, 1331, 91, 1118, 1782, 555, 1385, 505, 121, 1234, 998, 1360, 836, 214, 1048, 423, 13,
+ 439, 686, 303, 1352, 864, 727, 107, 1038, 1767, 479, 1685, 806, 1639, 719, 125, 365, 1237, 1732, 298,
+ 1471, 485, 1141, 1734, 1803, 971, 1798, 691, 1648, 1166, 1082, 1535, 1645, 1154, 388, 11, 110, 890, 449,
+ 736, 276, 1103, 714, 407, 1749, 243, 1694, 479, 426, 1368, 1348, 700, 562, 519
+ ]
+ },
+ {
+ "word": "everywhere",
+ "duration": 1.0,
+ "codes": [
+ 4, 952, 856, 755, 1686, 372, 570, 1816, 1675, 227, 1675, 1145, 674, 1419, 933, 346, 956, 1739, 1519,
+ 192, 1778, 346, 1106, 1174, 1403, 175, 1491, 1758, 1451, 1777, 1411, 1773, 1716, 1407, 1743, 1186, 1031,
+ 902, 1174, 7, 231, 737, 1106, 1189, 592, 481, 1659, 1339, 1155, 110, 384, 583, 1656, 626, 279, 1172,
+ 326, 560, 326, 755, 1097, 1105, 1203, 675, 208, 1002, 1158, 1204, 1132, 1063, 595, 1102, 1216, 855, 1229
+ ]
+ }
+ ],
+ "language": "en"
+}
diff --git a/examples/outetts/speakers/en_male_1.json b/examples/outetts/speakers/en_male_1.json
new file mode 100644
index 000000000..122ed9ae7
--- /dev/null
+++ b/examples/outetts/speakers/en_male_1.json
@@ -0,0 +1,207 @@
+{
+ "text": "The overall package from just two people is pretty remarkable. Sure I have some critiques about some of the gameplay aspects, but it's still really enjoyable and it looks lovely.",
+ "words": [
+ {
+ "word": "the",
+ "duration": 0.08,
+ "codes": [257, 740, 636, 913, 788, 1703]
+ },
+ {
+ "word": "overall",
+ "duration": 0.36,
+ "codes": [
+ 127, 201, 191, 774, 700, 532, 1056, 557, 798, 298, 1741, 747, 1662, 1617, 1702, 1527, 368, 1588, 1049,
+ 1008, 1625, 747, 1576, 728, 1019, 1696, 1765
+ ]
+ },
+ {
+ "word": "package",
+ "duration": 0.56,
+ "codes": [
+ 935, 584, 1319, 627, 1016, 1491, 1344, 1117, 1526, 1040, 239, 1435, 951, 498, 723, 1180, 535, 789, 1649,
+ 1637, 78, 465, 1668, 901, 595, 1675, 117, 1009, 1667, 320, 840, 79, 507, 1762, 1508, 1228, 1768, 802,
+ 1450, 1457, 232, 639
+ ]
+ },
+ {
+ "word": "from",
+ "duration": 0.19,
+ "codes": [604, 782, 1682, 872, 1532, 1600, 1036, 1761, 647, 1554, 1371, 653, 1595, 950]
+ },
+ {
+ "word": "just",
+ "duration": 0.25,
+ "codes": [
+ 1782, 1670, 317, 786, 1748, 631, 599, 1155, 1364, 1524, 36, 1591, 889, 1535, 541, 440, 1532, 50, 870
+ ]
+ },
+ {
+ "word": "two",
+ "duration": 0.24,
+ "codes": [1681, 1510, 673, 799, 805, 1342, 330, 519, 62, 640, 1138, 565, 1552, 1497, 1552, 572, 1715, 1732]
+ },
+ {
+ "word": "people",
+ "duration": 0.39,
+ "codes": [
+ 593, 274, 136, 740, 691, 633, 1484, 1061, 1138, 1485, 344, 428, 397, 1562, 645, 917, 1035, 1449, 1669,
+ 487, 442, 1484, 1329, 1832, 1704, 600, 761, 653, 269
+ ]
+ },
+ {
+ "word": "is",
+ "duration": 0.16,
+ "codes": [566, 583, 1755, 646, 1337, 709, 802, 1008, 485, 1583, 652, 10]
+ },
+ {
+ "word": "pretty",
+ "duration": 0.32,
+ "codes": [
+ 1818, 1747, 692, 733, 1010, 534, 406, 1697, 1053, 1521, 1355, 1274, 816, 1398, 211, 1218, 817, 1472,
+ 1703, 686, 13, 822, 445, 1068
+ ]
+ },
+ {
+ "word": "remarkable",
+ "duration": 0.68,
+ "codes": [
+ 230, 1048, 1705, 355, 706, 1149, 1535, 1787, 1356, 1396, 835, 1583, 486, 1249, 286, 937, 1076, 1150,
+ 614, 42, 1058, 705, 681, 798, 934, 490, 514, 1399, 572, 1446, 1703, 1346, 1040, 1426, 1304, 664, 171,
+ 1530, 625, 64, 1708, 1830, 1030, 443, 1509, 1063, 1605, 1785, 721, 1440, 923
+ ]
+ },
+ {
+ "word": "sure",
+ "duration": 0.36,
+ "codes": [
+ 792, 1780, 923, 1640, 265, 261, 1525, 567, 1491, 1250, 1730, 362, 919, 1766, 543, 1, 333, 113, 970, 252,
+ 1606, 133, 302, 1810, 1046, 1190, 1675
+ ]
+ },
+ {
+ "word": "i",
+ "duration": 0.08,
+ "codes": [123, 439, 1074, 705, 1799, 637]
+ },
+ {
+ "word": "have",
+ "duration": 0.16,
+ "codes": [1509, 599, 518, 1170, 552, 1029, 1267, 864, 419, 143, 1061, 0]
+ },
+ {
+ "word": "some",
+ "duration": 0.16,
+ "codes": [619, 400, 1270, 62, 1370, 1832, 917, 1661, 167, 269, 1366, 1508]
+ },
+ {
+ "word": "critiques",
+ "duration": 0.6,
+ "codes": [
+ 559, 584, 1163, 1129, 1313, 1728, 721, 1146, 1093, 577, 928, 27, 630, 1080, 1346, 1337, 320, 1382, 1175,
+ 1682, 1556, 990, 1683, 860, 1721, 110, 786, 376, 1085, 756, 1523, 234, 1334, 1506, 1578, 659, 612, 1108,
+ 1466, 1647, 308, 1470, 746, 556, 1061
+ ]
+ },
+ {
+ "word": "about",
+ "duration": 0.29,
+ "codes": [
+ 26, 1649, 545, 1367, 1263, 1728, 450, 859, 1434, 497, 1220, 1285, 179, 755, 1154, 779, 179, 1229, 1213,
+ 922, 1774, 1408
+ ]
+ },
+ {
+ "word": "some",
+ "duration": 0.23,
+ "codes": [986, 28, 1649, 778, 858, 1519, 1, 18, 26, 1042, 1174, 1309, 1499, 1712, 1692, 1516, 1574]
+ },
+ {
+ "word": "of",
+ "duration": 0.07,
+ "codes": [197, 716, 1039, 1662, 64]
+ },
+ {
+ "word": "the",
+ "duration": 0.08,
+ "codes": [1811, 1568, 569, 886, 1025, 1374]
+ },
+ {
+ "word": "gameplay",
+ "duration": 0.48,
+ "codes": [
+ 1269, 1092, 933, 1362, 1762, 1700, 1675, 215, 781, 1086, 461, 838, 1022, 759, 649, 1416, 1004, 551, 909,
+ 787, 343, 830, 1391, 1040, 1622, 1779, 1360, 1231, 1187, 1317, 76, 997, 989, 978, 737, 189
+ ]
+ },
+ {
+ "word": "aspects",
+ "duration": 0.56,
+ "codes": [
+ 1423, 797, 1316, 1222, 147, 719, 1347, 386, 1390, 1558, 154, 440, 634, 592, 1097, 1718, 712, 763, 1118,
+ 1721, 1311, 868, 580, 362, 1435, 868, 247, 221, 886, 1145, 1274, 1284, 457, 1043, 1459, 1818, 62, 599,
+ 1035, 62, 1649, 778
+ ]
+ },
+ {
+ "word": "but",
+ "duration": 0.2,
+ "codes": [780, 1825, 1681, 1007, 861, 710, 702, 939, 1669, 1491, 613, 1739, 823, 1469, 648]
+ },
+ {
+ "word": "its",
+ "duration": 0.09,
+ "codes": [92, 688, 1623, 962, 1670, 527, 599]
+ },
+ {
+ "word": "still",
+ "duration": 0.27,
+ "codes": [
+ 636, 10, 1217, 344, 713, 957, 823, 154, 1649, 1286, 508, 214, 1760, 1250, 456, 1352, 1368, 921, 615, 5
+ ]
+ },
+ {
+ "word": "really",
+ "duration": 0.36,
+ "codes": [
+ 55, 420, 1008, 1659, 27, 644, 1266, 617, 761, 1712, 109, 1465, 1587, 503, 1541, 619, 197, 1019, 817,
+ 269, 377, 362, 1381, 507, 1488, 4, 1695
+ ]
+ },
+ {
+ "word": "enjoyable",
+ "duration": 0.49,
+ "codes": [
+ 678, 501, 864, 319, 288, 1472, 1341, 686, 562, 1463, 619, 1563, 471, 911, 730, 1811, 1006, 520, 861,
+ 1274, 125, 1431, 638, 621, 153, 876, 1770, 437, 987, 1653, 1109, 898, 1285, 80, 593, 1709, 843
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.15,
+ "codes": [1285, 987, 303, 1037, 730, 1164, 502, 120, 1737, 1655, 1318]
+ },
+ {
+ "word": "it",
+ "duration": 0.09,
+ "codes": [848, 1366, 395, 1601, 1513, 593, 1302]
+ },
+ {
+ "word": "looks",
+ "duration": 0.27,
+ "codes": [
+ 1281, 1266, 1755, 572, 248, 1751, 1257, 695, 1380, 457, 659, 585, 1315, 1105, 1776, 736, 24, 736, 654,
+ 1027
+ ]
+ },
+ {
+ "word": "lovely",
+ "duration": 0.56,
+ "codes": [
+ 634, 596, 1766, 1556, 1306, 1285, 1481, 1721, 1123, 438, 1246, 1251, 795, 659, 1381, 1658, 217, 1772,
+ 562, 952, 107, 1129, 1112, 467, 550, 1079, 840, 1615, 1469, 1380, 168, 917, 836, 1827, 437, 583, 67,
+ 595, 1087, 1646, 1493, 1677
+ ]
+ }
+ ],
+ "language": "en"
+}
diff --git a/examples/outetts/speakers/en_male_2.json b/examples/outetts/speakers/en_male_2.json
new file mode 100644
index 000000000..6d69ddddf
--- /dev/null
+++ b/examples/outetts/speakers/en_male_2.json
@@ -0,0 +1,246 @@
+{
+ "text": "Difficult action game. While you have your melee attacks, the majority of combat takes place using your guns. From long rifles to shotguns and my trusty magnum. There is an emphasis on cooperative play.",
+ "words": [
+ {
+ "word": "difficult",
+ "duration": 0.44,
+ "codes": [
+ 1567, 714, 1238, 243, 810, 104, 1826, 1081, 865, 1627, 1176, 1535, 1238, 1832, 346, 647, 36, 631, 1750,
+ 1273, 1806, 861, 1429, 1332, 1633, 326, 1530, 1661, 503, 1597, 326, 733, 1715
+ ]
+ },
+ {
+ "word": "action",
+ "duration": 0.48,
+ "codes": [
+ 1319, 1792, 1238, 939, 1765, 1282, 847, 621, 1147, 595, 514, 1180, 1235, 1438, 1057, 1472, 1489, 1822,
+ 1092, 1178, 796, 1511, 290, 1018, 687, 290, 1403, 253, 1755, 1107, 780, 1376, 341, 1614, 176, 317
+ ]
+ },
+ {
+ "word": "game",
+ "duration": 0.4,
+ "codes": [
+ 1617, 703, 1617, 662, 101, 1530, 1678, 782, 1829, 146, 1175, 1771, 422, 370, 1079, 1069, 282, 996, 1134,
+ 814, 430, 296, 1534, 1742, 947, 1285, 1710, 1465, 1152, 640
+ ]
+ },
+ {
+ "word": "while",
+ "duration": 0.32,
+ "codes": [
+ 1506, 1765, 1451, 1590, 1637, 1390, 727, 1010, 766, 1007, 1585, 1341, 48, 600, 1715, 1226, 887, 840,
+ 310, 362, 972, 1765, 67, 1572
+ ]
+ },
+ {
+ "word": "you",
+ "duration": 0.11,
+ "codes": [1581, 527, 1513, 1267, 774, 108, 29, 1366]
+ },
+ {
+ "word": "have",
+ "duration": 0.21,
+ "codes": [1591, 434, 949, 1407, 117, 907, 874, 1105, 1132, 498, 1161, 13, 742, 1528, 1653, 1710]
+ },
+ {
+ "word": "your",
+ "duration": 0.15,
+ "codes": [1337, 990, 234, 831, 4, 616, 1820, 1083, 1710, 1564, 529]
+ },
+ {
+ "word": "melee",
+ "duration": 0.33,
+ "codes": [
+ 75, 682, 311, 455, 786, 834, 804, 292, 492, 947, 474, 1720, 142, 1628, 614, 758, 789, 117, 1372, 402,
+ 896, 1537, 128, 927, 1108
+ ]
+ },
+ {
+ "word": "attacks",
+ "duration": 0.56,
+ "codes": [
+ 1218, 218, 907, 1005, 1793, 1520, 1694, 458, 1428, 276, 315, 639, 231, 1455, 347, 1447, 1361, 370, 1224,
+ 498, 1509, 499, 588, 1188, 993, 775, 1004, 1591, 716, 999, 1548, 1464, 1720, 681, 1043, 639, 639, 543,
+ 592, 546, 356, 1217
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.24,
+ "codes": [654, 335, 1524, 350, 493, 1644, 1724, 591, 799, 1176, 1023, 694, 1585, 806, 1344, 596, 503, 1557]
+ },
+ {
+ "word": "majority",
+ "duration": 0.52,
+ "codes": [
+ 1416, 240, 62, 443, 1660, 549, 160, 1729, 567, 32, 1703, 1553, 1337, 45, 631, 335, 184, 395, 576, 1068,
+ 759, 548, 587, 427, 1455, 775, 673, 983, 1791, 647, 398, 1595, 1664, 592, 1310, 356, 440, 495, 812
+ ]
+ },
+ {
+ "word": "of",
+ "duration": 0.11,
+ "codes": [1415, 455, 1452, 821, 1300, 1048, 1485, 1626]
+ },
+ {
+ "word": "combat",
+ "duration": 0.44,
+ "codes": [
+ 1588, 1649, 861, 727, 1671, 609, 847, 1004, 0, 1535, 1801, 1100, 1494, 1178, 1224, 623, 1620, 742, 1507,
+ 1794, 932, 1634, 1285, 1192, 764, 1245, 587, 521, 55, 1230, 1532, 1469, 1758
+ ]
+ },
+ {
+ "word": "takes",
+ "duration": 0.32,
+ "codes": [
+ 1408, 681, 1385, 1737, 858, 46, 1649, 1091, 1320, 1015, 756, 269, 787, 230, 1777, 911, 91, 1152, 1412,
+ 1710, 232, 46, 1337, 782
+ ]
+ },
+ {
+ "word": "place",
+ "duration": 0.4,
+ "codes": [
+ 1496, 1821, 1681, 1725, 1324, 1088, 1668, 1568, 1801, 1275, 1776, 741, 621, 1749, 901, 829, 640, 940,
+ 1464, 541, 1008, 1747, 631, 1653, 7, 202, 416, 198, 1337, 202
+ ]
+ },
+ {
+ "word": "using",
+ "duration": 0.4,
+ "codes": [
+ 290, 1459, 778, 1819, 1689, 175, 1569, 182, 1512, 361, 778, 716, 231, 1446, 1240, 1763, 1734, 189, 631,
+ 654, 440, 1280, 1439, 986, 753, 1387, 805, 338, 1606, 1679
+ ]
+ },
+ {
+ "word": "your",
+ "duration": 0.16,
+ "codes": [1520, 1250, 1440, 47, 1374, 1731, 963, 1368, 1426, 1506, 1545, 1730]
+ },
+ {
+ "word": "guns",
+ "duration": 0.48,
+ "codes": [
+ 450, 1553, 1570, 651, 1648, 899, 971, 1573, 1684, 28, 107, 1744, 236, 1067, 1312, 412, 1110, 1112, 1014,
+ 1241, 1726, 875, 1830, 1792, 1219, 1565, 1534, 751, 1726, 1598, 1600, 320, 26, 1018, 1337, 1270
+ ]
+ },
+ {
+ "word": "from",
+ "duration": 0.32,
+ "codes": [
+ 372, 1238, 6, 1436, 1654, 1637, 799, 1244, 1525, 1285, 1390, 1657, 1790, 1061, 634, 493, 972, 977, 1036,
+ 158, 540, 749, 317, 1552
+ ]
+ },
+ {
+ "word": "long",
+ "duration": 0.24,
+ "codes": [875, 529, 710, 63, 1822, 158, 1647, 809, 1022, 1155, 774, 1118, 63, 669, 1069, 1513, 1469, 47]
+ },
+ {
+ "word": "rifles",
+ "duration": 0.59,
+ "codes": [
+ 1007, 1650, 1015, 954, 843, 1075, 803, 1078, 839, 1382, 424, 1674, 800, 657, 1487, 382, 95, 41, 979,
+ 1619, 1485, 362, 1646, 1400, 333, 1761, 1827, 806, 389, 1562, 789, 481, 1606, 647, 1597, 1340, 593,
+ 1627, 1401, 1791, 202, 368, 213, 356
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.12,
+ "codes": [1485, 6, 1817, 1623, 440, 631, 1366, 547, 1479]
+ },
+ {
+ "word": "shotguns",
+ "duration": 0.64,
+ "codes": [
+ 987, 62, 154, 790, 290, 1161, 430, 543, 1332, 24, 1270, 1700, 412, 462, 1629, 595, 260, 1819, 1435,
+ 1828, 1752, 1380, 1803, 1465, 1321, 1300, 1330, 1459, 547, 913, 719, 303, 1811, 595, 716, 344, 909, 784,
+ 203, 1269, 1394, 734, 415, 1633, 233, 202, 1649, 409
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.36,
+ "codes": [
+ 858, 416, 328, 1793, 1390, 957, 1262, 1601, 1628, 1346, 1466, 764, 621, 1459, 844, 158, 626, 827, 1466,
+ 1110, 18, 1540, 1515, 615, 593, 1627, 1145
+ ]
+ },
+ {
+ "word": "my",
+ "duration": 0.21,
+ "codes": [1445, 481, 1035, 1593, 1718, 1464, 1808, 644, 1344, 1381, 466, 936, 123, 604, 1545, 1340]
+ },
+ {
+ "word": "trusty",
+ "duration": 0.44,
+ "codes": [
+ 1583, 572, 1749, 1114, 1462, 308, 1332, 471, 863, 1410, 993, 1524, 1100, 1758, 958, 1453, 768, 1638,
+ 1625, 1002, 117, 1364, 213, 631, 202, 454, 957, 820, 1748, 822, 1335, 295, 1039
+ ]
+ },
+ {
+ "word": "magnum",
+ "duration": 0.48,
+ "codes": [
+ 1285, 1679, 1626, 1257, 694, 1339, 678, 1815, 271, 1656, 535, 607, 383, 550, 1372, 769, 815, 1763, 1525,
+ 513, 1637, 1782, 1439, 539, 867, 297, 1476, 1620, 65, 1361, 804, 577, 1700, 91, 1583, 1745
+ ]
+ },
+ {
+ "word": "there",
+ "duration": 0.36,
+ "codes": [
+ 761, 1532, 32, 1828, 950, 761, 1037, 1293, 1023, 1510, 1657, 1821, 1284, 603, 1669, 634, 757, 940, 782,
+ 955, 18, 296, 1636, 455, 108, 1629, 333
+ ]
+ },
+ {
+ "word": "is",
+ "duration": 0.23,
+ "codes": [1571, 1517, 399, 863, 1649, 302, 103, 213, 1409, 1733, 356, 736, 104, 184, 1778, 198, 298]
+ },
+ {
+ "word": "an",
+ "duration": 0.12,
+ "codes": [654, 276, 1462, 104, 1388, 815, 736, 889, 1069]
+ },
+ {
+ "word": "emphasis",
+ "duration": 0.51,
+ "codes": [
+ 695, 678, 319, 262, 1396, 583, 473, 1728, 1706, 865, 1616, 1270, 1078, 1487, 911, 319, 1586, 335, 519,
+ 507, 356, 26, 1, 937, 1833, 335, 909, 990, 132, 616, 1814, 1, 1563, 38, 276, 368, 7, 335
+ ]
+ },
+ {
+ "word": "on",
+ "duration": 0.24,
+ "codes": [183, 844, 1477, 1720, 1751, 1302, 1743, 1435, 1789, 997, 644, 1828, 1253, 688, 1672, 473, 835, 26]
+ },
+ {
+ "word": "cooperative",
+ "duration": 0.61,
+ "codes": [
+ 856, 22, 1425, 1461, 901, 357, 1174, 1175, 1691, 1661, 626, 1776, 1286, 429, 1702, 757, 1702, 730, 1015,
+ 86, 321, 1628, 763, 1792, 1760, 1499, 1053, 1825, 1661, 1364, 1553, 1609, 409, 315, 166, 1027, 1295,
+ 927, 1231, 113, 1513, 1747, 1753, 1617, 1614, 274
+ ]
+ },
+ {
+ "word": "play",
+ "duration": 0.32,
+ "codes": [
+ 1669, 1764, 1298, 1411, 1487, 454, 1035, 1814, 1295, 973, 787, 834, 798, 617, 1720, 1331, 768, 252,
+ 1757, 318, 695, 884, 775, 631
+ ]
+ }
+ ],
+ "language": "en"
+}
diff --git a/examples/outetts/speakers/en_male_3.json b/examples/outetts/speakers/en_male_3.json
new file mode 100644
index 000000000..7506e166c
--- /dev/null
+++ b/examples/outetts/speakers/en_male_3.json
@@ -0,0 +1,185 @@
+{
+ "text": "Uhm, your way of celebrating your mum, but also I think, you know, spreading awareness, which is so important. And so, you know, I think you've taken.",
+ "words": [
+ {
+ "word": "uhm",
+ "duration": 0.31,
+ "codes": [
+ 119, 548, 926, 164, 142, 214, 979, 142, 1497, 498, 616, 1160, 359, 1812, 571, 1336, 498, 456, 1741,
+ 1708, 1371, 1646, 878
+ ]
+ },
+ {
+ "word": "your",
+ "duration": 0.49,
+ "codes": [
+ 615, 1470, 853, 1607, 685, 1640, 1717, 1626, 1052, 1467, 1536, 845, 257, 180, 1657, 1586, 659, 54, 666,
+ 235, 1747, 1405, 935, 714, 1665, 1618, 340, 286, 211, 1485, 1018, 83, 280, 1650, 1278, 569, 439
+ ]
+ },
+ {
+ "word": "way",
+ "duration": 0.2,
+ "codes": [1609, 1701, 395, 748, 1696, 1001, 280, 1517, 306, 803, 499, 373, 947, 179, 1449]
+ },
+ {
+ "word": "of",
+ "duration": 0.08,
+ "codes": [232, 472, 796, 825, 1299, 1225]
+ },
+ {
+ "word": "celebrating",
+ "duration": 0.84,
+ "codes": [
+ 1650, 821, 117, 1436, 330, 687, 979, 407, 886, 596, 430, 1139, 253, 69, 777, 370, 1072, 755, 81, 222,
+ 1804, 1216, 1506, 945, 1013, 520, 179, 1272, 683, 1812, 564, 1570, 1664, 959, 791, 306, 413, 966, 252,
+ 326, 349, 492, 570, 948, 789, 24, 1129, 1407, 781, 99, 1633, 522, 63, 190, 753, 683, 368, 644, 1679,
+ 1168, 1295, 788, 1793
+ ]
+ },
+ {
+ "word": "your",
+ "duration": 0.35,
+ "codes": [
+ 1350, 4, 515, 1520, 317, 1573, 1383, 615, 815, 896, 1157, 1684, 392, 641, 733, 1538, 1725, 530, 154,
+ 1785, 297, 463, 1743, 407, 623, 1371
+ ]
+ },
+ {
+ "word": "mum",
+ "duration": 0.21,
+ "codes": [1009, 519, 211, 98, 1101, 364, 106, 429, 827, 1143, 367, 221, 211, 1709, 1024, 515]
+ },
+ {
+ "word": "but",
+ "duration": 0.16,
+ "codes": [183, 1069, 539, 1796, 1151, 1676, 525, 1127, 366, 618, 1695, 127]
+ },
+ {
+ "word": "also",
+ "duration": 0.28,
+ "codes": [
+ 850, 393, 413, 1673, 868, 291, 1673, 1344, 462, 385, 1073, 1398, 906, 592, 519, 448, 592, 348, 1258,
+ 711, 115
+ ]
+ },
+ {
+ "word": "i",
+ "duration": 0.16,
+ "codes": [539, 1519, 390, 389, 441, 413, 715, 390, 410, 294, 726, 269]
+ },
+ {
+ "word": "think",
+ "duration": 0.32,
+ "codes": [
+ 1791, 1781, 1619, 1393, 1649, 1741, 1535, 1703, 1721, 1134, 1435, 112, 1660, 232, 98, 654, 593, 485,
+ 593, 368, 530, 1584, 379, 338
+ ]
+ },
+ {
+ "word": "you",
+ "duration": 0.4,
+ "codes": [
+ 379, 387, 1767, 543, 1410, 546, 1117, 614, 65, 793, 414, 284, 396, 538, 1039, 226, 1277, 1632, 1152,
+ 1796, 888, 953, 888, 464, 1762, 1446, 10, 876, 184, 1009
+ ]
+ },
+ {
+ "word": "know",
+ "duration": 0.11,
+ "codes": [473, 623, 383, 352, 1814, 147, 1289, 164]
+ },
+ {
+ "word": "spreading",
+ "duration": 0.44,
+ "codes": [
+ 482, 796, 134, 1387, 232, 1310, 308, 330, 400, 162, 535, 197, 1039, 274, 1442, 1701, 1138, 723, 39, 280,
+ 410, 412, 160, 645, 225, 1519, 581, 815, 415, 835, 884, 312, 1779
+ ]
+ },
+ {
+ "word": "awareness",
+ "duration": 0.48,
+ "codes": [
+ 173, 988, 93, 1727, 374, 1442, 312, 436, 482, 1752, 373, 1727, 173, 1706, 522, 909, 543, 590, 773, 210,
+ 1057, 215, 1020, 623, 1379, 147, 1427, 1182, 1816, 1221, 877, 1626, 178, 175, 780, 985
+ ]
+ },
+ {
+ "word": "which",
+ "duration": 0.19,
+ "codes": [1214, 1031, 698, 1820, 547, 211, 605, 1152, 442, 837, 1774, 232, 522, 876]
+ },
+ {
+ "word": "is",
+ "duration": 0.16,
+ "codes": [1649, 581, 211, 884, 110, 1530, 1217, 1409, 4, 1027, 1624, 849]
+ },
+ {
+ "word": "so",
+ "duration": 0.53,
+ "codes": [
+ 1337, 1034, 656, 1410, 979, 790, 1436, 471, 1403, 750, 782, 1697, 1748, 1380, 288, 668, 17, 585, 866,
+ 457, 975, 534, 401, 1570, 401, 1474, 1762, 1352, 1545, 178, 362, 356, 206, 276, 263, 356, 253, 492, 441,
+ 1101
+ ]
+ },
+ {
+ "word": "important",
+ "duration": 0.64,
+ "codes": [
+ 474, 211, 1797, 626, 1396, 571, 593, 1160, 530, 1624, 317, 1568, 835, 1570, 1327, 892, 1208, 1463, 1577,
+ 895, 1378, 897, 1781, 82, 539, 1748, 238, 416, 390, 1732, 306, 312, 1832, 445, 338, 418, 60, 1502, 1808,
+ 503, 1334, 1581, 1554, 1781, 506, 1519, 1681, 269
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.24,
+ "codes": [989, 1121, 932, 1393, 1103, 1164, 1185, 32, 1459, 671, 874, 872, 1056, 539, 1626, 1091, 539, 1569]
+ },
+ {
+ "word": "so",
+ "duration": 0.16,
+ "codes": [1620, 1594, 10, 599, 746, 172, 1175, 980, 1317, 1402, 115, 148]
+ },
+ {
+ "word": "you",
+ "duration": 0.63,
+ "codes": [
+ 1094, 248, 1384, 393, 611, 522, 425, 947, 326, 930, 503, 1215, 1813, 99, 284, 1070, 447, 1168, 732,
+ 1120, 1749, 900, 743, 1684, 1360, 1225, 490, 743, 1268, 673, 793, 490, 620, 1279, 469, 649, 717, 986,
+ 669, 174, 555, 284, 139, 1479, 1463, 79, 214
+ ]
+ },
+ {
+ "word": "know",
+ "duration": 0.08,
+ "codes": [368, 471, 112, 1077, 299, 472]
+ },
+ {
+ "word": "i",
+ "duration": 0.08,
+ "codes": [1170, 210, 1526, 499, 615, 1287]
+ },
+ {
+ "word": "think",
+ "duration": 0.17,
+ "codes": [1654, 1709, 1057, 1817, 1012, 1076, 1470, 768, 539, 778, 1698, 1507, 888]
+ },
+ {
+ "word": "youve",
+ "duration": 0.16,
+ "codes": [1797, 62, 605, 1800, 576, 1444, 434, 952, 485, 512, 1751, 691]
+ },
+ {
+ "word": "taken",
+ "duration": 0.39,
+ "codes": [
+ 1806, 822, 537, 700, 999, 1462, 791, 159, 1477, 558, 291, 1797, 901, 1336, 599, 1408, 1555, 707, 1677,
+ 919, 1411, 1381, 512, 525, 1583, 847, 78, 1596, 810
+ ]
+ }
+ ],
+ "language": "en"
+}
diff --git a/examples/outetts/speakers/en_male_4.json b/examples/outetts/speakers/en_male_4.json
new file mode 100644
index 000000000..c3333099c
--- /dev/null
+++ b/examples/outetts/speakers/en_male_4.json
@@ -0,0 +1,254 @@
+{
+ "text": "Corrosion by running water opens a window to a subterranean world. Jijin Cave covers an area of more than 700,000 square meters, which is equivalent to 100 football fields.",
+ "words": [
+ {
+ "word": "corrosion",
+ "duration": 0.56,
+ "codes": [
+ 1231, 1183, 393, 1069, 773, 1394, 217, 200, 211, 783, 1616, 814, 1680, 1381, 1372, 648, 211, 433, 57,
+ 679, 375, 1496, 1149, 328, 289, 748, 300, 1692, 1286, 318, 1633, 463, 596, 1436, 454, 1417, 1414, 593,
+ 848, 1422, 738, 1412
+ ]
+ },
+ {
+ "word": "by",
+ "duration": 0.31,
+ "codes": [
+ 1526, 1674, 1411, 1048, 1829, 1473, 804, 1355, 1453, 1396, 1390, 78, 1722, 1113, 1206, 1063, 703, 609,
+ 1129, 1373, 983, 271, 668
+ ]
+ },
+ {
+ "word": "running",
+ "duration": 0.33,
+ "codes": [
+ 514, 239, 304, 1008, 673, 850, 1506, 1315, 768, 280, 1403, 206, 112, 1336, 1436, 1801, 52, 46, 200, 503,
+ 1773, 1305, 1663, 348, 312
+ ]
+ },
+ {
+ "word": "water",
+ "duration": 0.44,
+ "codes": [
+ 742, 1782, 1176, 1352, 1597, 1365, 662, 1614, 1281, 90, 1692, 843, 397, 891, 1219, 1347, 644, 1415,
+ 1816, 1581, 1153, 1702, 1139, 357, 175, 780, 1372, 1361, 1063, 876, 1160, 1795, 1013
+ ]
+ },
+ {
+ "word": "opens",
+ "duration": 0.71,
+ "codes": [
+ 1404, 1680, 1745, 1125, 1357, 1774, 1126, 1392, 1314, 758, 5, 1738, 516, 1199, 1393, 1758, 1330, 1212,
+ 550, 1488, 384, 439, 1712, 587, 389, 1768, 32, 59, 795, 1319, 1331, 349, 1427, 1124, 1278, 753, 192,
+ 539, 1818, 1228, 1732, 439, 1472, 1215, 473, 211, 1800, 790, 143, 1376, 202, 6, 430
+ ]
+ },
+ {
+ "word": "a",
+ "duration": 0.13,
+ "codes": [996, 1245, 1790, 1010, 669, 1741, 1431, 997, 225, 1005]
+ },
+ {
+ "word": "window",
+ "duration": 0.55,
+ "codes": [
+ 239, 92, 539, 1446, 1786, 909, 633, 1727, 150, 1244, 385, 1781, 1535, 55, 539, 1240, 1310, 966, 640,
+ 1590, 230, 455, 1597, 755, 97, 200, 719, 144, 952, 1546, 1605, 1125, 1222, 892, 1400, 1368, 1077, 1393,
+ 1427, 1058, 1738
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.2,
+ "codes": [1830, 653, 1638, 1227, 355, 852, 566, 559, 1447, 206, 1300, 989, 1823, 1052, 1456]
+ },
+ {
+ "word": "a",
+ "duration": 0.09,
+ "codes": [703, 1612, 810, 657, 27, 102, 1261]
+ },
+ {
+ "word": "subterranean",
+ "duration": 0.91,
+ "codes": [
+ 1315, 882, 276, 183, 769, 452, 493, 471, 844, 454, 596, 870, 889, 592, 1018, 596, 328, 1358, 626, 179,
+ 95, 214, 1536, 1827, 1239, 1775, 32, 850, 1145, 1732, 1215, 986, 187, 1360, 1498, 206, 1139, 1769, 874,
+ 148, 1104, 877, 487, 1506, 414, 359, 131, 569, 1039, 876, 637, 423, 63, 844, 316, 264, 183, 1401, 1734,
+ 1489, 1013, 263, 1485, 1488, 1707, 363, 987, 1741
+ ]
+ },
+ {
+ "word": "world",
+ "duration": 0.45,
+ "codes": [
+ 1813, 738, 343, 859, 1006, 406, 893, 716, 1762, 909, 973, 1684, 834, 1078, 1813, 1013, 1320, 1435, 1663,
+ 1282, 361, 1584, 19, 1454, 1708, 1489, 1454, 1647, 1505, 1312, 1225, 1433, 1741, 581
+ ]
+ },
+ {
+ "word": "jijin",
+ "duration": 1.05,
+ "codes": [
+ 1382, 927, 1426, 744, 1735, 1525, 1294, 1699, 1684, 1414, 1543, 1025, 1478, 1543, 1301, 1428, 1655,
+ 1433, 1479, 1248, 805, 1273, 17, 862, 899, 1440, 1293, 1348, 1684, 1010, 1231, 1348, 1007, 1231, 957,
+ 1293, 1236, 869, 810, 585, 964, 954, 1601, 670, 861, 1601, 729, 1479, 1348, 870, 434, 117, 614, 398,
+ 395, 764, 343, 200, 863, 627, 91, 691, 1727, 943, 769, 502, 599, 1781, 367, 968, 217, 335, 1674, 500,
+ 1121, 150, 1345, 1742, 850
+ ]
+ },
+ {
+ "word": "cave",
+ "duration": 0.64,
+ "codes": [
+ 1799, 1084, 1488, 1034, 987, 1491, 1486, 850, 686, 1352, 944, 596, 1671, 1451, 1393, 1404, 829, 512,
+ 402, 801, 1370, 822, 518, 1713, 328, 877, 333, 1346, 529, 938, 1285, 1708, 1528, 1490, 416, 1639, 1014,
+ 231, 1581, 1666, 1432, 1406, 1496, 1422, 1811, 1012, 1399, 1256
+ ]
+ },
+ {
+ "word": "covers",
+ "duration": 0.52,
+ "codes": [
+ 535, 1344, 958, 1259, 1285, 1305, 1465, 727, 1390, 1767, 958, 658, 1700, 1651, 910, 570, 146, 222, 825,
+ 1098, 1789, 314, 1715, 506, 1710, 532, 688, 1563, 588, 117, 1397, 58, 129, 214, 1695, 687, 473, 778, 18
+ ]
+ },
+ {
+ "word": "an",
+ "duration": 0.13,
+ "codes": [176, 1002, 466, 827, 1138, 297, 399, 1467, 1490, 1031]
+ },
+ {
+ "word": "area",
+ "duration": 0.39,
+ "codes": [
+ 1295, 1811, 1215, 761, 793, 1705, 1476, 632, 1501, 262, 173, 225, 434, 599, 146, 280, 1224, 821, 1678,
+ 280, 1801, 350, 429, 231, 402, 686, 212, 522, 1271
+ ]
+ },
+ {
+ "word": "of",
+ "duration": 0.49,
+ "codes": [
+ 705, 535, 438, 261, 929, 355, 1303, 1412, 1736, 1098, 1820, 1801, 1170, 1785, 1393, 1104, 1152, 1130,
+ 1448, 1096, 1153, 1433, 1361, 19, 1380, 1752, 950, 131, 465, 1613, 635, 860, 277, 618, 1712, 639, 1610
+ ]
+ },
+ {
+ "word": "more",
+ "duration": 0.23,
+ "codes": [1779, 820, 302, 1564, 1688, 1401, 74, 395, 268, 683, 644, 397, 1482, 397, 834, 629, 332]
+ },
+ {
+ "word": "than",
+ "duration": 0.16,
+ "codes": [501, 1604, 607, 1755, 549, 222, 1773, 300, 835, 556, 593, 863]
+ },
+ {
+ "word": "seven",
+ "duration": 0.44,
+ "codes": [
+ 617, 1240, 1449, 1387, 202, 858, 796, 844, 1519, 471, 1166, 796, 543, 0, 1124, 147, 768, 836, 324, 361,
+ 1350, 1824, 763, 392, 1796, 684, 1698, 1091, 1790, 1371, 1670, 662, 1516
+ ]
+ },
+ {
+ "word": "hundred",
+ "duration": 0.48,
+ "codes": [
+ 1810, 1640, 1741, 1689, 1540, 1355, 1723, 1282, 1171, 1794, 1651, 944, 248, 1281, 535, 616, 1510, 203,
+ 1550, 678, 64, 123, 551, 716, 1667, 1614, 757, 1631, 617, 76, 838, 653, 1769, 505, 1803, 1139
+ ]
+ },
+ {
+ "word": "zero",
+ "duration": 0.61,
+ "codes": [
+ 1769, 1752, 1400, 1718, 1040, 1175, 1724, 1832, 525, 1040, 352, 1034, 411, 570, 825, 271, 624, 87, 461,
+ 1282, 706, 877, 782, 1433, 1627, 782, 1606, 778, 1436, 1817, 1256, 617, 1787, 999, 1048, 1720, 1277,
+ 1357, 1808, 1731, 1756, 328, 1518, 1061, 400, 416
+ ]
+ },
+ {
+ "word": "square",
+ "duration": 0.33,
+ "codes": [
+ 463, 1166, 640, 1292, 659, 1277, 1429, 1285, 921, 664, 148, 320, 635, 1241, 1497, 147, 869, 1013, 330,
+ 1372, 1427, 1212, 1427, 1492, 878
+ ]
+ },
+ {
+ "word": "meters",
+ "duration": 0.44,
+ "codes": [
+ 663, 1185, 1084, 1463, 1660, 659, 682, 290, 1267, 667, 1312, 428, 986, 1625, 823, 1734, 999, 889, 782,
+ 809, 985, 935, 243, 1281, 1449, 1476, 1109, 1638, 1002, 1542, 308, 67, 927
+ ]
+ },
+ {
+ "word": "which",
+ "duration": 1.84,
+ "codes": [
+ 581, 1766, 669, 1573, 824, 707, 567, 551, 1038, 68, 559, 1154, 445, 680, 563, 235, 516, 378, 275, 967,
+ 68, 513, 378, 94, 516, 526, 68, 1154, 223, 566, 1137, 167, 717, 378, 241, 967, 223, 445, 1137, 167, 513,
+ 526, 241, 717, 223, 275, 1137, 187, 516, 316, 445, 1003, 551, 1601, 1741, 257, 693, 995, 5, 735, 669,
+ 729, 976, 799, 171, 1589, 484, 1114, 1510, 5, 1322, 633, 638, 669, 562, 1114, 820, 1070, 995, 1080, 832,
+ 995, 458, 957, 1634, 1157, 1003, 676, 486, 1305, 169, 735, 1390, 470, 795, 795, 548, 1652, 169, 751,
+ 1033, 538, 1251, 651, 486, 1118, 624, 681, 751, 845, 735, 939, 651, 1033, 1080, 1652, 1600, 1157, 981,
+ 933, 655, 838, 1076, 171, 459, 548, 1327, 427, 1406, 858, 87, 211, 1468, 533, 1603, 96, 844, 520
+ ]
+ },
+ {
+ "word": "is",
+ "duration": 0.09,
+ "codes": [152, 92, 47, 132, 1452, 432, 631]
+ },
+ {
+ "word": "equivalent",
+ "duration": 0.55,
+ "codes": [
+ 1473, 364, 522, 783, 1508, 1543, 1023, 964, 1285, 1260, 1122, 1663, 495, 1710, 1392, 557, 276, 389, 48,
+ 1732, 1564, 1538, 1370, 105, 1481, 443, 1533, 1770, 1019, 1041, 1102, 1494, 1247, 1388, 1637, 1736, 733,
+ 1735, 1381, 1364, 656
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.24,
+ "codes": [1748, 1082, 901, 1568, 1329, 1231, 666, 1822, 486, 1305, 1813, 822, 358, 368, 1823, 547, 539, 775]
+ },
+ {
+ "word": "one",
+ "duration": 0.36,
+ "codes": [
+ 193, 126, 1717, 338, 1807, 121, 807, 758, 59, 395, 1321, 909, 836, 690, 1741, 339, 443, 623, 1542, 1230,
+ 1357, 485, 1625, 455, 317, 1826, 1282
+ ]
+ },
+ {
+ "word": "hundred",
+ "duration": 0.44,
+ "codes": [
+ 1365, 1502, 1126, 1709, 1053, 1125, 1466, 1041, 1117, 1459, 1034, 955, 291, 797, 1177, 1635, 1599, 197,
+ 312, 1564, 636, 1520, 1695, 1320, 1765, 959, 142, 1496, 810, 926, 325, 240, 1671
+ ]
+ },
+ {
+ "word": "football",
+ "duration": 0.41,
+ "codes": [
+ 1379, 1807, 1313, 1681, 1788, 1355, 1825, 1088, 1337, 941, 798, 1224, 806, 1450, 850, 240, 1011, 627,
+ 1033, 1779, 1532, 599, 1490, 1014, 1311, 1384, 1574, 354, 264, 1758, 738
+ ]
+ },
+ {
+ "word": "fields",
+ "duration": 0.4,
+ "codes": [
+ 1439, 895, 1748, 715, 1535, 1034, 919, 770, 1035, 36, 784, 217, 380, 121, 1257, 384, 1141, 385, 1258,
+ 1312, 1732, 1820, 1605, 1724, 1812, 318, 858, 28, 858, 1342
+ ]
+ }
+ ],
+ "language": "en"
+}
diff --git a/examples/outetts/speakers/gordon_ramsay.json b/examples/outetts/speakers/gordon_ramsay.json
new file mode 100644
index 000000000..f20208776
--- /dev/null
+++ b/examples/outetts/speakers/gordon_ramsay.json
@@ -0,0 +1,1229 @@
+{
+ "text": " because it made me ignite. I could see this incredible sort of beginning of something unique. And I think when we look at individuals, figureheads in our lives, and we want to aspire to become them.",
+ "words": [
+ {
+ "word": "because",
+ "duration": 0.63,
+ "codes": [
+ 257,
+ 704,
+ 765,
+ 1011,
+ 692,
+ 94,
+ 544,
+ 1163,
+ 400,
+ 806,
+ 769,
+ 455,
+ 183,
+ 1699,
+ 5,
+ 1334,
+ 1713,
+ 1212,
+ 375,
+ 1797,
+ 901,
+ 961,
+ 1400,
+ 88,
+ 1825,
+ 212,
+ 1790,
+ 248,
+ 1700,
+ 705,
+ 1056,
+ 1502,
+ 422,
+ 1763,
+ 142,
+ 1710,
+ 376,
+ 521,
+ 1655,
+ 57,
+ 416,
+ 22,
+ 273,
+ 6,
+ 1679,
+ 323,
+ 440
+ ]
+ },
+ {
+ "word": "it",
+ "duration": 0.28,
+ "codes": [
+ 1400,
+ 1166,
+ 1065,
+ 933,
+ 509,
+ 313,
+ 544,
+ 258,
+ 669,
+ 154,
+ 1245,
+ 581,
+ 102,
+ 1726,
+ 11,
+ 39,
+ 1492,
+ 1217,
+ 1456,
+ 976,
+ 311
+ ]
+ },
+ {
+ "word": "made",
+ "duration": 0.41,
+ "codes": [
+ 351,
+ 16,
+ 23,
+ 1346,
+ 1775,
+ 1784,
+ 455,
+ 1553,
+ 382,
+ 742,
+ 719,
+ 932,
+ 76,
+ 1303,
+ 215,
+ 318,
+ 303,
+ 769,
+ 937,
+ 296,
+ 697,
+ 1642,
+ 407,
+ 1537,
+ 296,
+ 1456,
+ 1712,
+ 56,
+ 900,
+ 1444,
+ 954
+ ]
+ },
+ {
+ "word": "me",
+ "duration": 0.15,
+ "codes": [
+ 363,
+ 806,
+ 193,
+ 647,
+ 1120,
+ 1396,
+ 236,
+ 914,
+ 806,
+ 230,
+ 1621
+ ]
+ },
+ {
+ "word": "ignite",
+ "duration": 1.44,
+ "codes": [
+ 113,
+ 527,
+ 1407,
+ 295,
+ 1039,
+ 13,
+ 335,
+ 795,
+ 57,
+ 510,
+ 649,
+ 1427,
+ 46,
+ 1649,
+ 1730,
+ 1384,
+ 70,
+ 1775,
+ 1710,
+ 1621,
+ 1775,
+ 166,
+ 1815,
+ 1756,
+ 1349,
+ 1092,
+ 415,
+ 1600,
+ 1629,
+ 867,
+ 1251,
+ 725,
+ 884,
+ 1092,
+ 1485,
+ 1512,
+ 331,
+ 257,
+ 3,
+ 417,
+ 1011,
+ 54,
+ 766,
+ 696,
+ 360,
+ 1284,
+ 360,
+ 766,
+ 387,
+ 287,
+ 325,
+ 696,
+ 509,
+ 325,
+ 391,
+ 1244,
+ 692,
+ 606,
+ 1011,
+ 765,
+ 257,
+ 274,
+ 360,
+ 1244,
+ 285,
+ 288,
+ 1728,
+ 1113,
+ 62,
+ 321,
+ 11,
+ 1025,
+ 1778,
+ 781,
+ 970,
+ 1543,
+ 1806,
+ 1509,
+ 1422,
+ 770,
+ 1145,
+ 98,
+ 510,
+ 93,
+ 656,
+ 106,
+ 738,
+ 1483,
+ 1356,
+ 1042,
+ 969,
+ 127,
+ 557,
+ 1361,
+ 658,
+ 1645,
+ 1002,
+ 582,
+ 1321,
+ 1730,
+ 1319,
+ 31,
+ 285,
+ 509,
+ 778,
+ 50,
+ 1270,
+ 350
+ ]
+ },
+ {
+ "word": "i",
+ "duration": 0.36,
+ "codes": [
+ 1436,
+ 533,
+ 581,
+ 1637,
+ 746,
+ 872,
+ 392,
+ 919,
+ 578,
+ 561,
+ 351,
+ 241,
+ 1474,
+ 1285,
+ 805,
+ 1242,
+ 288,
+ 669,
+ 360,
+ 23,
+ 343,
+ 1438,
+ 1229,
+ 1113,
+ 179,
+ 231,
+ 1620
+ ]
+ },
+ {
+ "word": "could",
+ "duration": 0.17,
+ "codes": [
+ 1468,
+ 1510,
+ 1266,
+ 689,
+ 1283,
+ 1668,
+ 1041,
+ 695,
+ 1825,
+ 925,
+ 1750,
+ 778,
+ 554
+ ]
+ },
+ {
+ "word": "see",
+ "duration": 0.19,
+ "codes": [
+ 533,
+ 350,
+ 1238,
+ 507,
+ 10,
+ 750,
+ 383,
+ 1324,
+ 36,
+ 1415,
+ 323,
+ 789,
+ 605,
+ 127
+ ]
+ },
+ {
+ "word": "this",
+ "duration": 0.17,
+ "codes": [
+ 678,
+ 275,
+ 562,
+ 1393,
+ 1407,
+ 665,
+ 1804,
+ 1004,
+ 4,
+ 1542,
+ 812,
+ 556,
+ 344
+ ]
+ },
+ {
+ "word": "incredible",
+ "duration": 1.19,
+ "codes": [
+ 24,
+ 400,
+ 1094,
+ 1642,
+ 391,
+ 465,
+ 445,
+ 765,
+ 1011,
+ 265,
+ 1244,
+ 766,
+ 487,
+ 325,
+ 54,
+ 704,
+ 274,
+ 287,
+ 862,
+ 765,
+ 257,
+ 325,
+ 360,
+ 1244,
+ 899,
+ 287,
+ 1284,
+ 765,
+ 765,
+ 1016,
+ 287,
+ 686,
+ 766,
+ 606,
+ 325,
+ 351,
+ 351,
+ 141,
+ 1471,
+ 440,
+ 254,
+ 395,
+ 1552,
+ 1488,
+ 110,
+ 923,
+ 1341,
+ 1721,
+ 12,
+ 1114,
+ 1259,
+ 1110,
+ 573,
+ 1229,
+ 1457,
+ 375,
+ 595,
+ 1160,
+ 1300,
+ 1392,
+ 955,
+ 388,
+ 84,
+ 705,
+ 993,
+ 1093,
+ 859,
+ 1354,
+ 545,
+ 1119,
+ 1823,
+ 120,
+ 343,
+ 830,
+ 1239,
+ 476,
+ 1381,
+ 1341,
+ 1224,
+ 894,
+ 1581,
+ 754,
+ 1540,
+ 1467,
+ 946,
+ 1440,
+ 250,
+ 988,
+ 1619
+ ]
+ },
+ {
+ "word": "sort",
+ "duration": 0.92,
+ "codes": [
+ 1266,
+ 893,
+ 1236,
+ 1246,
+ 1315,
+ 505,
+ 131,
+ 1236,
+ 1255,
+ 692,
+ 692,
+ 585,
+ 75,
+ 1288,
+ 1064,
+ 131,
+ 1064,
+ 1067,
+ 351,
+ 1031,
+ 1114,
+ 950,
+ 1031,
+ 567,
+ 1104,
+ 1130,
+ 387,
+ 872,
+ 1205,
+ 1707,
+ 872,
+ 274,
+ 1059,
+ 1059,
+ 795,
+ 1117,
+ 724,
+ 762,
+ 1007,
+ 391,
+ 766,
+ 710,
+ 54,
+ 1244,
+ 445,
+ 313,
+ 930,
+ 333,
+ 778,
+ 596,
+ 1287,
+ 645,
+ 430,
+ 1061,
+ 543,
+ 656,
+ 919,
+ 596,
+ 1229,
+ 965,
+ 847,
+ 553,
+ 1579,
+ 352,
+ 1007,
+ 738,
+ 1466,
+ 449,
+ 607
+ ]
+ },
+ {
+ "word": "of",
+ "duration": 0.12,
+ "codes": [
+ 146,
+ 752,
+ 1502,
+ 972,
+ 1437,
+ 832,
+ 671,
+ 533,
+ 1694
+ ]
+ },
+ {
+ "word": "beginning",
+ "duration": 1.48,
+ "codes": [
+ 1653,
+ 867,
+ 1242,
+ 1617,
+ 1085,
+ 1016,
+ 943,
+ 1375,
+ 979,
+ 800,
+ 1701,
+ 985,
+ 1411,
+ 1496,
+ 715,
+ 1565,
+ 1519,
+ 1175,
+ 1749,
+ 1370,
+ 1476,
+ 1762,
+ 1543,
+ 490,
+ 54,
+ 281,
+ 311,
+ 1244,
+ 696,
+ 765,
+ 1284,
+ 360,
+ 975,
+ 387,
+ 765,
+ 1011,
+ 606,
+ 544,
+ 862,
+ 445,
+ 704,
+ 287,
+ 391,
+ 1752,
+ 933,
+ 1639,
+ 993,
+ 751,
+ 1091,
+ 1130,
+ 1418,
+ 347,
+ 1279,
+ 1379,
+ 1031,
+ 1652,
+ 1170,
+ 1448,
+ 1033,
+ 1130,
+ 265,
+ 606,
+ 975,
+ 360,
+ 1016,
+ 986,
+ 401,
+ 1016,
+ 606,
+ 704,
+ 692,
+ 287,
+ 862,
+ 606,
+ 257,
+ 1244,
+ 287,
+ 862,
+ 766,
+ 287,
+ 325,
+ 241,
+ 360,
+ 54,
+ 1261,
+ 364,
+ 254,
+ 631,
+ 312,
+ 677,
+ 845,
+ 1595,
+ 780,
+ 782,
+ 521,
+ 502,
+ 657,
+ 157,
+ 1387,
+ 1103,
+ 1329,
+ 1505,
+ 521,
+ 202,
+ 1607,
+ 759,
+ 536,
+ 1589,
+ 1230,
+ 663,
+ 1745
+ ]
+ },
+ {
+ "word": "of",
+ "duration": 0.12,
+ "codes": [
+ 1408,
+ 550,
+ 1462,
+ 1630,
+ 610,
+ 1767,
+ 343,
+ 1585,
+ 911
+ ]
+ },
+ {
+ "word": "something",
+ "duration": 0.29,
+ "codes": [
+ 1354,
+ 350,
+ 178,
+ 636,
+ 1434,
+ 53,
+ 813,
+ 791,
+ 269,
+ 753,
+ 31,
+ 1014,
+ 630,
+ 59,
+ 1768,
+ 362,
+ 1344,
+ 375,
+ 1773,
+ 577,
+ 1512,
+ 1526
+ ]
+ },
+ {
+ "word": "unique",
+ "duration": 0.4,
+ "codes": [
+ 1642,
+ 1568,
+ 1715,
+ 276,
+ 1763,
+ 236,
+ 178,
+ 1469,
+ 1463,
+ 257,
+ 277,
+ 520,
+ 822,
+ 1139,
+ 1224,
+ 473,
+ 1376,
+ 1499,
+ 1545,
+ 233,
+ 1799,
+ 325,
+ 1713,
+ 458,
+ 1551,
+ 1571,
+ 1011,
+ 340,
+ 1623,
+ 1034
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.67,
+ "codes": [
+ 943,
+ 1114,
+ 383,
+ 1432,
+ 672,
+ 989,
+ 360,
+ 544,
+ 606,
+ 509,
+ 1474,
+ 606,
+ 766,
+ 1244,
+ 391,
+ 1244,
+ 54,
+ 1128,
+ 1134,
+ 603,
+ 1379,
+ 1458,
+ 866,
+ 1074,
+ 1007,
+ 1214,
+ 1355,
+ 1601,
+ 1074,
+ 1752,
+ 995,
+ 872,
+ 1601,
+ 1215,
+ 1496,
+ 710,
+ 1011,
+ 287,
+ 509,
+ 766,
+ 360,
+ 257,
+ 851,
+ 20,
+ 1825,
+ 951,
+ 791,
+ 84,
+ 1827,
+ 65
+ ]
+ },
+ {
+ "word": "i",
+ "duration": 0.05,
+ "codes": [
+ 616,
+ 1139,
+ 376,
+ 1634
+ ]
+ },
+ {
+ "word": "think",
+ "duration": 0.19,
+ "codes": [
+ 1626,
+ 1569,
+ 1781,
+ 874,
+ 422,
+ 189,
+ 63,
+ 1791,
+ 914,
+ 1263,
+ 804,
+ 1550,
+ 1285,
+ 1660
+ ]
+ },
+ {
+ "word": "when",
+ "duration": 0.2,
+ "codes": [
+ 1248,
+ 612,
+ 925,
+ 696,
+ 469,
+ 1595,
+ 646,
+ 1811,
+ 757,
+ 1500,
+ 252,
+ 1748,
+ 616,
+ 1250,
+ 302
+ ]
+ },
+ {
+ "word": "we",
+ "duration": 0.09,
+ "codes": [
+ 1383,
+ 877,
+ 1310,
+ 1819,
+ 138,
+ 1383,
+ 110
+ ]
+ },
+ {
+ "word": "look",
+ "duration": 0.28,
+ "codes": [
+ 619,
+ 1596,
+ 648,
+ 933,
+ 1056,
+ 1563,
+ 442,
+ 1653,
+ 985,
+ 1684,
+ 774,
+ 1492,
+ 1633,
+ 553,
+ 1473,
+ 768,
+ 831,
+ 1788,
+ 1348,
+ 712,
+ 872
+ ]
+ },
+ {
+ "word": "at",
+ "duration": 0.2,
+ "codes": [
+ 1747,
+ 383,
+ 1270,
+ 1556,
+ 142,
+ 1833,
+ 212,
+ 1078,
+ 1133,
+ 756,
+ 79,
+ 1333,
+ 1456,
+ 1526,
+ 1327
+ ]
+ },
+ {
+ "word": "individuals",
+ "duration": 0.75,
+ "codes": [
+ 21,
+ 423,
+ 686,
+ 265,
+ 1244,
+ 509,
+ 765,
+ 1284,
+ 54,
+ 766,
+ 765,
+ 285,
+ 281,
+ 505,
+ 1215,
+ 1450,
+ 300,
+ 730,
+ 812,
+ 55,
+ 1805,
+ 1326,
+ 64,
+ 1460,
+ 522,
+ 1832,
+ 83,
+ 1510,
+ 1081,
+ 254,
+ 1650,
+ 2,
+ 1765,
+ 1473,
+ 668,
+ 1783,
+ 14,
+ 330,
+ 1494,
+ 1324,
+ 644,
+ 1075,
+ 1591,
+ 174,
+ 814,
+ 1715,
+ 921,
+ 1084,
+ 752,
+ 1093,
+ 1678,
+ 1790,
+ 1561,
+ 335,
+ 1748,
+ 404
+ ]
+ },
+ {
+ "word": "figureheads",
+ "duration": 0.61,
+ "codes": [
+ 1082,
+ 1290,
+ 10,
+ 1436,
+ 731,
+ 780,
+ 865,
+ 802,
+ 1720,
+ 1525,
+ 1248,
+ 272,
+ 436,
+ 744,
+ 136,
+ 584,
+ 209,
+ 504,
+ 1151,
+ 10,
+ 1161,
+ 492,
+ 498,
+ 1410,
+ 200,
+ 201,
+ 1048,
+ 722,
+ 1683,
+ 1228,
+ 1141,
+ 870,
+ 949,
+ 756,
+ 933,
+ 236,
+ 827,
+ 1505,
+ 1044,
+ 592,
+ 1778,
+ 1071,
+ 436,
+ 1603,
+ 273,
+ 166
+ ]
+ },
+ {
+ "word": "in",
+ "duration": 0.08,
+ "codes": [
+ 1731,
+ 1212,
+ 83,
+ 1259,
+ 921,
+ 1542
+ ]
+ },
+ {
+ "word": "our",
+ "duration": 0.11,
+ "codes": [
+ 1674,
+ 1384,
+ 1256,
+ 1190,
+ 1149,
+ 903,
+ 320,
+ 1127
+ ]
+ },
+ {
+ "word": "lives",
+ "duration": 0.33,
+ "codes": [
+ 1697,
+ 1497,
+ 205,
+ 466,
+ 1378,
+ 212,
+ 1097,
+ 73,
+ 1771,
+ 697,
+ 1402,
+ 446,
+ 1622,
+ 364,
+ 1278,
+ 1740,
+ 1429,
+ 231,
+ 1650,
+ 995,
+ 1510,
+ 1802,
+ 340,
+ 782,
+ 817
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.11,
+ "codes": [
+ 62,
+ 1724,
+ 1119,
+ 1183,
+ 1224,
+ 774,
+ 1727,
+ 331
+ ]
+ },
+ {
+ "word": "we",
+ "duration": 0.05,
+ "codes": [
+ 598,
+ 1139,
+ 1388,
+ 547
+ ]
+ },
+ {
+ "word": "want",
+ "duration": 0.15,
+ "codes": [
+ 458,
+ 191,
+ 1670,
+ 1697,
+ 437,
+ 1267,
+ 248,
+ 1079,
+ 647,
+ 836,
+ 1634
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.05,
+ "codes": [
+ 60,
+ 1494,
+ 1253,
+ 809
+ ]
+ },
+ {
+ "word": "aspire",
+ "duration": 0.59,
+ "codes": [
+ 1426,
+ 1052,
+ 1252,
+ 858,
+ 1497,
+ 1281,
+ 1423,
+ 1167,
+ 1155,
+ 901,
+ 1743,
+ 1118,
+ 1479,
+ 986,
+ 551,
+ 23,
+ 1754,
+ 1206,
+ 1174,
+ 1308,
+ 1300,
+ 1374,
+ 507,
+ 96,
+ 546,
+ 1,
+ 1479,
+ 16,
+ 501,
+ 824,
+ 1258,
+ 723,
+ 267,
+ 1731,
+ 210,
+ 1785,
+ 321,
+ 1361,
+ 789,
+ 474,
+ 1716,
+ 152,
+ 609,
+ 1806
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.08,
+ "codes": [
+ 1352,
+ 823,
+ 1532,
+ 889,
+ 1721,
+ 118
+ ]
+ },
+ {
+ "word": "become",
+ "duration": 0.36,
+ "codes": [
+ 1343,
+ 861,
+ 1356,
+ 1513,
+ 269,
+ 870,
+ 1642,
+ 1467,
+ 1085,
+ 712,
+ 318,
+ 870,
+ 1459,
+ 901,
+ 83,
+ 1680,
+ 764,
+ 159,
+ 825,
+ 1126,
+ 1075,
+ 312,
+ 363,
+ 795,
+ 1809,
+ 503,
+ 1578
+ ]
+ },
+ {
+ "word": "them",
+ "duration": 0.16,
+ "codes": [
+ 1263,
+ 27,
+ 1584,
+ 1829,
+ 1077,
+ 1439,
+ 1340,
+ 1817,
+ 1671,
+ 1732,
+ 1684,
+ 1614
+ ]
+ }
+ ],
+ "language": "en"
+}
\ No newline at end of file
diff --git a/examples/outetts/speakers/ja_female_1.json b/examples/outetts/speakers/ja_female_1.json
new file mode 100644
index 000000000..644705926
--- /dev/null
+++ b/examples/outetts/speakers/ja_female_1.json
@@ -0,0 +1,190 @@
+{
+ "text": "\u4e0b\u30cd\u30bf\u3001\u3053\u308c\u306f\u5c11\u3057\u30a8\u30c3\u30c1\u306a\u3053\u3068\u3068\u304b\u5c11\u3057\u4e0b\u54c1\u306a\u3053\u3068\u3067\u3059\u306d\u3002\u5f7c\u3089\u3067\u3059\u304b\u3089\u7537\u306e\u4eba\u3067\u3059\u306d\u3002",
+ "words": [
+ {
+ "word": "xianeta",
+ "duration": 0.75,
+ "codes": [
+ 651, 343, 975, 1703, 1561, 1250, 1527, 176, 1568, 1450, 1287, 1375, 1238, 485, 416, 1435, 1634, 521,
+ 1068, 377, 1457, 662, 265, 158, 60, 149, 1424, 1810, 775, 1417, 1172, 326, 1278, 961, 1753, 905, 1509,
+ 1038, 1124, 29, 1403, 1265, 910, 182, 1447, 971, 1003, 1181, 630, 1719, 841, 610, 142, 110, 1177, 1044
+ ]
+ },
+ {
+ "word": "kore",
+ "duration": 0.65,
+ "codes": [
+ 697, 841, 1429, 723, 1304, 1121, 1276, 1123, 915, 1505, 693, 1162, 338, 717, 1328, 674, 1367, 1227, 334,
+ 1309, 651, 964, 1223, 445, 1108, 334, 612, 751, 31, 458, 651, 223, 670, 331, 651, 1697, 1219, 1159, 149,
+ 152, 523, 794, 807, 995, 934, 1205, 408, 950, 868
+ ]
+ },
+ {
+ "word": "ha",
+ "duration": 0.19,
+ "codes": [516, 794, 1198, 1430, 1389, 261, 981, 1289, 632, 1137, 1136, 951, 1277, 446]
+ },
+ {
+ "word": "shaoshi",
+ "duration": 0.85,
+ "codes": [
+ 1076, 847, 1368, 1479, 1758, 761, 1043, 1076, 169, 1162, 845, 967, 1486, 331, 1121, 1071, 169, 1071,
+ 538, 673, 627, 445, 555, 542, 1669, 1617, 1602, 1681, 1568, 1602, 1329, 863, 1439, 1495, 1523, 1244,
+ 1461, 696, 1739, 41, 978, 1058, 1156, 1189, 55, 753, 1815, 1738, 1238, 749, 1059, 416, 22, 1638, 749, 0,
+ 636, 176, 1600, 1690, 1431, 1774, 1172, 1172
+ ]
+ },
+ {
+ "word": "etchi",
+ "duration": 0.67,
+ "codes": [
+ 1492, 1344, 1761, 707, 65, 1205, 996, 1318, 1620, 1639, 879, 1432, 1255, 717, 1731, 287, 936, 1346, 337,
+ 1023, 287, 1437, 642, 1421, 110, 826, 1492, 1075, 1753, 1710, 1741, 1649, 1719, 1154, 1771, 591, 1289,
+ 1553, 601, 681, 859, 950, 1734, 634, 415, 4, 1710, 973, 1045, 1341
+ ]
+ },
+ {
+ "word": "na",
+ "duration": 0.12,
+ "codes": [945, 905, 1335, 1525, 1392, 321, 1216, 1172, 1264]
+ },
+ {
+ "word": "koto",
+ "duration": 0.37,
+ "codes": [
+ 658, 755, 1112, 693, 1344, 1006, 301, 1505, 486, 1448, 1655, 744, 818, 699, 1330, 510, 1487, 457, 1199,
+ 1373, 447, 1144, 606, 1824, 1253, 321, 443, 238
+ ]
+ },
+ {
+ "word": "to",
+ "duration": 0.51,
+ "codes": [
+ 1257, 687, 723, 1096, 1443, 1092, 1199, 1277, 526, 1199, 845, 1038, 1311, 68, 1065, 744, 601, 1433, 612,
+ 651, 1023, 316, 1163, 458, 275, 1248, 68, 633, 923, 601, 566, 925, 1725, 1743, 907, 93, 188, 955
+ ]
+ },
+ {
+ "word": "ka",
+ "duration": 0.37,
+ "codes": [
+ 1494, 91, 681, 1464, 526, 1065, 381, 1080, 1651, 1402, 1065, 271, 916, 1097, 1222, 1180, 1222, 1206,
+ 1112, 997, 984, 628, 1038, 408, 971, 446, 1044, 1306
+ ]
+ },
+ {
+ "word": "shaoshi",
+ "duration": 1.03,
+ "codes": [
+ 142, 813, 973, 303, 811, 1517, 565, 1258, 1291, 1204, 1138, 1181, 1666, 193, 1162, 568, 674, 1347, 331,
+ 1108, 1333, 334, 1397, 1683, 630, 1226, 727, 869, 1438, 566, 1223, 712, 670, 301, 21, 1080, 686, 101,
+ 1250, 1552, 1641, 1329, 183, 1403, 1225, 85, 1010, 686, 1514, 680, 763, 676, 1726, 1234, 717, 93, 31,
+ 1015, 396, 1457, 1100, 867, 428, 1642, 1600, 1580, 1519, 1524, 1711, 1638, 368, 1568, 867, 1753, 1179,
+ 1645, 1317
+ ]
+ },
+ {
+ "word": "xiapin",
+ "duration": 0.84,
+ "codes": [
+ 1726, 416, 1475, 286, 1372, 663, 1783, 572, 936, 1684, 351, 523, 1550, 458, 936, 1572, 337, 534, 1299,
+ 223, 1157, 633, 316, 1413, 538, 566, 591, 401, 1354, 1287, 1636, 252, 143, 1647, 473, 890, 828, 935,
+ 1633, 1278, 1651, 319, 1438, 1653, 1470, 1654, 634, 1259, 1528, 1450, 1513, 1641, 1424, 127, 679, 1209,
+ 310, 508, 1224, 1509, 1281, 1427, 594
+ ]
+ },
+ {
+ "word": "na",
+ "duration": 0.17,
+ "codes": [1497, 1103, 1133, 1401, 594, 1341, 1601, 1112, 95, 1220, 743, 441, 1438]
+ },
+ {
+ "word": "koto",
+ "duration": 0.36,
+ "codes": [
+ 545, 1067, 931, 160, 1486, 1767, 762, 1745, 594, 1499, 529, 73, 709, 1457, 940, 17, 1185, 1762, 378,
+ 1181, 505, 1654, 700, 424, 883, 319, 868
+ ]
+ },
+ {
+ "word": "desu",
+ "duration": 0.55,
+ "codes": [
+ 489, 1052, 1075, 1347, 169, 1486, 670, 1148, 1464, 171, 1260, 923, 54, 1071, 538, 624, 1373, 54, 651,
+ 486, 285, 923, 1677, 957, 1502, 1083, 1152, 1027, 1785, 1776, 1524, 1522, 1752, 1603, 1634, 1677, 1586,
+ 1724, 1556, 1273, 1060
+ ]
+ },
+ {
+ "word": "ne",
+ "duration": 0.17,
+ "codes": [729, 1800, 338, 1772, 987, 449, 1624, 1761, 774, 678, 789, 376, 303]
+ },
+ {
+ "word": "bi",
+ "duration": 1.31,
+ "codes": [
+ 616, 1083, 1187, 1511, 1057, 1784, 1494, 240, 1291, 1079, 337, 1505, 566, 301, 1499, 445, 717, 1251,
+ 285, 735, 727, 168, 891, 287, 54, 1346, 334, 121, 933, 566, 1721, 1338, 534, 1176, 54, 673, 785, 464,
+ 696, 933, 331, 933, 31, 275, 939, 336, 676, 601, 469, 686, 1428, 824, 1362, 1461, 710, 1037, 490, 724,
+ 680, 316, 1142, 612, 981, 727, 464, 1233, 1080, 458, 676, 171, 538, 287, 484, 585, 31, 643, 54, 31, 950,
+ 603, 445, 735, 1820, 1278, 1769, 1117, 1720, 697, 1108, 441, 1621, 973, 603, 609, 879, 407, 708, 823
+ ]
+ },
+ {
+ "word": "ra",
+ "duration": 0.19,
+ "codes": [524, 1187, 1028, 1310, 1087, 1015, 1804, 1052, 847, 1423, 565, 660, 1103, 393]
+ },
+ {
+ "word": "desu",
+ "duration": 0.48,
+ "codes": [
+ 1356, 874, 955, 1384, 1195, 1438, 727, 1201, 1289, 5, 1195, 776, 1271, 1414, 336, 1121, 824, 1658, 700,
+ 492, 926, 1441, 1216, 1671, 1728, 1731, 1729, 1759, 1714, 1602, 1360, 1649, 1577, 1720, 1511, 377
+ ]
+ },
+ {
+ "word": "kara",
+ "duration": 0.25,
+ "codes": [
+ 1154, 1725, 151, 555, 1738, 1511, 466, 294, 1140, 349, 993, 1005, 777, 494, 1462, 499, 797, 754, 874
+ ]
+ },
+ {
+ "word": "nan",
+ "duration": 0.32,
+ "codes": [
+ 386, 970, 1258, 1258, 891, 1328, 1251, 627, 1318, 1045, 361, 9, 1190, 239, 1534, 338, 1486, 420, 121,
+ 1308, 1307, 300, 82, 724
+ ]
+ },
+ {
+ "word": "no",
+ "duration": 0.27,
+ "codes": [
+ 1245, 381, 1108, 1023, 555, 1111, 1033, 115, 1052, 1214, 408, 322, 885, 1022, 747, 724, 985, 545, 1142,
+ 535
+ ]
+ },
+ {
+ "word": "ren",
+ "duration": 0.2,
+ "codes": [1055, 666, 642, 603, 1468, 1598, 1412, 1091, 1519, 1820, 121, 334, 1233, 1338, 1141]
+ },
+ {
+ "word": "desu",
+ "duration": 0.36,
+ "codes": [
+ 1105, 1191, 962, 115, 1716, 1410, 1799, 663, 1745, 1211, 1187, 1411, 1121, 1486, 1784, 1065, 1820, 450,
+ 1739, 1691, 1578, 1738, 1685, 16, 1478, 987, 405
+ ]
+ },
+ {
+ "word": "ne",
+ "duration": 0.13,
+ "codes": [1486, 1240, 77, 1504, 452, 1429, 1353, 1204, 1424, 1331]
+ }
+ ],
+ "language": "ja"
+}
diff --git a/examples/outetts/speakers/ja_female_2.json b/examples/outetts/speakers/ja_female_2.json
new file mode 100644
index 000000000..1b3f5775d
--- /dev/null
+++ b/examples/outetts/speakers/ja_female_2.json
@@ -0,0 +1,266 @@
+{
+ "text": "\u306a\u306e\u306b\u5f1f\u304c\u90aa\u9b54\u3092\u3057\u3066\u304d\u307e\u3059\u3002\u306d\u3048\u3001\u304a\u5144\u3061\u3083\u3093\u904a\u307c\u3046\u3088\u3002\u306d\u3048\u3001\u304a\u5144\u3061\u3083\u3093\u51fa\u304b\u3051\u3088\u3046\u3088\u3002\u90aa\u9b54\u3092\u3057\u3066\u304d\u307e\u3059\u3002\u306a\u306e\u3067\u3001\u5168\u7136\u96c6\u4e2d\u304c\u3067\u304d\u307e\u305b\u3093\u3002",
+ "words": [
+ {
+ "word": "na",
+ "duration": 0.19,
+ "codes": [1176, 566, 278, 584, 336, 394, 601, 1403, 269, 961, 1347, 1313, 466, 1352]
+ },
+ {
+ "word": "no",
+ "duration": 0.28,
+ "codes": [
+ 1204, 1007, 637, 1502, 984, 1264, 1690, 1240, 1078, 1316, 1051, 1227, 1780, 1509, 1670, 1767, 642, 1318,
+ 978, 1191, 1065
+ ]
+ },
+ {
+ "word": "ni",
+ "duration": 0.32,
+ "codes": [
+ 1489, 794, 1506, 80, 523, 565, 1656, 169, 565, 287, 1047, 226, 1743, 706, 1244, 532, 1010, 1698, 759,
+ 1148, 1731, 549, 1393, 1494
+ ]
+ },
+ {
+ "word": "di",
+ "duration": 0.81,
+ "codes": [
+ 1462, 1200, 1743, 915, 1317, 1468, 1675, 774, 1808, 1051, 1280, 1162, 301, 153, 1777, 355, 1766, 1043,
+ 1793, 1067, 259, 1780, 486, 490, 1640, 316, 169, 1408, 171, 633, 799, 336, 692, 601, 458, 981, 31, 805,
+ 603, 209, 673, 638, 690, 516, 474, 894, 1754, 1075, 830, 923, 32, 523, 1277, 27, 287, 1679, 1075, 446,
+ 149, 152, 449
+ ]
+ },
+ {
+ "word": "ga",
+ "duration": 0.55,
+ "codes": [
+ 13, 470, 321, 259, 83, 526, 95, 1042, 526, 186, 883, 1037, 1388, 396, 1199, 136, 405, 1684, 377, 1470,
+ 1308, 997, 1137, 236, 630, 466, 1063, 1038, 768, 953, 1142, 1575, 1003, 1447, 1656, 306, 826, 1178, 254,
+ 1202, 879
+ ]
+ },
+ {
+ "word": "xiemo",
+ "duration": 0.64,
+ "codes": [
+ 1256, 324, 9, 1207, 1046, 1259, 658, 1647, 1196, 1194, 1179, 1200, 1264, 1059, 595, 1245, 1356, 1132,
+ 1307, 1265, 1265, 1367, 858, 1811, 1070, 1484, 78, 42, 1711, 34, 362, 1821, 1261, 82, 809, 892, 1267,
+ 1497, 1156, 935, 1792, 385, 935, 1368, 728, 724, 339, 729
+ ]
+ },
+ {
+ "word": "o",
+ "duration": 0.12,
+ "codes": [1180, 994, 1324, 1189, 1136, 1137, 521, 1207, 1003]
+ },
+ {
+ "word": "shi",
+ "duration": 0.13,
+ "codes": [565, 1435, 1144, 319, 1599, 1400, 1296, 1818, 1739, 1595]
+ },
+ {
+ "word": "te",
+ "duration": 0.08,
+ "codes": [933, 1746, 1543, 910, 1297, 1328]
+ },
+ {
+ "word": "ki",
+ "duration": 0.16,
+ "codes": [104, 1154, 416, 86, 1096, 1418, 168, 1832, 1552, 1686, 495, 660]
+ },
+ {
+ "word": "masu",
+ "duration": 0.39,
+ "codes": [
+ 781, 907, 316, 1235, 66, 1303, 904, 1188, 1124, 1171, 978, 791, 1213, 791, 1181, 565, 1137, 1226, 741,
+ 229, 1268, 1753, 1548, 719, 1615, 1821, 1466, 1413, 1038
+ ]
+ },
+ {
+ "word": "nee",
+ "duration": 0.24,
+ "codes": [1421, 458, 1123, 1142, 360, 1346, 771, 548, 1807, 1323, 143, 1280, 1045, 852, 947, 777, 1471, 781]
+ },
+ {
+ "word": "o",
+ "duration": 0.01,
+ "codes": [764]
+ },
+ {
+ "word": "xiong",
+ "duration": 0.35,
+ "codes": [
+ 1055, 1360, 979, 660, 1179, 1028, 628, 963, 1411, 829, 669, 1069, 681, 655, 1097, 1714, 1043, 1492,
+ 1136, 677, 1066, 1818, 1429, 1389, 1788, 241
+ ]
+ },
+ {
+ "word": "chan",
+ "duration": 0.24,
+ "codes": [
+ 1762, 1602, 101, 1374, 1431, 1132, 1309, 1203, 1178, 1197, 1477, 1105, 1063, 1204, 1391, 902, 206, 1816
+ ]
+ },
+ {
+ "word": "youbou",
+ "duration": 0.48,
+ "codes": [
+ 1095, 1106, 1041, 1351, 729, 1137, 1468, 1619, 1735, 1773, 274, 1749, 1454, 1192, 19, 248, 1668, 1195,
+ 1295, 1771, 490, 266, 1453, 1013, 774, 1232, 839, 217, 1280, 973, 1324, 942, 798, 969, 1108, 818
+ ]
+ },
+ {
+ "word": "yo",
+ "duration": 0.21,
+ "codes": [903, 1277, 1332, 816, 743, 1254, 1308, 1633, 242, 1265, 1141, 1411, 829, 1447, 829, 1399]
+ },
+ {
+ "word": "nee",
+ "duration": 0.79,
+ "codes": [
+ 1192, 13, 1195, 910, 983, 1275, 123, 1314, 424, 1746, 569, 1417, 1172, 705, 13, 1202, 1154, 446, 792,
+ 1483, 1315, 627, 1317, 226, 1193, 1505, 396, 1331, 1144, 548, 1259, 674, 1489, 1489, 674, 1395, 772,
+ 1079, 1259, 630, 1226, 924, 601, 1770, 360, 490, 387, 119, 954, 40, 1179, 1468, 1090, 1351, 1090, 1232,
+ 1426, 1276, 1423
+ ]
+ },
+ {
+ "word": "o",
+ "duration": 0.01,
+ "codes": [1089]
+ },
+ {
+ "word": "xiong",
+ "duration": 0.28,
+ "codes": [
+ 1051, 1208, 1309, 1651, 1498, 1499, 1210, 1193, 1131, 1645, 1233, 887, 995, 1090, 789, 1349, 1100, 1583,
+ 829, 976, 1096
+ ]
+ },
+ {
+ "word": "chan",
+ "duration": 0.2,
+ "codes": [1092, 1474, 379, 1588, 1753, 1066, 19, 1184, 755, 1097, 1195, 1229, 1333, 1165, 1738]
+ },
+ {
+ "word": "chukakeyou",
+ "duration": 0.72,
+ "codes": [
+ 1451, 1526, 594, 89, 1374, 1784, 1055, 1477, 1090, 202, 886, 405, 1622, 1763, 624, 1761, 1144, 1122,
+ 1254, 1203, 79, 1332, 661, 1038, 1745, 1239, 717, 1279, 526, 1827, 1478, 1042, 793, 386, 1685, 1153,
+ 825, 640, 887, 1215, 632, 1230, 829, 1735, 965, 1421, 998, 1803, 839, 1745, 839, 1770, 545, 964
+ ]
+ },
+ {
+ "word": "yo",
+ "duration": 0.23,
+ "codes": [1107, 1252, 459, 1227, 966, 175, 1108, 1407, 1500, 1265, 1492, 661, 1223, 95, 1487, 918, 359]
+ },
+ {
+ "word": "xiemo",
+ "duration": 0.76,
+ "codes": [
+ 1304, 1297, 411, 967, 1196, 730, 1065, 1106, 1119, 775, 825, 814, 1378, 916, 548, 41, 921, 1414, 1352,
+ 338, 1277, 275, 717, 1414, 612, 1123, 1092, 674, 1251, 285, 486, 659, 241, 1321, 151, 765, 0, 1529,
+ 1832, 1045, 1256, 1320, 1100, 353, 1314, 1817, 1392, 111, 1213, 1510, 830, 1218, 327, 1131, 1126, 1682,
+ 1113
+ ]
+ },
+ {
+ "word": "o",
+ "duration": 0.16,
+ "codes": [580, 353, 1265, 1298, 1671, 1022, 721, 394, 982, 1690, 1803, 1324]
+ },
+ {
+ "word": "shi",
+ "duration": 0.16,
+ "codes": [679, 1060, 1378, 1619, 1808, 1217, 1586, 1459, 1826, 496, 1038, 1246]
+ },
+ {
+ "word": "te",
+ "duration": 0.11,
+ "codes": [241, 1455, 274, 1487, 63, 518, 1068, 22]
+ },
+ {
+ "word": "ki",
+ "duration": 0.17,
+ "codes": [924, 1717, 1070, 1736, 265, 523, 1572, 766, 1786, 1478, 1497, 1460, 813]
+ },
+ {
+ "word": "masu",
+ "duration": 0.43,
+ "codes": [
+ 747, 1311, 1430, 1079, 1331, 1219, 698, 1131, 1029, 1772, 1100, 353, 1264, 1410, 1745, 1113, 1150, 941,
+ 1505, 1266, 1314, 416, 1634, 1298, 1609, 1669, 1331, 1578, 1713, 1766, 1557, 1739
+ ]
+ },
+ {
+ "word": "na",
+ "duration": 0.48,
+ "codes": [
+ 1763, 258, 1289, 762, 419, 1112, 1168, 209, 1148, 1205, 54, 1328, 1699, 458, 1276, 995, 905, 1367, 54,
+ 1227, 724, 235, 603, 534, 31, 933, 551, 68, 747, 1141, 1736, 1447, 665, 913, 671, 1174
+ ]
+ },
+ {
+ "word": "no",
+ "duration": 0.17,
+ "codes": [1333, 1226, 1476, 1425, 1493, 114, 66, 1393, 1766, 813, 111, 1347, 1466]
+ },
+ {
+ "word": "de",
+ "duration": 0.24,
+ "codes": [1330, 327, 729, 1382, 1000, 984, 1403, 174, 469, 1750, 35, 1746, 882, 1159, 900, 6, 9, 531]
+ },
+ {
+ "word": "quanran",
+ "duration": 1.03,
+ "codes": [
+ 1027, 870, 1159, 1032, 780, 1282, 376, 511, 1017, 870, 1116, 1097, 844, 1196, 376, 1344, 1058, 529,
+ 1471, 1280, 1651, 1148, 1774, 1319, 1285, 1744, 20, 1504, 1729, 497, 982, 546, 178, 977, 1133, 440,
+ 1320, 1017, 1112, 1082, 1303, 1022, 489, 908, 1057, 724, 1317, 1826, 792, 1389, 1784, 1052, 1758, 1236,
+ 1464, 1608, 1569, 1487, 1416, 1191, 1671, 1116, 400, 934, 200, 1206, 1051, 689, 764, 910, 1305, 927,
+ 998, 447, 1332, 970, 1429
+ ]
+ },
+ {
+ "word": "jizhong",
+ "duration": 0.97,
+ "codes": [
+ 266, 560, 1736, 149, 1204, 1646, 1172, 1298, 1806, 1127, 1457, 1106, 661, 1107, 1281, 649, 1149, 1275,
+ 387, 1792, 1439, 1561, 1496, 546, 1524, 1638, 546, 1496, 1364, 328, 938, 654, 1179, 1111, 1682, 1329,
+ 202, 839, 353, 454, 822, 937, 40, 1364, 454, 1136, 754, 362, 1428, 1701, 702, 405, 1525, 445, 1154,
+ 1608, 1654, 865, 415, 888, 1600, 1102, 1277, 202, 756, 894, 1622, 440, 844, 737, 655, 1061, 1166
+ ]
+ },
+ {
+ "word": "ga",
+ "duration": 0.23,
+ "codes": [974, 655, 822, 931, 80, 1433, 859, 316, 1466, 1671, 547, 303, 972, 973, 497, 1258, 847]
+ },
+ {
+ "word": "deki",
+ "duration": 0.49,
+ "codes": [
+ 1200, 1256, 524, 825, 83, 705, 1122, 1216, 347, 179, 260, 1187, 999, 461, 1109, 1462, 761, 729, 1801,
+ 1092, 1470, 700, 718, 1381, 440, 1448, 905, 1746, 119, 1065, 1176, 1145, 1587, 1819, 1087, 957, 1091
+ ]
+ },
+ {
+ "word": "mase",
+ "duration": 0.43,
+ "codes": [
+ 628, 1257, 284, 1351, 904, 460, 1468, 728, 1108, 1076, 741, 651, 339, 693, 1361, 370, 905, 971, 679,
+ 1613, 1500, 1606, 1634, 1687, 1750, 1202, 1265, 1203, 1139, 318, 1129, 816
+ ]
+ },
+ {
+ "word": "n",
+ "duration": 0.13,
+ "codes": [1169, 1397, 245, 1126, 1154, 642, 999, 961, 1742, 1542]
+ }
+ ],
+ "language": "ja"
+}
diff --git a/examples/outetts/speakers/ja_female_3.json b/examples/outetts/speakers/ja_female_3.json
new file mode 100644
index 000000000..1a3982dce
--- /dev/null
+++ b/examples/outetts/speakers/ja_female_3.json
@@ -0,0 +1,181 @@
+{
+ "text": "\u5168\u529b\u3092\u5c3d\u304f\u3057\u305f\u306e\u306a\u3089\u3001\u7d50\u679c\u306f\u3069\u3046\u3067\u3042\u308c\u80f8\u3092\u5f35\u3063\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002\u305d\u3046\u3059\u308c\u3070\u304d\u3063\u3068\u6b21\u306b\u7e4b\u304c\u308b\u306f\u305a\u3067\u3059\u3002",
+ "words": [
+ {
+ "word": "quanli",
+ "duration": 0.4,
+ "codes": [
+ 1007, 673, 1788, 101, 1354, 1539, 1337, 1742, 137, 225, 1521, 359, 189, 699, 1464, 508, 1298, 1458,
+ 1546, 1335, 78, 1251, 526, 1350, 469, 1010, 295, 1174, 188, 1426
+ ]
+ },
+ {
+ "word": "o",
+ "duration": 0.27,
+ "codes": [
+ 254, 717, 1733, 59, 151, 1305, 954, 976, 1382, 1828, 1118, 367, 908, 923, 138, 1218, 548, 113, 1030, 674
+ ]
+ },
+ {
+ "word": "jinkushi",
+ "duration": 0.44,
+ "codes": [
+ 587, 364, 1433, 1748, 1635, 544, 976, 1644, 1451, 906, 22, 512, 882, 1212, 1432, 1246, 1142, 1721, 1754,
+ 927, 95, 676, 2, 1210, 1189, 1647, 927, 122, 319, 11, 62, 1535, 1760
+ ]
+ },
+ {
+ "word": "ta",
+ "duration": 0.08,
+ "codes": [686, 1805, 1237, 944, 215, 1434]
+ },
+ {
+ "word": "no",
+ "duration": 0.12,
+ "codes": [1203, 1127, 416, 1574, 1605, 1187, 1038, 927, 270]
+ },
+ {
+ "word": "nara",
+ "duration": 0.24,
+ "codes": [509, 1409, 343, 98, 1632, 107, 1361, 1820, 43, 1286, 1389, 1187, 1671, 380, 560, 1553, 147, 1821]
+ },
+ {
+ "word": "jieguo",
+ "duration": 0.57,
+ "codes": [
+ 402, 1785, 581, 200, 1769, 1278, 1612, 1293, 1343, 627, 1448, 799, 1071, 1752, 406, 1380, 1305, 1016,
+ 175, 1345, 934, 508, 1439, 108, 63, 518, 1019, 906, 1215, 555, 976, 1114, 1185, 1809, 673, 1385, 1669,
+ 1748, 1001, 1395, 210, 1130, 215
+ ]
+ },
+ {
+ "word": "ha",
+ "duration": 0.15,
+ "codes": [1199, 271, 1140, 1059, 475, 717, 364, 492, 1367, 215, 718]
+ },
+ {
+ "word": "dou",
+ "duration": 0.28,
+ "codes": [
+ 1448, 236, 660, 1511, 1717, 1783, 160, 1557, 759, 1124, 248, 1287, 756, 1150, 981, 215, 988, 1198, 1119,
+ 1358, 678
+ ]
+ },
+ {
+ "word": "de",
+ "duration": 0.16,
+ "codes": [1415, 1245, 1173, 1043, 1604, 1602, 969, 1195, 1735, 157, 1016, 388]
+ },
+ {
+ "word": "are",
+ "duration": 0.28,
+ "codes": [
+ 1433, 4, 466, 1695, 1313, 57, 1482, 1156, 1760, 581, 205, 1747, 560, 1284, 293, 1223, 1755, 277, 1066,
+ 794, 309
+ ]
+ },
+ {
+ "word": "xiong",
+ "duration": 0.33,
+ "codes": [
+ 1647, 1678, 795, 1163, 1448, 712, 1803, 1694, 1023, 1652, 1465, 1262, 1280, 251, 1549, 203, 954, 113,
+ 454, 36, 633, 362, 1672, 1335, 1607
+ ]
+ },
+ {
+ "word": "o",
+ "duration": 0.16,
+ "codes": [1247, 1172, 1342, 759, 1059, 95, 1002, 1149, 695, 637, 1487, 1042]
+ },
+ {
+ "word": "zhangtsu",
+ "duration": 0.27,
+ "codes": [
+ 1136, 1323, 679, 1556, 1623, 1031, 1415, 1195, 1110, 1280, 1423, 1059, 104, 1300, 1446, 696, 805, 555,
+ 1205, 1474
+ ]
+ },
+ {
+ "word": "te",
+ "duration": 0.05,
+ "codes": [1641, 931, 1330, 183]
+ },
+ {
+ "word": "iki",
+ "duration": 0.2,
+ "codes": [552, 1557, 422, 1346, 510, 983, 1608, 1390, 606, 1028, 1724, 335, 546, 422, 568]
+ },
+ {
+ "word": "mashou",
+ "duration": 0.32,
+ "codes": [
+ 319, 134, 1674, 229, 1412, 537, 1468, 1437, 1297, 1366, 198, 333, 527, 276, 1395, 113, 1325, 1258, 1423,
+ 1258, 1280, 1075, 1156, 1066
+ ]
+ },
+ {
+ "word": "sou",
+ "duration": 1.11,
+ "codes": [
+ 1202, 271, 41, 450, 1305, 1157, 1652, 1824, 17, 1279, 1478, 891, 1754, 1288, 1648, 1669, 1023, 1458,
+ 1419, 939, 1707, 1157, 1648, 1707, 1163, 1458, 274, 1288, 976, 1480, 824, 1322, 1231, 957, 1390, 1231,
+ 1652, 1007, 1262, 1632, 1080, 1749, 1707, 591, 1760, 1233, 1262, 1707, 659, 1652, 1652, 17, 1632, 899,
+ 933, 1489, 986, 1705, 1525, 710, 1707, 591, 799, 1632, 555, 1023, 1255, 1293, 1772, 1451, 1664, 1600,
+ 399, 485, 408, 1309, 106, 1120, 260, 918, 88, 548, 609
+ ]
+ },
+ {
+ "word": "sure",
+ "duration": 0.28,
+ "codes": [
+ 626, 1147, 826, 519, 1606, 1302, 1681, 1617, 1641, 1691, 1127, 860, 1264, 1223, 1574, 1367, 1105, 1135,
+ 1183, 1286, 1031
+ ]
+ },
+ {
+ "word": "ba",
+ "duration": 0.12,
+ "codes": [324, 1462, 240, 1381, 1271, 1177, 282, 764, 977]
+ },
+ {
+ "word": "kitto",
+ "duration": 0.41,
+ "codes": [
+ 1181, 270, 870, 1584, 505, 591, 691, 1495, 1568, 308, 714, 1785, 1552, 234, 615, 85, 858, 1353, 1562,
+ 1699, 287, 1305, 891, 513, 1639, 1728, 1353, 1400, 260, 1292, 83
+ ]
+ },
+ {
+ "word": "ci",
+ "duration": 0.4,
+ "codes": [
+ 1305, 221, 1707, 550, 1338, 756, 1530, 1145, 1705, 659, 459, 1023, 981, 1749, 1233, 636, 1363, 298,
+ 1487, 344, 152, 1341, 1615, 1721, 1582, 1221, 1740, 239, 1428, 243
+ ]
+ },
+ {
+ "word": "ni",
+ "duration": 0.15,
+ "codes": [1043, 1624, 1286, 612, 1493, 523, 1807, 47, 1601, 47, 918]
+ },
+ {
+ "word": "jigaru",
+ "duration": 0.52,
+ "codes": [
+ 293, 1767, 1578, 939, 1273, 1028, 1693, 1688, 10, 863, 18, 725, 1369, 91, 1580, 1712, 60, 1756, 324,
+ 992, 1764, 452, 521, 843, 1635, 906, 1575, 1669, 115, 42, 461, 9, 471, 730, 1302, 466, 1188, 1437, 637
+ ]
+ },
+ {
+ "word": "hazu",
+ "duration": 0.21,
+ "codes": [1723, 1160, 1047, 793, 697, 1391, 1746, 254, 1433, 1679, 1608, 634, 328, 93, 964, 65]
+ },
+ {
+ "word": "desu",
+ "duration": 0.24,
+ "codes": [1695, 1561, 408, 967, 479, 658, 1229, 1205, 474, 890, 176, 1718, 440, 701, 719, 782, 1724, 1313]
+ }
+ ],
+ "language": "ja"
+}
diff --git a/examples/outetts/speakers/ja_male_1.json b/examples/outetts/speakers/ja_male_1.json
new file mode 100644
index 000000000..ab9c2097a
--- /dev/null
+++ b/examples/outetts/speakers/ja_male_1.json
@@ -0,0 +1,101 @@
+{
+ "text": "\u3053\u306e\u8fba\u308a\u3067\u306f\u975e\u5e38\u306b\u51ac\u3001\u96ea\u304c\u591a\u3044\u305f\u3081\u306b",
+ "words": [
+ {
+ "word": "kono",
+ "duration": 1.84,
+ "codes": [
+ 866, 1242, 169, 820, 16, 1071, 636, 1499, 782, 1368, 701, 664, 1638, 1667, 256, 1284, 231, 1411, 62,
+ 535, 1342, 24, 715, 1818, 979, 1004, 945, 714, 1413, 1727, 98, 730, 787, 1200, 1356, 376, 475, 616,
+ 1396, 202, 1755, 1419, 1241, 1145, 714, 1490, 1683, 69, 700, 735, 243, 1749, 626, 67, 713, 83, 1751,
+ 927, 1499, 471, 1811, 1227, 1331, 58, 1264, 1638, 1433, 696, 1246, 1331, 1034, 1079, 1738, 1370, 700,
+ 1270, 1549, 0, 767, 895, 1057, 333, 1760, 1565, 1151, 406, 1016, 751, 1205, 1519, 1123, 1379, 1714,
+ 1079, 1329, 1185, 1303, 1370, 1504, 1088, 1735, 1391, 1175, 1003, 19, 1343, 169, 1465, 16, 735, 1215,
+ 1505, 1077, 1024, 951, 1824, 1422, 1509, 1053, 1805, 1266, 1262, 799, 1333, 1484, 1798, 1558, 1189, 123,
+ 1204, 120, 153, 484, 1664, 286, 1150, 754, 931
+ ]
+ },
+ {
+ "word": "bianri",
+ "duration": 0.35,
+ "codes": [
+ 282, 777, 970, 208, 179, 1032, 208, 966, 121, 1007, 691, 620, 1743, 1200, 467, 366, 708, 39, 138, 738,
+ 431, 1034, 809, 398, 966, 888
+ ]
+ },
+ {
+ "word": "de",
+ "duration": 0.09,
+ "codes": [845, 925, 1792, 1091, 809, 750, 806]
+ },
+ {
+ "word": "ha",
+ "duration": 0.16,
+ "codes": [1665, 961, 1404, 959, 1761, 840, 1755, 635, 639, 525, 1738, 147]
+ },
+ {
+ "word": "feichang",
+ "duration": 0.96,
+ "codes": [
+ 475, 1259, 147, 657, 1282, 370, 876, 1432, 1822, 442, 1064, 1465, 1254, 1040, 1504, 1153, 1459, 1421,
+ 1153, 1832, 1064, 1355, 513, 1309, 1255, 673, 1360, 343, 984, 1215, 1007, 1716, 1320, 1307, 328, 1402,
+ 1332, 1037, 1308, 1349, 1742, 1438, 1091, 1775, 1091, 1214, 1663, 1642, 1282, 1503, 895, 1245, 1800,
+ 454, 1635, 34, 833, 368, 844, 843, 295, 102, 912, 376, 47, 730, 130, 63, 110, 811, 1019, 2
+ ]
+ },
+ {
+ "word": "ni",
+ "duration": 0.16,
+ "codes": [1218, 1240, 1388, 1387, 638, 1403, 735, 1350, 202, 1443, 1554, 409]
+ },
+ {
+ "word": "dong",
+ "duration": 1.16,
+ "codes": [
+ 1175, 1427, 182, 1332, 1718, 640, 548, 1319, 862, 316, 1412, 924, 1223, 1091, 1079, 1735, 1215, 717,
+ 1230, 694, 624, 1273, 275, 1118, 1228, 1414, 1476, 997, 1642, 1247, 58, 1286, 1565, 1402, 1436, 1816,
+ 1477, 1024, 32, 1429, 928, 1158, 1752, 584, 1172, 820, 351, 1293, 625, 601, 1430, 641, 516, 1383, 490,
+ 924, 1570, 651, 1639, 572, 967, 1510, 603, 1418, 1293, 986, 438, 696, 710, 1338, 258, 1003, 1144, 712,
+ 1276, 1803, 1421, 1129, 1668, 1720, 1697, 1013, 1651, 1697, 679, 296, 1732
+ ]
+ },
+ {
+ "word": "xue",
+ "duration": 1.19,
+ "codes": [
+ 952, 1633, 6, 469, 535, 510, 1618, 787, 853, 1827, 409, 1485, 752, 41, 1750, 555, 1118, 1242, 630, 1614,
+ 659, 1262, 1601, 696, 1360, 899, 585, 1763, 487, 274, 686, 287, 1176, 824, 712, 1176, 1080, 957, 1338,
+ 680, 1284, 505, 566, 928, 568, 1067, 786, 490, 1818, 1462, 1127, 1146, 1330, 1496, 1818, 1666, 788,
+ 1425, 1671, 1375, 1181, 1496, 1639, 1118, 333, 692, 1573, 1035, 1725, 1270, 1372, 1789, 605, 191, 1680,
+ 1434, 1483, 1554, 1787, 78, 1244, 1821, 440, 947, 1777, 279, 1098, 1735, 1337
+ ]
+ },
+ {
+ "word": "ga",
+ "duration": 0.16,
+ "codes": [277, 465, 67, 1590, 281, 1568, 1700, 82, 860, 1066, 43, 660]
+ },
+ {
+ "word": "duoi",
+ "duration": 0.4,
+ "codes": [
+ 324, 1072, 1129, 864, 1365, 1779, 1347, 1705, 934, 1357, 1633, 613, 1555, 1015, 456, 1605, 1793, 203,
+ 488, 439, 1674, 1279, 775, 214, 1639, 493, 1388, 308, 1418, 319
+ ]
+ },
+ {
+ "word": "tame",
+ "duration": 0.32,
+ "codes": [
+ 1528, 1666, 1238, 1746, 1832, 1536, 442, 899, 1419, 1261, 1426, 1378, 1429, 1318, 1358, 1098, 1464,
+ 1792, 1614, 921, 1677, 1404, 1245, 1519
+ ]
+ },
+ {
+ "word": "ni",
+ "duration": 0.17,
+ "codes": [58, 759, 1639, 1025, 1672, 633, 1315, 325, 1471, 1717, 665, 1401, 1524]
+ }
+ ],
+ "language": "ja"
+}
diff --git a/examples/outetts/speakers/ko_female_1.json b/examples/outetts/speakers/ko_female_1.json
new file mode 100644
index 000000000..7918dc375
--- /dev/null
+++ b/examples/outetts/speakers/ko_female_1.json
@@ -0,0 +1,175 @@
+{
+ "text": " \uc544\ubb34\ub798\ub3c4 \uccab \ub370\uc774\ud2b8\ub77c \ub2e4\ub4e4 \uc798 \ubcf4\uc774\uace0 \uc2f6\uc740 \uac83 \uac19\uc740\ub370\uc694. \uc544\uc9c1 \uaf42\ud78c \uc0ac\ub78c \uc5c6\ub2e4\ub354\ub2c8. \uc774\ub807\uac8c \uc5f4\uc2ec\ud788\uc778 \uac78 \ubcf4\uba74 \uc18d\ub9c8\uc74c\uc740 \uc880 \ub2e4\ub978\uac00 \ubd10\uc694.",
+ "words": [
+ {
+ "word": "amuraedo",
+ "duration": 0.52,
+ "codes": [
+ 1707, 1788, 1072, 1484, 1325, 25, 475, 456, 474, 441, 1331, 1639, 1439, 214, 572, 996, 1063, 1223, 208,
+ 1492, 1070, 1117, 104, 1271, 1378, 170, 1154, 245, 695, 1469, 1798, 1321, 1322, 28, 1718, 467, 912,
+ 1340, 774
+ ]
+ },
+ {
+ "word": "ceos",
+ "duration": 0.36,
+ "codes": [
+ 272, 890, 443, 728, 682, 1732, 1271, 1596, 1658, 57, 133, 1581, 864, 1246, 1817, 1451, 1822, 178, 407,
+ 1524, 1423, 797, 1318, 1397, 1438, 1725, 1730
+ ]
+ },
+ {
+ "word": "deiteura",
+ "duration": 0.55,
+ "codes": [
+ 516, 1707, 1783, 513, 1687, 1822, 1324, 931, 412, 714, 1101, 1105, 849, 461, 1147, 1705, 510, 1421,
+ 1655, 459, 693, 1037, 957, 1572, 1829, 308, 1535, 1434, 1313, 705, 1517, 898, 335, 1595, 446, 531, 498,
+ 658, 29, 248, 613
+ ]
+ },
+ {
+ "word": "dadeul",
+ "duration": 0.52,
+ "codes": [
+ 946, 570, 560, 1324, 629, 697, 1136, 1463, 827, 1647, 1455, 1404, 1514, 1421, 1418, 1721, 1770, 719,
+ 1459, 1040, 595, 1542, 868, 1107, 998, 172, 1077, 317, 1819, 1319, 939, 608, 1161, 1347, 282, 1133, 79,
+ 1212, 770
+ ]
+ },
+ {
+ "word": "jal",
+ "duration": 0.24,
+ "codes": [749, 1616, 1624, 335, 1261, 719, 1177, 1483, 347, 1438, 901, 1187, 1030, 340, 1331, 635, 679, 340]
+ },
+ {
+ "word": "boigo",
+ "duration": 0.36,
+ "codes": [
+ 1189, 514, 802, 1352, 86, 691, 1233, 913, 754, 678, 1563, 182, 506, 770, 69, 990, 592, 775, 921, 648,
+ 712, 1525, 1829, 832, 1037, 1089, 732
+ ]
+ },
+ {
+ "word": "sipeun",
+ "duration": 0.28,
+ "codes": [
+ 787, 1710, 1463, 14, 384, 767, 512, 1421, 1510, 1808, 1380, 1568, 1667, 1721, 947, 1393, 1792, 664, 414,
+ 756, 805
+ ]
+ },
+ {
+ "word": "geos",
+ "duration": 0.12,
+ "codes": [1371, 1583, 1700, 1196, 690, 813, 1650, 702, 1479]
+ },
+ {
+ "word": "gateundeyo",
+ "duration": 0.55,
+ "codes": [
+ 1525, 1623, 1332, 1330, 800, 723, 354, 1731, 1628, 735, 1205, 1672, 1771, 1261, 1364, 1583, 1546, 1640,
+ 1048, 28, 1590, 530, 1828, 134, 1706, 865, 1150, 1015, 1698, 361, 1563, 1726, 1272, 26, 1278, 1150, 408,
+ 1771, 764, 1542, 825
+ ]
+ },
+ {
+ "word": "ajig",
+ "duration": 0.96,
+ "codes": [
+ 1107, 58, 1424, 1487, 1300, 1078, 1775, 109, 1671, 1780, 1727, 1099, 1251, 1118, 1669, 1595, 1699, 1176,
+ 729, 743, 970, 1037, 878, 655, 878, 1738, 1486, 347, 1801, 1397, 1329, 1736, 1059, 1495, 1433, 1130,
+ 1118, 1289, 1037, 1231, 1478, 1811, 1298, 1502, 847, 147, 248, 571, 595, 705, 809, 1677, 702, 1683,
+ 1711, 22, 1364, 6, 546, 1363, 1331, 1427, 1670, 1804, 1004, 527, 1671, 1267, 1585, 1792, 1832, 1724
+ ]
+ },
+ {
+ "word": "ggojhin",
+ "duration": 0.45,
+ "codes": [
+ 1704, 1623, 1474, 1595, 1478, 516, 1289, 1509, 883, 1415, 682, 716, 217, 1396, 555, 1717, 799, 1079,
+ 438, 923, 1007, 1832, 1166, 10, 844, 62, 178, 368, 1372, 1211, 1600, 1341, 184, 1558
+ ]
+ },
+ {
+ "word": "saram",
+ "duration": 0.32,
+ "codes": [
+ 1071, 1498, 1657, 1775, 906, 1718, 1215, 1005, 13, 497, 953, 570, 522, 810, 1775, 579, 1133, 498, 1060,
+ 901, 970, 1243, 1175, 932
+ ]
+ },
+ {
+ "word": "eobsdadeoni",
+ "duration": 0.75,
+ "codes": [
+ 644, 1742, 1683, 1330, 1486, 1222, 1816, 1689, 1507, 1717, 1669, 1621, 344, 1646, 1573, 1751, 1346,
+ 1385, 799, 1010, 823, 1366, 1042, 1136, 318, 1229, 326, 550, 1015, 1300, 1763, 91, 1752, 1802, 1682,
+ 1088, 818, 716, 1156, 503, 1804, 1330, 1013, 924, 1294, 363, 1771, 615, 1594, 1502, 1646, 1766, 949,
+ 1312, 1655, 1783
+ ]
+ },
+ {
+ "word": "ireohge",
+ "duration": 1.35,
+ "codes": [
+ 1584, 1545, 1768, 1811, 1267, 1640, 1782, 1488, 1388, 1762, 1828, 1520, 1396, 1593, 1605, 1284, 1339,
+ 1657, 1769, 1540, 1605, 1709, 450, 1712, 734, 1461, 1616, 1635, 1536, 1592, 1321, 325, 1771, 406, 1657,
+ 1163, 585, 1016, 17, 1390, 1693, 758, 1284, 1474, 862, 1176, 1346, 426, 1293, 1338, 1713, 1346, 1284,
+ 1176, 939, 1430, 758, 1176, 1543, 1440, 862, 785, 1248, 1721, 1348, 1408, 1672, 735, 1763, 1672, 624,
+ 1525, 555, 923, 1528, 1721, 519, 1368, 529, 407, 1177, 50, 1708, 137, 553, 1268, 186, 626, 1770, 1794,
+ 1301, 1735, 976, 1391, 1139, 1160, 1677, 479, 200, 51, 1093
+ ]
+ },
+ {
+ "word": "yeolsimhiin",
+ "duration": 0.44,
+ "codes": [
+ 49, 1103, 137, 1298, 61, 181, 117, 133, 181, 1819, 856, 50, 844, 507, 356, 1382, 166, 356, 1366, 1358,
+ 639, 1435, 1445, 1365, 1304, 202, 1270, 1056, 99, 844, 1427, 1575, 57
+ ]
+ },
+ {
+ "word": "geol",
+ "duration": 0.13,
+ "codes": [1299, 1516, 1399, 721, 346, 600, 374, 549, 1500, 583]
+ },
+ {
+ "word": "bomyeon",
+ "duration": 0.37,
+ "codes": [
+ 1765, 1626, 1724, 1737, 1656, 1149, 635, 617, 1502, 935, 385, 1580, 1556, 912, 780, 282, 775, 1015, 695,
+ 143, 110, 532, 1138, 330, 1372, 782, 1487, 703
+ ]
+ },
+ {
+ "word": "sogmaeumeun",
+ "duration": 0.64,
+ "codes": [
+ 1333, 1783, 1315, 1754, 1673, 1667, 176, 1535, 554, 1139, 1654, 1627, 1525, 959, 516, 965, 957, 910,
+ 1010, 1213, 1272, 447, 1221, 1260, 1213, 1324, 565, 1121, 521, 1174, 42, 768, 1149, 1230, 95, 1666,
+ 1831, 1550, 1452, 709, 1006, 329, 1640, 1485, 734, 1530, 333, 1604
+ ]
+ },
+ {
+ "word": "jom",
+ "duration": 0.13,
+ "codes": [50, 927, 1147, 1110, 647, 1625, 1740, 481, 1811, 1784]
+ },
+ {
+ "word": "dareunga",
+ "duration": 0.44,
+ "codes": [
+ 1644, 1163, 1408, 1116, 971, 872, 958, 1483, 996, 400, 1281, 973, 1127, 1610, 1532, 1234, 1206, 796,
+ 1491, 317, 1501, 481, 1594, 1528, 1610, 1583, 1073, 1281, 951, 1281, 1335, 1423, 1444
+ ]
+ },
+ {
+ "word": "bwayo",
+ "duration": 0.28,
+ "codes": [
+ 702, 1405, 1266, 1669, 1770, 1089, 887, 1443, 1314, 1468, 966, 1087, 1059, 1436, 966, 1332, 1335, 1353,
+ 1423, 1019, 1073
+ ]
+ }
+ ],
+ "language": "ko"
+}
diff --git a/examples/outetts/speakers/ko_female_2.json b/examples/outetts/speakers/ko_female_2.json
new file mode 100644
index 000000000..a03661db5
--- /dev/null
+++ b/examples/outetts/speakers/ko_female_2.json
@@ -0,0 +1,162 @@
+{
+ "text": " \uc81c\uac00 \uc9c0\ub09c 1\ub144\uac04\uc740 \uac70\uc758 \ub2e4\ub978 \ud5a5\uc218 \uc548 \uc4f0\uace0 \uc774\uac83\ub9cc \uc37c\uc5b4\uc694. \ub0a8\ud3b8\ud558\uace0 \uc791\ub144\uc5d0 \ub3c4\ucfc4 \uc5ec\ud589 \uac14\ub2e4\uac00 \uc2dc\ud5a5\uc744 \ud574\ubcf4\uace0 \ubc18\ud55c \uc81c\ud488\uc778\ub370",
+ "words": [
+ {
+ "word": "jega",
+ "duration": 0.2,
+ "codes": [1571, 1145, 1430, 1169, 721, 529, 1307, 117, 1770, 1095, 1342, 1182, 613, 657, 73]
+ },
+ {
+ "word": "jinan",
+ "duration": 0.64,
+ "codes": [
+ 777, 637, 1177, 1173, 604, 1202, 875, 1211, 250, 661, 42, 693, 1704, 1713, 1451, 1376, 1287, 767, 802,
+ 546, 1487, 1127, 502, 1317, 1554, 1065, 470, 823, 65, 553, 951, 857, 522, 260, 433, 467, 991, 860, 1105,
+ 880, 384, 880, 997, 461, 1194, 648, 1079, 1541
+ ]
+ },
+ {
+ "word": "one",
+ "duration": 0.43,
+ "codes": [
+ 1403, 1025, 945, 779, 111, 1730, 1298, 66, 1636, 470, 1452, 978, 480, 1833, 674, 1740, 1450, 826, 673,
+ 1108, 259, 1157, 1067, 1775, 234, 732, 117, 717, 848, 1165, 1630, 882
+ ]
+ },
+ {
+ "word": "nyeonganeun",
+ "duration": 0.49,
+ "codes": [
+ 1420, 1382, 661, 1043, 367, 700, 1064, 1132, 106, 1078, 1165, 1106, 1046, 1192, 1813, 1211, 1753, 987,
+ 894, 1230, 81, 248, 433, 373, 1291, 290, 665, 754, 923, 1638, 1062, 1452, 628, 203, 47, 1742, 1698
+ ]
+ },
+ {
+ "word": "geoyi",
+ "duration": 0.36,
+ "codes": [
+ 771, 150, 1802, 484, 1318, 1210, 751, 1759, 1791, 1775, 1509, 1827, 922, 389, 770, 953, 863, 977, 133,
+ 1518, 769, 80, 201, 781, 112, 34, 125
+ ]
+ },
+ {
+ "word": "dareun",
+ "duration": 0.43,
+ "codes": [
+ 4, 583, 189, 1207, 1540, 166, 604, 721, 1594, 1623, 674, 1820, 1226, 1089, 985, 1823, 1278, 1160, 996,
+ 1454, 186, 1260, 128, 322, 155, 955, 793, 208, 1052, 344, 1115, 281
+ ]
+ },
+ {
+ "word": "hyangsu",
+ "duration": 0.28,
+ "codes": [
+ 798, 964, 1372, 581, 699, 976, 579, 972, 1369, 978, 1255, 1344, 629, 1401, 1831, 1641, 440, 1711, 1786,
+ 1331, 777
+ ]
+ },
+ {
+ "word": "an",
+ "duration": 0.15,
+ "codes": [1210, 427, 1331, 1226, 1686, 1278, 998, 970, 838, 1767, 1384]
+ },
+ {
+ "word": "sseugo",
+ "duration": 0.36,
+ "codes": [
+ 1339, 1339, 1510, 1552, 1583, 1527, 1748, 1637, 1546, 1751, 1290, 34, 1664, 1794, 977, 473, 1361, 804,
+ 633, 1346, 1360, 1490, 819, 17, 321, 1534, 811
+ ]
+ },
+ {
+ "word": "igeosman",
+ "duration": 0.88,
+ "codes": [
+ 606, 700, 511, 317, 1560, 1060, 1625, 1406, 747, 729, 1425, 1433, 1619, 1239, 1294, 1545, 744, 1321,
+ 1595, 555, 1765, 1480, 751, 1740, 630, 805, 1248, 601, 1321, 1768, 1007, 1655, 710, 1023, 1525, 630,
+ 1242, 1100, 840, 1460, 1400, 198, 1237, 71, 1588, 1706, 590, 1720, 159, 1588, 613, 288, 1457, 803, 1312,
+ 1801, 1650, 617, 970, 412, 172, 294, 576, 1431, 1816, 1565
+ ]
+ },
+ {
+ "word": "sseosseoyo",
+ "duration": 0.4,
+ "codes": [
+ 1757, 1667, 1535, 1709, 1213, 864, 1066, 61, 1678, 1810, 1724, 1701, 1615, 1714, 1737, 1673, 1788, 1786,
+ 1783, 1731, 907, 411, 803, 1730, 726, 9, 970, 1168, 994, 905
+ ]
+ },
+ {
+ "word": "nampyeonhago",
+ "duration": 0.76,
+ "codes": [
+ 1162, 671, 516, 86, 861, 957, 758, 1114, 1750, 1157, 1321, 1144, 555, 1704, 1461, 591, 1623, 824, 1233,
+ 1385, 275, 584, 1045, 622, 254, 329, 294, 95, 430, 37, 794, 912, 1806, 1746, 1231, 872, 622, 707, 25,
+ 1305, 25, 1209, 802, 864, 662, 1100, 1203, 535, 115, 373, 1409, 1061, 1601, 1008, 130, 1308, 282
+ ]
+ },
+ {
+ "word": "jagnyeone",
+ "duration": 0.36,
+ "codes": [
+ 809, 1548, 18, 1337, 1622, 1041, 1119, 913, 1005, 1267, 1723, 1799, 709, 1824, 1032, 1336, 1755, 818,
+ 1460, 1036, 1333, 1146, 1207, 328, 299, 429, 1759
+ ]
+ },
+ {
+ "word": "dokyo",
+ "duration": 0.32,
+ "codes": [
+ 1707, 144, 1134, 1051, 1170, 1333, 437, 1492, 698, 935, 1798, 1299, 1248, 1628, 223, 1268, 1043, 1172,
+ 769, 1774, 1114, 422, 531, 1478
+ ]
+ },
+ {
+ "word": "yeohaeng",
+ "duration": 0.27,
+ "codes": [
+ 65, 316, 192, 403, 1588, 991, 1200, 7, 537, 390, 1103, 45, 723, 1833, 407, 752, 1310, 732, 1581, 1495
+ ]
+ },
+ {
+ "word": "gassdaga",
+ "duration": 0.44,
+ "codes": [
+ 1084, 1130, 955, 497, 1252, 902, 58, 1096, 1405, 1346, 58, 1144, 89, 578, 1137, 1437, 339, 291, 123,
+ 755, 907, 790, 1330, 776, 852, 1123, 1207, 1314, 958, 466, 588, 1736, 1101
+ ]
+ },
+ {
+ "word": "sihyangeul",
+ "duration": 0.68,
+ "codes": [
+ 1245, 847, 809, 203, 997, 721, 1344, 1040, 1040, 1135, 354, 1156, 1132, 1397, 347, 797, 770, 1809, 1313,
+ 1726, 231, 1238, 1175, 1018, 1059, 368, 1061, 1565, 749, 1127, 1431, 586, 577, 498, 1199, 309, 1172,
+ 147, 772, 816, 336, 1450, 477, 1158, 878, 498, 621, 932, 344, 1082, 115
+ ]
+ },
+ {
+ "word": "haebogo",
+ "duration": 0.21,
+ "codes": [353, 181, 1032, 1775, 1694, 1114, 1731, 1222, 494, 1540, 1725, 1062, 1584, 1245, 991, 109]
+ },
+ {
+ "word": "banhan",
+ "duration": 0.32,
+ "codes": [
+ 1740, 469, 1621, 1420, 1297, 1178, 1831, 1178, 1459, 1620, 9, 1197, 1208, 1814, 1294, 1660, 1232, 904,
+ 366, 699, 1798, 282, 771, 1791
+ ]
+ },
+ {
+ "word": "jepuminde",
+ "duration": 0.6,
+ "codes": [
+ 1645, 1655, 1518, 1743, 723, 592, 1173, 835, 1343, 693, 459, 1739, 378, 1701, 1226, 1359, 1511, 1032,
+ 1804, 1259, 698, 1249, 1697, 1530, 1678, 1140, 1590, 421, 1489, 909, 761, 1749, 1417, 1388, 1213, 663,
+ 8, 1065, 1187, 137, 723, 628, 1638, 958, 1086
+ ]
+ }
+ ],
+ "language": "ko"
+}
diff --git a/examples/outetts/speakers/ko_male_1.json b/examples/outetts/speakers/ko_male_1.json
new file mode 100644
index 000000000..30736ba95
--- /dev/null
+++ b/examples/outetts/speakers/ko_male_1.json
@@ -0,0 +1,137 @@
+{
+ "text": " \uba85\ub2e8\uc5d0 \uc788\ub294 \ud559\uc0dd\ub4e4\uc740 \uc2e4\uc81c\ub85c \uc9c0\ub2a5\uc774 \ub192\uc9c0 \uc54a\uc558\uace0 \ubb34\uc791\uc704\ub85c \ubf51\ud78c \ud559\uc0dd\ub4e4\uc774\uc5c8\uae30 \ub54c\ubb38\uc785\ub2c8\ub2e4. \uc0ac\uc2e4\uc744 \ubab0\ub790\ub358 \uad50\uc0ac\ub4e4\uc740",
+ "words": [
+ {
+ "word": "myeongdane",
+ "duration": 0.48,
+ "codes": [
+ 151, 1274, 1665, 1231, 205, 713, 1368, 1078, 1155, 1015, 1301, 297, 1007, 297, 1765, 927, 593, 1364,
+ 653, 1664, 1613, 1563, 910, 944, 847, 39, 152, 248, 321, 1027, 318, 1093, 146, 1745, 254, 1103
+ ]
+ },
+ {
+ "word": "issneun",
+ "duration": 0.35,
+ "codes": [
+ 69, 1001, 1744, 479, 1781, 536, 631, 1451, 1596, 1636, 503, 41, 1214, 1417, 1286, 1824, 1069, 1366,
+ 1690, 430, 1113, 611, 658, 761, 775, 1025
+ ]
+ },
+ {
+ "word": "hagsaengdeuleun",
+ "duration": 0.73,
+ "codes": [
+ 13, 299, 607, 1633, 1447, 1756, 872, 1743, 1037, 1589, 1538, 1230, 713, 1691, 980, 344, 1375, 1061, 485,
+ 1013, 1147, 979, 809, 822, 1308, 783, 28, 1435, 1089, 1024, 1526, 440, 98, 1093, 786, 1689, 28, 787,
+ 1175, 205, 1708, 349, 763, 1326, 1120, 595, 211, 1415, 579, 600, 917, 178, 663, 940, 776
+ ]
+ },
+ {
+ "word": "siljero",
+ "duration": 0.92,
+ "codes": [
+ 678, 59, 1345, 60, 1756, 776, 744, 501, 762, 606, 766, 227, 1011, 1157, 1080, 1669, 487, 762, 1479, 227,
+ 1305, 1248, 538, 1327, 673, 696, 544, 241, 1302, 1348, 1667, 919, 1707, 962, 1139, 1797, 596, 1677, 767,
+ 434, 1525, 1178, 644, 1488, 305, 191, 1761, 1241, 735, 785, 423, 538, 1681, 943, 1250, 1061, 1088, 532,
+ 1638, 282, 575, 1342, 1002, 935, 1344, 1280, 303, 108, 1286
+ ]
+ },
+ {
+ "word": "jineungi",
+ "duration": 0.55,
+ "codes": [
+ 120, 143, 1700, 770, 1584, 1667, 423, 1510, 1652, 231, 581, 1583, 596, 1053, 1459, 769, 1225, 1825, 595,
+ 877, 750, 779, 1802, 1726, 1336, 178, 614, 1651, 549, 783, 1450, 882, 607, 1808, 1687, 1015, 940, 1470,
+ 543, 853, 195
+ ]
+ },
+ {
+ "word": "nopji",
+ "duration": 0.4,
+ "codes": [
+ 1731, 856, 1654, 559, 1538, 1796, 1069, 1825, 876, 1463, 32, 443, 1408, 1218, 1764, 287, 538, 1760,
+ 1359, 566, 1631, 1313, 1035, 543, 788, 24, 1317, 1620, 263, 1312
+ ]
+ },
+ {
+ "word": "anhassgo",
+ "duration": 0.41,
+ "codes": [
+ 222, 282, 958, 816, 800, 221, 361, 573, 1509, 864, 578, 374, 958, 1541, 467, 1110, 1063, 1013, 1410,
+ 1010, 151, 676, 256, 559, 1293, 1831, 1454, 1401, 319, 225, 217
+ ]
+ },
+ {
+ "word": "mujagwiro",
+ "duration": 1.41,
+ "codes": [
+ 326, 1789, 1347, 1554, 1255, 861, 1246, 334, 624, 1595, 445, 1080, 1273, 458, 1319, 567, 241, 975, 538,
+ 496, 417, 94, 325, 538, 151, 765, 285, 1362, 673, 401, 975, 445, 1242, 275, 391, 862, 445, 975, 633,
+ 551, 325, 487, 766, 933, 673, 1652, 824, 567, 1623, 555, 799, 1428, 603, 899, 799, 566, 1151, 287, 420,
+ 401, 1352, 686, 567, 458, 343, 1672, 1524, 1272, 1683, 1346, 836, 1830, 1346, 376, 1078, 1805, 1674,
+ 817, 938, 1, 471, 536, 1280, 1171, 1344, 254, 367, 102, 116, 1161, 745, 1781, 363, 1782, 999, 1330, 232,
+ 318, 536, 1366, 1493, 583, 1394, 83, 946, 890
+ ]
+ },
+ {
+ "word": "bbobhin",
+ "duration": 0.45,
+ "codes": [
+ 1674, 1684, 401, 316, 986, 379, 1474, 1401, 1453, 217, 1014, 557, 1217, 1218, 1249, 727, 199, 171, 795,
+ 31, 325, 1236, 1556, 854, 1726, 1827, 128, 1637, 455, 769, 831, 882, 177, 1221
+ ]
+ },
+ {
+ "word": "hagsaengdeulieossgi",
+ "duration": 1.03,
+ "codes": [
+ 355, 1416, 1701, 709, 1636, 1639, 1239, 1448, 1309, 1752, 1135, 1794, 1238, 1160, 1786, 1031, 1796, 676,
+ 878, 1737, 849, 10, 1349, 817, 911, 1226, 684, 206, 970, 682, 308, 994, 1081, 962, 1833, 506, 1022,
+ 1302, 1467, 968, 1710, 903, 364, 268, 1004, 677, 1802, 232, 443, 949, 796, 1217, 575, 1403, 248, 1811,
+ 657, 1686, 992, 1753, 1121, 285, 1176, 1246, 1793, 561, 31, 325, 119, 265, 796, 1680, 750, 55, 645, 983,
+ 527
+ ]
+ },
+ {
+ "word": "ddaemunibnida",
+ "duration": 0.64,
+ "codes": [
+ 203, 581, 681, 1798, 766, 223, 1262, 337, 337, 1528, 1416, 657, 549, 1399, 741, 581, 1056, 364, 617,
+ 1696, 1379, 961, 425, 1434, 1410, 617, 1748, 989, 326, 1370, 1212, 593, 1225, 898, 1590, 1422, 514, 154,
+ 711, 1488, 1644, 1433, 1136, 339, 1001, 1245, 1103, 1668
+ ]
+ },
+ {
+ "word": "sasileul",
+ "duration": 1.91,
+ "codes": [
+ 1203, 814, 1421, 1203, 355, 1397, 1193, 91, 1481, 484, 585, 899, 391, 345, 257, 241, 20, 187, 20, 692,
+ 224, 16, 187, 851, 360, 224, 544, 187, 23, 360, 209, 16, 187, 23, 509, 94, 311, 94, 194, 417, 187, 311,
+ 285, 23, 417, 94, 311, 360, 72, 417, 187, 311, 285, 194, 417, 187, 311, 285, 72, 417, 187, 311, 360, 23,
+ 544, 187, 23, 285, 224, 417, 187, 311, 285, 224, 417, 94, 851, 445, 194, 16, 204, 851, 287, 209, 694,
+ 285, 16, 765, 209, 1151, 285, 311, 257, 224, 281, 445, 3, 1244, 288, 820, 1293, 896, 231, 895, 527, 714,
+ 1088, 782, 1238, 719, 1330, 756, 164, 291, 425, 346, 841, 611, 254, 841, 429, 276, 1088, 1587, 340,
+ 1519, 398, 1139, 248, 1598, 375, 1693, 593, 1241, 346, 443, 482, 384, 154, 768, 783, 37, 546
+ ]
+ },
+ {
+ "word": "molrassdeon",
+ "duration": 0.65,
+ "codes": [
+ 1663, 272, 1482, 1513, 720, 1069, 368, 870, 757, 214, 856, 556, 1498, 539, 1221, 253, 898, 617, 631,
+ 1457, 472, 57, 206, 424, 462, 768, 382, 1506, 1419, 866, 601, 680, 566, 401, 1510, 1691, 364, 994, 217,
+ 790, 583, 182, 579, 770, 1394, 1766, 564, 1374, 1820
+ ]
+ },
+ {
+ "word": "gyosadeuleun",
+ "duration": 0.67,
+ "codes": [
+ 444, 1439, 1375, 482, 1828, 1592, 1411, 605, 1828, 1423, 653, 1807, 1690, 557, 1625, 1286, 116, 1607,
+ 1253, 1607, 654, 990, 884, 1247, 1506, 1073, 248, 302, 656, 88, 1372, 639, 591, 1538, 1354, 374, 1147,
+ 383, 716, 1781, 609, 456, 927, 664, 1285, 1345, 1301, 1674, 1114, 1780
+ ]
+ }
+ ],
+ "language": "ko"
+}
diff --git a/examples/outetts/speakers/ko_male_2.json b/examples/outetts/speakers/ko_male_2.json
new file mode 100644
index 000000000..45a2d86fc
--- /dev/null
+++ b/examples/outetts/speakers/ko_male_2.json
@@ -0,0 +1,128 @@
+{
+ "text": " \uba70\uce60 \ud6c4, \ud654\uac00\ub09c \ubd80\uc790\uc5d0\uac8c \uadf8\ub9bc\uc744 \ubcf4\uc5ec\uc8fc\uc5c8\ub2e4. \uae30\ub2e4\ub9ac\ub358 \uadf8\ub9bc\uc744 \ubc1b\uc740 \ubd80\uc790\ub294 \ub108\ubb34\ub098 \uae30\ubed0\ud558\uba70 \uadf8\ub9bc\uc744 \ubcf4\uc558\ub2e4.",
+ "words": [
+ {
+ "word": "myeocil",
+ "duration": 0.32,
+ "codes": [
+ 561, 27, 1809, 1516, 268, 479, 181, 657, 407, 711, 642, 18, 1449, 1609, 18, 790, 135, 206, 780, 1389,
+ 628, 543, 1616, 506
+ ]
+ },
+ {
+ "word": "hu",
+ "duration": 0.08,
+ "codes": [1351, 595, 211, 1625, 532, 29]
+ },
+ {
+ "word": "hwaganan",
+ "duration": 1.07,
+ "codes": [
+ 1311, 1388, 682, 1435, 1323, 1373, 1063, 1594, 1746, 1684, 1765, 1716, 548, 301, 1121, 735, 1348, 710,
+ 601, 1719, 457, 465, 636, 1555, 691, 986, 765, 1536, 227, 824, 1244, 169, 459, 704, 548, 1214, 136,
+ 1181, 707, 396, 929, 1730, 1648, 1276, 1023, 676, 627, 1550, 1774, 1500, 1688, 1468, 961, 705, 271, 339,
+ 587, 565, 112, 320, 1180, 1650, 1608, 1267, 547, 452, 386, 407, 1823, 63, 282, 472, 245, 380, 876, 1590,
+ 1345, 1048, 184, 1263
+ ]
+ },
+ {
+ "word": "bujaege",
+ "duration": 0.57,
+ "codes": [
+ 1575, 787, 1770, 1084, 1267, 1826, 1092, 193, 765, 1715, 985, 430, 1382, 1493, 1272, 184, 268, 182, 455,
+ 1712, 541, 399, 45, 678, 864, 430, 660, 276, 1073, 466, 263, 1136, 759, 178, 1581, 1617, 1711, 930, 407,
+ 768, 557, 190, 45
+ ]
+ },
+ {
+ "word": "geurimeul",
+ "duration": 0.48,
+ "codes": [
+ 600, 79, 45, 599, 326, 1238, 895, 936, 1703, 624, 516, 736, 1492, 948, 790, 1389, 637, 1596, 245, 882,
+ 515, 198, 143, 395, 479, 262, 1663, 742, 1026, 1591, 218, 600, 289, 49, 621, 112
+ ]
+ },
+ {
+ "word": "boyeojueossda",
+ "duration": 0.59,
+ "codes": [
+ 124, 1812, 1285, 1681, 1649, 1804, 1492, 952, 48, 47, 683, 143, 616, 886, 546, 303, 1618, 1734, 1217,
+ 358, 882, 1439, 621, 1674, 1113, 1081, 864, 1770, 234, 1160, 1622, 766, 1654, 1422, 1711, 275, 287,
+ 1781, 547, 1498, 76, 1143, 1200, 1330
+ ]
+ },
+ {
+ "word": "gidarideon",
+ "duration": 1.96,
+ "codes": [
+ 1117, 1629, 1829, 1197, 1224, 1276, 1506, 1721, 1038, 223, 1108, 670, 727, 1142, 762, 1246, 1273, 458,
+ 694, 696, 275, 686, 287, 417, 465, 275, 704, 696, 606, 325, 538, 401, 766, 54, 1242, 487, 487, 862, 391,
+ 257, 795, 275, 1151, 287, 401, 325, 275, 1242, 712, 360, 1016, 54, 1151, 257, 445, 1570, 559, 401, 1016,
+ 534, 686, 585, 606, 325, 54, 704, 387, 360, 325, 487, 606, 1016, 534, 704, 567, 765, 862, 445, 975, 274,
+ 534, 686, 824, 538, 1244, 54, 1390, 585, 487, 1623, 567, 585, 1016, 566, 325, 766, 287, 1390, 54, 274,
+ 1244, 538, 1242, 986, 680, 862, 566, 1293, 995, 1288, 784, 1375, 694, 1474, 1278, 581, 1027, 592, 934,
+ 107, 780, 1728, 1692, 1588, 135, 116, 152, 153, 945, 137, 1544, 152, 189, 1420, 736, 1669, 1554, 1407,
+ 756, 135, 614, 536, 46, 1497, 748, 319, 807
+ ]
+ },
+ {
+ "word": "geurimeul",
+ "duration": 0.39,
+ "codes": [
+ 1350, 1351, 1620, 820, 1783, 1481, 604, 1278, 979, 1645, 157, 529, 429, 213, 1518, 1733, 263, 181, 280,
+ 500, 911, 1778, 1335, 211, 433, 315, 357, 983, 536
+ ]
+ },
+ {
+ "word": "badeun",
+ "duration": 0.25,
+ "codes": [
+ 1567, 1756, 1382, 791, 341, 682, 107, 536, 1334, 1522, 1633, 521, 200, 2, 1599, 1493, 1004, 1612, 1368
+ ]
+ },
+ {
+ "word": "bujaneun",
+ "duration": 0.52,
+ "codes": [
+ 1542, 1594, 197, 949, 1444, 1293, 1273, 1721, 1247, 782, 1395, 1611, 1252, 1537, 1341, 268, 1753, 1018,
+ 836, 654, 10, 1287, 282, 456, 1519, 300, 853, 1439, 1251, 844, 291, 1287, 430, 374, 1336, 1830, 1751,
+ 917, 1750
+ ]
+ },
+ {
+ "word": "neomuna",
+ "duration": 0.92,
+ "codes": [
+ 521, 1023, 1257, 239, 1819, 1464, 1481, 1454, 1406, 1479, 250, 313, 758, 1486, 1436, 572, 259, 669, 832,
+ 301, 869, 163, 1028, 717, 523, 1168, 470, 1305, 1305, 470, 1168, 717, 967, 1164, 396, 1367, 1343, 681,
+ 1282, 584, 1123, 1466, 1465, 1440, 766, 1408, 1782, 920, 125, 200, 609, 934, 1004, 1077, 123, 945, 1520,
+ 1503, 127, 98, 303, 1340, 1732, 52, 95, 429, 100, 1281, 100
+ ]
+ },
+ {
+ "word": "gibbeohamyeo",
+ "duration": 0.52,
+ "codes": [
+ 903, 29, 344, 1683, 1011, 1500, 328, 340, 473, 104, 557, 104, 1549, 1564, 1114, 1142, 936, 1578, 1810,
+ 775, 940, 664, 268, 13, 112, 289, 65, 488, 29, 1416, 815, 1290, 1485, 455, 51, 488, 79, 687, 1161
+ ]
+ },
+ {
+ "word": "geurimeul",
+ "duration": 0.44,
+ "codes": [
+ 794, 569, 214, 297, 716, 1273, 1428, 603, 1799, 1075, 1814, 650, 1492, 1497, 110, 143, 324, 350, 1620,
+ 384, 1217, 903, 863, 1729, 515, 803, 1492, 1690, 725, 153, 575, 1646, 1696
+ ]
+ },
+ {
+ "word": "boassda",
+ "duration": 0.32,
+ "codes": [
+ 758, 1642, 1468, 1437, 53, 1369, 95, 1397, 753, 560, 1355, 1708, 1639, 1262, 603, 1289, 68, 975, 1300,
+ 1073, 179, 1126, 1252, 1206
+ ]
+ }
+ ],
+ "language": "ko"
+}
diff --git a/examples/outetts/speakers/picard.json b/examples/outetts/speakers/picard.json
new file mode 100644
index 000000000..2ba54196c
--- /dev/null
+++ b/examples/outetts/speakers/picard.json
@@ -0,0 +1,1329 @@
+{
+ "text": "also believes that the surviving officers are deliberately withholding vital information from this inquiry. Further investigation is recommended. Will, there was no further investigation. This report was classified and then it was quietly buried.",
+ "words": [
+ {
+ "word": "also",
+ "duration": 0.43,
+ "codes": [
+ 1746,
+ 1117,
+ 1408,
+ 1434,
+ 1397,
+ 1010,
+ 1671,
+ 1648,
+ 1210,
+ 1794,
+ 1347,
+ 922,
+ 203,
+ 1563,
+ 1517,
+ 225,
+ 949,
+ 1060,
+ 994,
+ 700,
+ 1793,
+ 1345,
+ 596,
+ 599,
+ 100,
+ 522,
+ 556,
+ 366,
+ 1777,
+ 1295,
+ 829,
+ 1002
+ ]
+ },
+ {
+ "word": "believes",
+ "duration": 0.68,
+ "codes": [
+ 341,
+ 1640,
+ 1147,
+ 1000,
+ 1755,
+ 681,
+ 936,
+ 1767,
+ 1719,
+ 91,
+ 642,
+ 1407,
+ 1368,
+ 1718,
+ 653,
+ 1790,
+ 1130,
+ 987,
+ 1415,
+ 1794,
+ 1087,
+ 290,
+ 1762,
+ 1178,
+ 1082,
+ 1138,
+ 454,
+ 1313,
+ 1630,
+ 471,
+ 1289,
+ 1447,
+ 1139,
+ 729,
+ 1253,
+ 878,
+ 1312,
+ 1308,
+ 1393,
+ 1049,
+ 1497,
+ 1247,
+ 733,
+ 1677,
+ 1511,
+ 1353,
+ 932,
+ 756,
+ 1178,
+ 1651,
+ 69
+ ]
+ },
+ {
+ "word": "that",
+ "duration": 0.21,
+ "codes": [
+ 292,
+ 1287,
+ 1439,
+ 438,
+ 1665,
+ 1631,
+ 1622,
+ 1001,
+ 798,
+ 155,
+ 1607,
+ 1275,
+ 239,
+ 1071,
+ 1405,
+ 1806
+ ]
+ },
+ {
+ "word": "the",
+ "duration": 0.19,
+ "codes": [
+ 555,
+ 1556,
+ 1023,
+ 861,
+ 1699,
+ 1821,
+ 1146,
+ 368,
+ 1738,
+ 609,
+ 1081,
+ 931,
+ 533,
+ 1100
+ ]
+ },
+ {
+ "word": "surviving",
+ "duration": 0.72,
+ "codes": [
+ 596,
+ 1745,
+ 1156,
+ 863,
+ 883,
+ 1700,
+ 1410,
+ 943,
+ 135,
+ 522,
+ 46,
+ 245,
+ 592,
+ 232,
+ 1256,
+ 1505,
+ 1042,
+ 1457,
+ 1454,
+ 1704,
+ 1500,
+ 1806,
+ 1088,
+ 28,
+ 1439,
+ 1001,
+ 1192,
+ 590,
+ 367,
+ 1042,
+ 1123,
+ 260,
+ 571,
+ 1051,
+ 475,
+ 324,
+ 387,
+ 93,
+ 687,
+ 470,
+ 1356,
+ 1681,
+ 627,
+ 1370,
+ 1716,
+ 1263,
+ 736,
+ 684,
+ 577,
+ 1494,
+ 1411,
+ 971,
+ 1454,
+ 1422
+ ]
+ },
+ {
+ "word": "officers",
+ "duration": 0.61,
+ "codes": [
+ 464,
+ 1397,
+ 1412,
+ 1279,
+ 1407,
+ 115,
+ 880,
+ 88,
+ 907,
+ 1045,
+ 525,
+ 1821,
+ 1063,
+ 1801,
+ 0,
+ 1788,
+ 985,
+ 1422,
+ 1754,
+ 1126,
+ 1304,
+ 1607,
+ 375,
+ 1241,
+ 69,
+ 206,
+ 202,
+ 245,
+ 430,
+ 290,
+ 570,
+ 1815,
+ 1263,
+ 635,
+ 687,
+ 1444,
+ 1429,
+ 1004,
+ 1499,
+ 1281,
+ 1342,
+ 1060,
+ 1532,
+ 1180,
+ 1450,
+ 357
+ ]
+ },
+ {
+ "word": "are",
+ "duration": 0.35,
+ "codes": [
+ 549,
+ 452,
+ 1402,
+ 1426,
+ 1423,
+ 891,
+ 1397,
+ 1380,
+ 1735,
+ 19,
+ 1262,
+ 1256,
+ 1126,
+ 1829,
+ 1331,
+ 1066,
+ 777,
+ 147,
+ 303,
+ 1420,
+ 1218,
+ 138,
+ 1286,
+ 737,
+ 1511,
+ 1076
+ ]
+ },
+ {
+ "word": "deliberately",
+ "duration": 0.68,
+ "codes": [
+ 1489,
+ 1404,
+ 1062,
+ 1714,
+ 1788,
+ 1400,
+ 364,
+ 1392,
+ 79,
+ 1468,
+ 1274,
+ 1633,
+ 1630,
+ 803,
+ 920,
+ 663,
+ 1457,
+ 437,
+ 1710,
+ 1726,
+ 875,
+ 179,
+ 1468,
+ 898,
+ 1542,
+ 548,
+ 1144,
+ 1677,
+ 1440,
+ 1671,
+ 1820,
+ 959,
+ 1132,
+ 1789,
+ 600,
+ 755,
+ 1618,
+ 839,
+ 1306,
+ 1489,
+ 1010,
+ 1700,
+ 329,
+ 886,
+ 1646,
+ 1364,
+ 1767,
+ 1297,
+ 388,
+ 703,
+ 388
+ ]
+ },
+ {
+ "word": "withholding",
+ "duration": 0.63,
+ "codes": [
+ 1607,
+ 594,
+ 1541,
+ 1809,
+ 31,
+ 1726,
+ 909,
+ 1077,
+ 901,
+ 894,
+ 1781,
+ 1826,
+ 1489,
+ 901,
+ 1805,
+ 1556,
+ 1203,
+ 1823,
+ 1677,
+ 1308,
+ 613,
+ 1280,
+ 1184,
+ 777,
+ 1398,
+ 894,
+ 893,
+ 1623,
+ 1115,
+ 1368,
+ 1817,
+ 994,
+ 1344,
+ 988,
+ 1803,
+ 1273,
+ 1032,
+ 346,
+ 1138,
+ 836,
+ 1287,
+ 1062,
+ 1462,
+ 1500,
+ 792,
+ 1743,
+ 1658
+ ]
+ },
+ {
+ "word": "vital",
+ "duration": 0.41,
+ "codes": [
+ 86,
+ 1462,
+ 1809,
+ 1823,
+ 1002,
+ 1781,
+ 1436,
+ 1485,
+ 1258,
+ 569,
+ 1158,
+ 492,
+ 642,
+ 1186,
+ 1167,
+ 147,
+ 1565,
+ 922,
+ 549,
+ 1803,
+ 984,
+ 1714,
+ 1248,
+ 1333,
+ 400,
+ 1139,
+ 1287,
+ 1511,
+ 525,
+ 1593,
+ 1636
+ ]
+ },
+ {
+ "word": "information",
+ "duration": 0.6,
+ "codes": [
+ 771,
+ 1758,
+ 1724,
+ 1037,
+ 599,
+ 1263,
+ 218,
+ 616,
+ 1824,
+ 1432,
+ 1179,
+ 1417,
+ 719,
+ 1760,
+ 493,
+ 22,
+ 1325,
+ 462,
+ 1356,
+ 1325,
+ 425,
+ 1396,
+ 1259,
+ 472,
+ 1462,
+ 382,
+ 291,
+ 907,
+ 796,
+ 1193,
+ 452,
+ 1368,
+ 1347,
+ 989,
+ 58,
+ 347,
+ 595,
+ 1253,
+ 463,
+ 400,
+ 1797,
+ 1320,
+ 792,
+ 1280,
+ 1743
+ ]
+ },
+ {
+ "word": "from",
+ "duration": 0.23,
+ "codes": [
+ 804,
+ 1716,
+ 1160,
+ 1800,
+ 985,
+ 117,
+ 1313,
+ 1588,
+ 294,
+ 1356,
+ 801,
+ 352,
+ 1483,
+ 1032,
+ 798,
+ 1028,
+ 1381
+ ]
+ },
+ {
+ "word": "this",
+ "duration": 0.16,
+ "codes": [
+ 926,
+ 1822,
+ 484,
+ 1651,
+ 1353,
+ 437,
+ 940,
+ 1407,
+ 1711,
+ 335,
+ 1088,
+ 901
+ ]
+ },
+ {
+ "word": "inquiry",
+ "duration": 0.45,
+ "codes": [
+ 1736,
+ 664,
+ 640,
+ 248,
+ 1,
+ 974,
+ 1638,
+ 998,
+ 891,
+ 1511,
+ 970,
+ 1492,
+ 1658,
+ 923,
+ 827,
+ 1079,
+ 1720,
+ 1682,
+ 1291,
+ 941,
+ 79,
+ 1312,
+ 1395,
+ 583,
+ 951,
+ 870,
+ 894,
+ 1126,
+ 117,
+ 910,
+ 1459,
+ 400,
+ 1076,
+ 324
+ ]
+ },
+ {
+ "word": "further",
+ "duration": 0.87,
+ "codes": [
+ 1731,
+ 58,
+ 1002,
+ 1071,
+ 1434,
+ 1671,
+ 1380,
+ 1177,
+ 1504,
+ 1683,
+ 1153,
+ 1092,
+ 1797,
+ 1714,
+ 805,
+ 1496,
+ 1321,
+ 1658,
+ 1632,
+ 1092,
+ 1652,
+ 1419,
+ 681,
+ 1621,
+ 1425,
+ 805,
+ 1665,
+ 792,
+ 1665,
+ 1809,
+ 406,
+ 1329,
+ 861,
+ 1822,
+ 1735,
+ 32,
+ 1632,
+ 805,
+ 681,
+ 1428,
+ 32,
+ 1777,
+ 1370,
+ 1737,
+ 927,
+ 1342,
+ 1718,
+ 1455,
+ 179,
+ 280,
+ 1235,
+ 1161,
+ 88,
+ 234,
+ 456,
+ 532,
+ 1411,
+ 1481,
+ 1705,
+ 1431,
+ 818,
+ 604,
+ 1791,
+ 225,
+ 1426
+ ]
+ },
+ {
+ "word": "investigation",
+ "duration": 0.73,
+ "codes": [
+ 1705,
+ 1089,
+ 407,
+ 55,
+ 1506,
+ 1130,
+ 1010,
+ 1477,
+ 1805,
+ 987,
+ 841,
+ 324,
+ 39,
+ 326,
+ 1313,
+ 1448,
+ 1677,
+ 135,
+ 372,
+ 67,
+ 1476,
+ 862,
+ 1370,
+ 1636,
+ 855,
+ 208,
+ 319,
+ 1509,
+ 823,
+ 638,
+ 788,
+ 972,
+ 299,
+ 512,
+ 664,
+ 138,
+ 959,
+ 773,
+ 1720,
+ 1186,
+ 1345,
+ 1285,
+ 1074,
+ 715,
+ 1651,
+ 604,
+ 886,
+ 985,
+ 318,
+ 1830,
+ 1820,
+ 1795,
+ 1357,
+ 1638,
+ 1806
+ ]
+ },
+ {
+ "word": "is",
+ "duration": 0.23,
+ "codes": [
+ 1598,
+ 1665,
+ 792,
+ 1683,
+ 1466,
+ 1062,
+ 1171,
+ 495,
+ 1063,
+ 430,
+ 1477,
+ 1423,
+ 1677,
+ 1801,
+ 430,
+ 440,
+ 1287
+ ]
+ },
+ {
+ "word": "recommended",
+ "duration": 0.6,
+ "codes": [
+ 872,
+ 1819,
+ 1224,
+ 613,
+ 1235,
+ 222,
+ 371,
+ 721,
+ 1259,
+ 1478,
+ 717,
+ 1098,
+ 1769,
+ 1770,
+ 1358,
+ 552,
+ 1420,
+ 484,
+ 936,
+ 1686,
+ 1219,
+ 147,
+ 910,
+ 499,
+ 492,
+ 1134,
+ 1511,
+ 905,
+ 1415,
+ 1553,
+ 307,
+ 1831,
+ 1244,
+ 5,
+ 1399,
+ 1160,
+ 992,
+ 466,
+ 860,
+ 1304,
+ 1434,
+ 1421,
+ 651,
+ 1795,
+ 1408
+ ]
+ },
+ {
+ "word": "will",
+ "duration": 0.44,
+ "codes": [
+ 985,
+ 1738,
+ 744,
+ 1205,
+ 121,
+ 1556,
+ 1053,
+ 1759,
+ 1143,
+ 1325,
+ 1808,
+ 1126,
+ 1822,
+ 1822,
+ 1752,
+ 891,
+ 1832,
+ 1699,
+ 891,
+ 1667,
+ 1373,
+ 326,
+ 1526,
+ 143,
+ 1323,
+ 1342,
+ 553,
+ 1697,
+ 483,
+ 720,
+ 1670,
+ 1219,
+ 1666
+ ]
+ },
+ {
+ "word": "there",
+ "duration": 0.16,
+ "codes": [
+ 899,
+ 845,
+ 1100,
+ 1110,
+ 371,
+ 607,
+ 212,
+ 516,
+ 818,
+ 41,
+ 1011,
+ 1070
+ ]
+ },
+ {
+ "word": "was",
+ "duration": 0.16,
+ "codes": [
+ 1605,
+ 626,
+ 105,
+ 1410,
+ 267,
+ 1295,
+ 797,
+ 770,
+ 519,
+ 1088,
+ 1061,
+ 863
+ ]
+ },
+ {
+ "word": "no",
+ "duration": 0.12,
+ "codes": [
+ 1465,
+ 1301,
+ 1739,
+ 1662,
+ 647,
+ 570,
+ 352,
+ 1677,
+ 260
+ ]
+ },
+ {
+ "word": "further",
+ "duration": 0.43,
+ "codes": [
+ 1357,
+ 182,
+ 655,
+ 779,
+ 1785,
+ 1321,
+ 1256,
+ 901,
+ 1685,
+ 827,
+ 1422,
+ 1450,
+ 400,
+ 1649,
+ 359,
+ 628,
+ 429,
+ 1325,
+ 595,
+ 456,
+ 117,
+ 260,
+ 1456,
+ 1447,
+ 566,
+ 1746,
+ 547,
+ 1801,
+ 159,
+ 1627,
+ 1257,
+ 1074
+ ]
+ },
+ {
+ "word": "investigation",
+ "duration": 0.77,
+ "codes": [
+ 1068,
+ 565,
+ 1669,
+ 1155,
+ 1225,
+ 744,
+ 970,
+ 1741,
+ 1176,
+ 1741,
+ 1477,
+ 573,
+ 267,
+ 499,
+ 352,
+ 1438,
+ 239,
+ 1611,
+ 263,
+ 330,
+ 863,
+ 592,
+ 1631,
+ 1669,
+ 943,
+ 1743,
+ 446,
+ 1278,
+ 183,
+ 684,
+ 930,
+ 420,
+ 1504,
+ 393,
+ 24,
+ 39,
+ 248,
+ 305,
+ 1447,
+ 65,
+ 940,
+ 1155,
+ 854,
+ 1803,
+ 1438,
+ 596,
+ 1310,
+ 570,
+ 499,
+ 886,
+ 499,
+ 1351,
+ 1443,
+ 841,
+ 1493,
+ 1731,
+ 1432,
+ 1454
+ ]
+ },
+ {
+ "word": "this",
+ "duration": 0.27,
+ "codes": [
+ 1057,
+ 1754,
+ 1071,
+ 1279,
+ 1373,
+ 1486,
+ 901,
+ 525,
+ 1542,
+ 1509,
+ 1823,
+ 232,
+ 1139,
+ 69,
+ 656,
+ 1337,
+ 330,
+ 927,
+ 276,
+ 366
+ ]
+ },
+ {
+ "word": "report",
+ "duration": 1.0,
+ "codes": [
+ 1342,
+ 69,
+ 382,
+ 870,
+ 206,
+ 1088,
+ 357,
+ 318,
+ 1403,
+ 245,
+ 715,
+ 714,
+ 366,
+ 719,
+ 292,
+ 715,
+ 1741,
+ 499,
+ 1806,
+ 1735,
+ 1432,
+ 1553,
+ 1637,
+ 1495,
+ 1553,
+ 1638,
+ 1279,
+ 1807,
+ 1375,
+ 1456,
+ 1637,
+ 1099,
+ 1390,
+ 1306,
+ 1260,
+ 1141,
+ 25,
+ 639,
+ 894,
+ 768,
+ 1800,
+ 516,
+ 1028,
+ 1305,
+ 1704,
+ 1760,
+ 1428,
+ 1073,
+ 1261,
+ 1331,
+ 1415,
+ 951,
+ 1004,
+ 977,
+ 354,
+ 1560,
+ 1224,
+ 1049,
+ 182,
+ 149,
+ 775,
+ 1455,
+ 1044,
+ 1326,
+ 1477,
+ 1052,
+ 1477,
+ 1459,
+ 1381,
+ 1434,
+ 950,
+ 1251,
+ 1495,
+ 891,
+ 1553
+ ]
+ },
+ {
+ "word": "was",
+ "duration": 0.28,
+ "codes": [
+ 1730,
+ 1741,
+ 330,
+ 599,
+ 218,
+ 299,
+ 1751,
+ 1282,
+ 1397,
+ 438,
+ 1556,
+ 1648,
+ 1294,
+ 1100,
+ 1237,
+ 1493,
+ 1607,
+ 366,
+ 780,
+ 366,
+ 1259
+ ]
+ },
+ {
+ "word": "classified",
+ "duration": 0.73,
+ "codes": [
+ 1478,
+ 1793,
+ 939,
+ 627,
+ 847,
+ 172,
+ 498,
+ 315,
+ 1476,
+ 359,
+ 589,
+ 589,
+ 386,
+ 958,
+ 1435,
+ 1351,
+ 1325,
+ 1018,
+ 135,
+ 599,
+ 67,
+ 366,
+ 1450,
+ 1424,
+ 791,
+ 41,
+ 1742,
+ 1410,
+ 1802,
+ 715,
+ 1450,
+ 1496,
+ 239,
+ 708,
+ 1066,
+ 956,
+ 221,
+ 779,
+ 1177,
+ 524,
+ 545,
+ 1264,
+ 1040,
+ 816,
+ 926,
+ 1448,
+ 990,
+ 41,
+ 1015,
+ 788,
+ 1071,
+ 1745,
+ 1655,
+ 458,
+ 1247
+ ]
+ },
+ {
+ "word": "and",
+ "duration": 0.12,
+ "codes": [
+ 1129,
+ 1307,
+ 1807,
+ 1157,
+ 1178,
+ 886,
+ 1233,
+ 1746,
+ 1639
+ ]
+ },
+ {
+ "word": "then",
+ "duration": 0.13,
+ "codes": [
+ 1414,
+ 1638,
+ 1700,
+ 1126,
+ 665,
+ 1318,
+ 1677,
+ 1346,
+ 792,
+ 1241
+ ]
+ },
+ {
+ "word": "it",
+ "duration": 0.09,
+ "codes": [
+ 1720,
+ 1483,
+ 1364,
+ 891,
+ 1770,
+ 957,
+ 727
+ ]
+ },
+ {
+ "word": "was",
+ "duration": 0.15,
+ "codes": [
+ 1462,
+ 1117,
+ 1802,
+ 1197,
+ 1404,
+ 1518,
+ 366,
+ 604,
+ 1808,
+ 1314,
+ 805
+ ]
+ },
+ {
+ "word": "quietly",
+ "duration": 0.61,
+ "codes": [
+ 1770,
+ 1621,
+ 1294,
+ 1667,
+ 957,
+ 1408,
+ 1801,
+ 1484,
+ 1298,
+ 1818,
+ 1718,
+ 22,
+ 1637,
+ 1485,
+ 1005,
+ 1258,
+ 306,
+ 880,
+ 1155,
+ 595,
+ 1462,
+ 955,
+ 556,
+ 1827,
+ 1125,
+ 1351,
+ 1483,
+ 1278,
+ 1818,
+ 1658,
+ 1632,
+ 681,
+ 1705,
+ 1360,
+ 121,
+ 1775,
+ 375,
+ 947,
+ 1790,
+ 1436,
+ 799,
+ 1486,
+ 344,
+ 1736,
+ 1631,
+ 1250
+ ]
+ },
+ {
+ "word": "buried",
+ "duration": 0.68,
+ "codes": [
+ 1505,
+ 802,
+ 1668,
+ 1784,
+ 1707,
+ 627,
+ 1806,
+ 1623,
+ 1092,
+ 1623,
+ 1380,
+ 1767,
+ 1648,
+ 32,
+ 1779,
+ 1474,
+ 984,
+ 1694,
+ 1725,
+ 1373,
+ 1465,
+ 591,
+ 1758,
+ 186,
+ 441,
+ 1647,
+ 1400,
+ 841,
+ 604,
+ 1073,
+ 1183,
+ 1153,
+ 1661,
+ 1172,
+ 1279,
+ 1787,
+ 1208,
+ 1370,
+ 1237,
+ 947,
+ 990,
+ 1265,
+ 833,
+ 1799,
+ 1677,
+ 1760,
+ 681,
+ 1779,
+ 121,
+ 1373,
+ 1738
+ ]
+ }
+ ],
+ "language": "en"
+}
\ No newline at end of file
diff --git a/examples/outetts/speakers/zh_female_1.json b/examples/outetts/speakers/zh_female_1.json
new file mode 100644
index 000000000..765f2a970
--- /dev/null
+++ b/examples/outetts/speakers/zh_female_1.json
@@ -0,0 +1,283 @@
+{
+ "text": "\u98df\u9053\u764c\u7b49\u7b49\u3002\u4e8c\u5341\u4e8c\u79cd\u91cd\u5927\u75be\u75c5\u7684\u8bdd\u4e0d\u4ec5\u53ef\u4ee5\u62a5\u9500\uff0c\u8fd8\u53ef\u4ee5\u4eab\u53d7\u5230\u989d\u5916\u6700\u9ad8\u5341\u4e07\u5143\u7684\u5927\u75c5\u8865\u52a9\u3002\u7b2c\u4e8c\u4e2a\u5c31\u662f\u62a5\u9500\u6bd4\u4f8b\u4e4b\u524d\u4e00\u4e03\u5e74\u90a3\u7248\u7684\u65f6\u5019\u3002",
+ "words": [
+ {
+ "word": "shidao",
+ "duration": 0.63,
+ "codes": [
+ 962, 1375, 1459, 11, 1735, 1139, 1061, 1671, 796, 1375, 1500, 889, 1752, 1370, 527, 1642, 890, 1222,
+ 1308, 1678, 1051, 1022, 1762, 1100, 143, 1323, 158, 1281, 1106, 1781, 1727, 1269, 101, 855, 1201, 446,
+ 1066, 826, 115, 1271, 497, 1309, 221, 818, 1149, 1200, 524
+ ]
+ },
+ {
+ "word": "ai",
+ "duration": 0.21,
+ "codes": [1183, 721, 1399, 956, 1356, 880, 1222, 393, 1095, 370, 942, 1247, 552, 873, 1457, 1403]
+ },
+ {
+ "word": "deng",
+ "duration": 0.27,
+ "codes": [
+ 955, 753, 1471, 1499, 1740, 776, 1080, 1512, 1642, 996, 871, 1063, 1345, 1731, 1501, 1409, 1506, 904,
+ 1301, 1807
+ ]
+ },
+ {
+ "word": "deng",
+ "duration": 0.48,
+ "codes": [
+ 261, 1707, 633, 1142, 1614, 301, 1262, 933, 986, 1338, 121, 939, 1792, 566, 899, 1474, 799, 1157, 659,
+ 762, 377, 1609, 1423, 1407, 550, 130, 756, 1044, 794, 1017, 1698, 382, 1196, 840, 1108, 1358
+ ]
+ },
+ {
+ "word": "twenty",
+ "duration": 0.44,
+ "codes": [
+ 1071, 1385, 1348, 509, 1771, 171, 334, 563, 984, 1245, 573, 934, 390, 1038, 461, 477, 1064, 1247, 477,
+ 1037, 519, 1772, 943, 788, 1735, 962, 1637, 701, 176, 977, 1038, 1235, 613
+ ]
+ },
+ {
+ "word": "two",
+ "duration": 0.08,
+ "codes": [1106, 1328, 565, 1111, 879, 441]
+ },
+ {
+ "word": "zhongzhong",
+ "duration": 0.48,
+ "codes": [
+ 841, 826, 422, 1159, 1099, 1437, 883, 1696, 1529, 1586, 1240, 1202, 1443, 1494, 894, 1433, 998, 1490,
+ 1489, 86, 1087, 1152, 1779, 59, 1246, 784, 906, 1119, 1402, 1757, 876, 130, 1762, 1353, 982, 905
+ ]
+ },
+ {
+ "word": "da",
+ "duration": 0.09,
+ "codes": [1800, 728, 663, 1791, 1492, 797, 963]
+ },
+ {
+ "word": "jibing",
+ "duration": 0.43,
+ "codes": [
+ 1183, 1060, 1110, 1198, 1078, 992, 1822, 1619, 1428, 1571, 784, 634, 1783, 1229, 771, 1544, 801, 32,
+ 1596, 1630, 414, 1550, 805, 892, 826, 1078, 1345, 1137, 965, 567, 1312, 987
+ ]
+ },
+ {
+ "word": "de",
+ "duration": 0.08,
+ "codes": [1545, 1706, 909, 1245, 818, 1494]
+ },
+ {
+ "word": "huabu",
+ "duration": 0.59,
+ "codes": [
+ 1483, 591, 1683, 1273, 795, 1601, 925, 1103, 883, 1001, 819, 1501, 1143, 1258, 998, 1200, 1640, 1504,
+ 929, 345, 1385, 939, 939, 1153, 1003, 1556, 1303, 1003, 1404, 957, 1118, 1809, 1262, 1114, 1273, 680,
+ 1273, 1262, 765, 1655, 1071, 825, 856, 737
+ ]
+ },
+ {
+ "word": "jinke",
+ "duration": 0.29,
+ "codes": [
+ 856, 1456, 1825, 1563, 1350, 917, 489, 1464, 1217, 1124, 250, 1281, 80, 1792, 1659, 1299, 1160, 1400,
+ 1322, 679, 801, 594
+ ]
+ },
+ {
+ "word": "yi",
+ "duration": 0.15,
+ "codes": [1287, 759, 690, 322, 1563, 1797, 1460, 427, 671, 428, 1136]
+ },
+ {
+ "word": "baoxiao",
+ "duration": 0.45,
+ "codes": [
+ 437, 1323, 659, 17, 1555, 420, 659, 1666, 260, 819, 1505, 847, 718, 655, 466, 982, 1525, 1518, 1737,
+ 554, 415, 1565, 884, 906, 1356, 42, 466, 978, 797, 879, 446, 826, 1180, 446
+ ]
+ },
+ {
+ "word": "haike",
+ "duration": 0.6,
+ "codes": [
+ 1165, 1333, 1686, 633, 964, 1478, 957, 1282, 1385, 1714, 19, 1465, 1434, 1738, 1648, 1117, 1705, 1448,
+ 1611, 680, 1279, 1246, 606, 1380, 633, 1130, 1153, 1823, 1196, 1324, 549, 997, 819, 1280, 782, 1256,
+ 1612, 414, 1500, 1436, 1752, 1500, 1022, 302, 225
+ ]
+ },
+ {
+ "word": "yi",
+ "duration": 0.11,
+ "codes": [982, 1376, 1364, 1501, 197, 909, 834, 789]
+ },
+ {
+ "word": "xiangshou",
+ "duration": 0.44,
+ "codes": [
+ 447, 882, 1111, 495, 1518, 1549, 1376, 731, 736, 1758, 9, 1198, 711, 1264, 1630, 955, 1317, 1167, 1710,
+ 1339, 1821, 10, 176, 788, 1720, 973, 690, 40, 1521, 910, 732, 864, 963
+ ]
+ },
+ {
+ "word": "dao",
+ "duration": 0.2,
+ "codes": [1826, 1239, 450, 1439, 589, 1131, 764, 871, 1277, 660, 594, 907, 1129, 580, 1487]
+ },
+ {
+ "word": "ewai",
+ "duration": 0.37,
+ "codes": [
+ 1169, 1158, 1782, 1118, 1358, 1063, 1087, 1792, 1344, 908, 621, 86, 705, 832, 753, 675, 594, 1755, 58,
+ 1281, 1047, 757, 518, 819, 1171, 512, 1177, 110
+ ]
+ },
+ {
+ "word": "zuigao",
+ "duration": 0.36,
+ "codes": [
+ 720, 353, 1685, 1685, 928, 34, 906, 1210, 1501, 1140, 1111, 428, 1638, 969, 770, 60, 1619, 1697, 1475,
+ 1127, 818, 1368, 291, 1044, 1272, 1162, 1306
+ ]
+ },
+ {
+ "word": "ten",
+ "duration": 0.27,
+ "codes": [
+ 1326, 1339, 1649, 736, 1088, 308, 1774, 962, 1146, 1738, 1061, 495, 640, 713, 1774, 321, 130, 1697,
+ 1670, 1252
+ ]
+ },
+ {
+ "word": "wan",
+ "duration": 0.16,
+ "codes": [779, 936, 907, 1137, 412, 1119, 1106, 142, 1268, 699, 772, 1166]
+ },
+ {
+ "word": "yuan",
+ "duration": 0.19,
+ "codes": [825, 644, 754, 1600, 1431, 594, 864, 1065, 806, 1118, 903, 908, 1373, 1773]
+ },
+ {
+ "word": "de",
+ "duration": 0.09,
+ "codes": [728, 845, 973, 1063, 1087, 382, 1275]
+ },
+ {
+ "word": "dabing",
+ "duration": 0.4,
+ "codes": [
+ 1124, 1747, 343, 712, 686, 939, 1097, 642, 934, 580, 130, 1197, 1088, 874, 492, 531, 1790, 1555, 1246,
+ 933, 1505, 913, 489, 893, 1546, 1830, 894, 40, 1811, 1488
+ ]
+ },
+ {
+ "word": "buzhu",
+ "duration": 0.31,
+ "codes": [
+ 663, 1676, 64, 505, 377, 1294, 1076, 828, 1417, 729, 1060, 1555, 776, 933, 1290, 654, 1078, 1065, 1317,
+ 29, 942, 149, 1596
+ ]
+ },
+ {
+ "word": "di",
+ "duration": 0.49,
+ "codes": [
+ 992, 1463, 1802, 1144, 1803, 710, 287, 1428, 1157, 1716, 1130, 923, 1714, 1230, 516, 1059, 1799, 1199,
+ 1412, 513, 1223, 1327, 513, 1668, 377, 735, 758, 1080, 939, 710, 509, 1657, 1726, 1202, 1665, 441, 836
+ ]
+ },
+ {
+ "word": "two",
+ "duration": 0.12,
+ "codes": [1168, 260, 1208, 1220, 248, 1183, 1201, 1169, 1197]
+ },
+ {
+ "word": "gejiu",
+ "duration": 0.39,
+ "codes": [
+ 76, 880, 531, 656, 390, 1046, 1228, 1280, 988, 1545, 734, 813, 864, 1501, 774, 116, 472, 916, 328, 1379,
+ 1425, 1597, 15, 1077, 1100, 1218, 1202, 1296, 1736
+ ]
+ },
+ {
+ "word": "shi",
+ "duration": 0.13,
+ "codes": [1312, 1267, 1750, 11, 554, 1057, 1532, 1127, 621, 130]
+ },
+ {
+ "word": "baoxiao",
+ "duration": 0.39,
+ "codes": [
+ 1808, 1581, 5, 1614, 436, 696, 1086, 294, 754, 996, 632, 594, 892, 897, 984, 175, 1624, 1580, 4, 1624,
+ 1207, 1385, 446, 492, 735, 524, 879, 1078, 1060
+ ]
+ },
+ {
+ "word": "bili",
+ "duration": 0.32,
+ "codes": [
+ 712, 1740, 1266, 1614, 1123, 1308, 940, 1052, 834, 1375, 1668, 1825, 1360, 1304, 1285, 1234, 1581, 1700,
+ 894, 1518, 836, 735, 877, 1287
+ ]
+ },
+ {
+ "word": "zhi",
+ "duration": 0.4,
+ "codes": [
+ 982, 945, 1652, 1251, 1669, 504, 1003, 1404, 624, 1404, 1306, 751, 1410, 1465, 1079, 1496, 751, 1803,
+ 1474, 651, 1525, 986, 680, 1527, 1580, 756, 1333, 521, 405, 860
+ ]
+ },
+ {
+ "word": "qian",
+ "duration": 0.24,
+ "codes": [1078, 1137, 577, 1636, 1547, 1552, 368, 93, 674, 57, 477, 753, 1232, 1064, 117, 798, 395, 1295]
+ },
+ {
+ "word": "one",
+ "duration": 0.12,
+ "codes": [717, 1713, 705, 898, 325, 628, 717, 47, 1436]
+ },
+ {
+ "word": "seven",
+ "duration": 0.31,
+ "codes": [
+ 955, 639, 1086, 1106, 973, 1617, 1783, 1664, 1580, 1565, 0, 1513, 788, 233, 298, 1477, 1193, 1361, 1166,
+ 1457, 1115, 188, 1499
+ ]
+ },
+ {
+ "word": "nian",
+ "duration": 0.2,
+ "codes": [1599, 580, 1663, 1486, 1366, 466, 908, 759, 1106, 609, 879, 886, 825, 188, 1776]
+ },
+ {
+ "word": "na",
+ "duration": 0.09,
+ "codes": [1759, 1365, 120, 982, 1403, 1139, 759]
+ },
+ {
+ "word": "ban",
+ "duration": 0.2,
+ "codes": [675, 1311, 1018, 1109, 272, 1692, 959, 149, 573, 1187, 1263, 1089, 1541, 1424, 1766]
+ },
+ {
+ "word": "de",
+ "duration": 0.12,
+ "codes": [1697, 1414, 1084, 1757, 832, 1001, 1801, 942, 0]
+ },
+ {
+ "word": "shihou",
+ "duration": 0.39,
+ "codes": [
+ 1014, 1779, 749, 1734, 1082, 750, 1738, 746, 1600, 713, 1820, 1253, 982, 1353, 1260, 1257, 1462, 1049,
+ 1794, 1103, 1775, 992, 1224, 1017, 1022, 942, 1081, 1484, 994
+ ]
+ }
+ ],
+ "language": "zh"
+}
diff --git a/examples/outetts/speakers/zh_male_1.json b/examples/outetts/speakers/zh_male_1.json
new file mode 100644
index 000000000..0de88f705
--- /dev/null
+++ b/examples/outetts/speakers/zh_male_1.json
@@ -0,0 +1,234 @@
+{
+ "text": "\u8fd1\u65e5\uff0c\u963f\u91cc\u5168\u8d44\u6536\u8d2d\u997f\u4e86\u4e48\u7684\u6d88\u606f\u6380\u8d77\u4e86\u4e0d\u5c0f\u6d6a\u82b1\u3002\u636e\u79f0\u4e09\u4e2a\u6708\u5185\uff0c\u963f\u91cc\u5c06\u6309\u7167\u4e5d\u5341\u4e94\u4ebf\u7684\u4ef7\u683c\u6536\u8d2d\u997f\u4e86\u4e48\u5168\u90e8\u80a1\u4efd\u3002",
+ "words": [
+ {
+ "word": "jinri",
+ "duration": 0.32,
+ "codes": [
+ 1536, 1246, 939, 1703, 1704, 930, 253, 18, 372, 794, 616, 689, 656, 1440, 51, 1024, 406, 946, 889, 1572,
+ 1268, 999, 982, 1104
+ ]
+ },
+ {
+ "word": "ali",
+ "duration": 0.72,
+ "codes": [
+ 1611, 727, 1390, 442, 1142, 795, 1352, 1130, 1279, 1007, 1059, 1348, 1495, 1495, 1419, 1170, 1667, 1432,
+ 1040, 1751, 1395, 1434, 1751, 1098, 1777, 1817, 1177, 1118, 1397, 869, 1510, 1638, 538, 861, 1174, 997,
+ 321, 173, 83, 271, 159, 520, 106, 520, 236, 609, 1686, 863, 1589, 1726, 263, 700, 1472, 528
+ ]
+ },
+ {
+ "word": "quan",
+ "duration": 0.31,
+ "codes": [
+ 1281, 645, 1762, 157, 459, 1534, 231, 633, 1667, 62, 1351, 232, 769, 239, 927, 1499, 535, 443, 330,
+ 1471, 746, 787, 1540
+ ]
+ },
+ {
+ "word": "zishou",
+ "duration": 0.4,
+ "codes": [
+ 1518, 1497, 994, 1709, 1570, 1660, 543, 1376, 1300, 1217, 1382, 98, 688, 376, 780, 1823, 1800, 366,
+ 1340, 358, 499, 713, 609, 871, 576, 725, 934, 148, 455, 1286
+ ]
+ },
+ {
+ "word": "goue",
+ "duration": 0.29,
+ "codes": [
+ 1675, 1508, 1672, 1548, 1790, 1001, 443, 352, 716, 1660, 210, 1540, 1308, 1131, 1740, 1749, 288, 1721,
+ 1812, 755, 853, 152
+ ]
+ },
+ {
+ "word": "le",
+ "duration": 0.16,
+ "codes": [1060, 143, 665, 779, 191, 57, 184, 1019, 1784, 1041, 374, 1720]
+ },
+ {
+ "word": "mede",
+ "duration": 0.23,
+ "codes": [303, 1829, 1236, 92, 744, 1081, 868, 1204, 1032, 1056, 599, 859, 1242, 1399, 1144, 1113, 384]
+ },
+ {
+ "word": "xiaoxi",
+ "duration": 0.4,
+ "codes": [
+ 1804, 821, 1800, 62, 356, 1451, 10, 1325, 1731, 535, 475, 1053, 210, 1804, 306, 779, 1823, 93, 1472,
+ 788, 949, 26, 780, 0, 24, 556, 1519, 844, 902, 932
+ ]
+ },
+ {
+ "word": "xianqi",
+ "duration": 0.4,
+ "codes": [
+ 253, 1741, 1401, 1139, 1668, 1833, 919, 1422, 340, 308, 1270, 556, 1823, 65, 1365, 305, 825, 701, 1136,
+ 1233, 1439, 1388, 652, 1283, 1364, 24, 1379, 1175, 1434, 1249
+ ]
+ },
+ {
+ "word": "le",
+ "duration": 0.12,
+ "codes": [346, 1810, 1393, 1148, 1048, 218, 797, 1803, 1047]
+ },
+ {
+ "word": "bu",
+ "duration": 0.12,
+ "codes": [1718, 986, 220, 1322, 1812, 1651, 1252, 503, 1708]
+ },
+ {
+ "word": "xiaolang",
+ "duration": 0.4,
+ "codes": [
+ 1720, 36, 1605, 276, 863, 350, 335, 1718, 997, 1072, 1111, 969, 1105, 1801, 1743, 1484, 716, 1705, 716,
+ 860, 1304, 446, 1317, 107, 1201, 1323, 1661, 1294, 1152, 1465
+ ]
+ },
+ {
+ "word": "hua",
+ "duration": 0.19,
+ "codes": [1801, 1810, 1375, 1532, 1710, 1637, 1599, 1726, 1109, 1804, 210, 491, 1174, 115]
+ },
+ {
+ "word": "ju",
+ "duration": 0.73,
+ "codes": [
+ 411, 1117, 565, 225, 1309, 1212, 240, 1322, 710, 487, 1033, 740, 744, 1121, 17, 1108, 1754, 261, 1037,
+ 899, 5, 1028, 624, 1305, 1130, 805, 1134, 1496, 1749, 1153, 1788, 1797, 1329, 1478, 1074, 1355, 1738,
+ 833, 1255, 591, 1338, 534, 1408, 820, 986, 1262, 1302, 805, 308, 1, 556, 357, 302, 570, 1476
+ ]
+ },
+ {
+ "word": "cheng",
+ "duration": 0.27,
+ "codes": [
+ 1431, 135, 312, 888, 1007, 1535, 135, 592, 318, 1261, 1304, 1370, 422, 1008, 1450, 467, 917, 1748, 1462,
+ 1236
+ ]
+ },
+ {
+ "word": "three",
+ "duration": 0.56,
+ "codes": [
+ 1747, 265, 861, 957, 606, 923, 964, 27, 1668, 1130, 476, 1211, 976, 572, 1367, 1255, 891, 1130, 799,
+ 1211, 1011, 1266, 1658, 1568, 1441, 400, 18, 368, 943, 848, 9, 1192, 140, 371, 155, 192, 738, 1591,
+ 1022, 1456, 713, 1710
+ ]
+ },
+ {
+ "word": "geyue",
+ "duration": 0.2,
+ "codes": [1251, 282, 138, 671, 315, 631, 804, 1777, 357, 134, 1814, 100, 804, 162, 1234]
+ },
+ {
+ "word": "nei",
+ "duration": 0.24,
+ "codes": [157, 599, 946, 890, 1626, 807, 1832, 138, 900, 270, 1420, 635, 687, 383, 1200, 1400, 178, 102]
+ },
+ {
+ "word": "ali",
+ "duration": 0.41,
+ "codes": [
+ 1808, 715, 1301, 1073, 888, 391, 1793, 1142, 567, 1385, 1437, 339, 699, 557, 393, 73, 1792, 25, 871,
+ 1457, 81, 671, 1296, 57, 1157, 1452, 597, 1030, 1451, 318, 702
+ ]
+ },
+ {
+ "word": "jiang",
+ "duration": 0.37,
+ "codes": [
+ 47, 980, 301, 1409, 14, 1787, 1009, 1742, 157, 1528, 1246, 1724, 335, 335, 7, 1308, 969, 1143, 1194,
+ 711, 791, 200, 158, 312, 1649, 1173, 1315, 1466
+ ]
+ },
+ {
+ "word": "an",
+ "duration": 0.17,
+ "codes": [900, 824, 1815, 1456, 1235, 339, 520, 408, 1156, 846, 901, 835, 634]
+ },
+ {
+ "word": "zhao",
+ "duration": 0.2,
+ "codes": [1449, 1439, 1299, 1679, 461, 1079, 142, 755, 1129, 1207, 1068, 725, 1180, 1369, 1677]
+ },
+ {
+ "word": "ninety",
+ "duration": 0.28,
+ "codes": [
+ 1821, 975, 767, 493, 592, 262, 1422, 382, 116, 1713, 123, 1651, 229, 197, 1004, 1671, 437, 1335, 1447,
+ 1625, 299
+ ]
+ },
+ {
+ "word": "five",
+ "duration": 0.27,
+ "codes": [
+ 206, 595, 366, 164, 1088, 371, 1401, 856, 143, 1409, 917, 355, 906, 1062, 1625, 1732, 1545, 1607, 832,
+ 1472
+ ]
+ },
+ {
+ "word": "yide",
+ "duration": 0.25,
+ "codes": [753, 1450, 758, 47, 760, 1794, 1350, 1615, 7, 1762, 57, 232, 1819, 1695, 1710, 1737, 53, 989, 152]
+ },
+ {
+ "word": "jiage",
+ "duration": 0.36,
+ "codes": [
+ 1379, 1409, 1527, 1518, 10, 69, 944, 1115, 966, 1143, 1424, 1197, 339, 607, 1125, 303, 944, 1622, 1756,
+ 1543, 1614, 1054, 1647, 1663, 969, 971, 952
+ ]
+ },
+ {
+ "word": "shou",
+ "duration": 0.31,
+ "codes": [
+ 47, 1671, 1590, 467, 1668, 961, 1279, 1406, 979, 218, 1382, 366, 782, 769, 1206, 1726, 142, 703, 1633,
+ 359, 1756, 408, 1595
+ ]
+ },
+ {
+ "word": "goue",
+ "duration": 0.32,
+ "codes": [
+ 1413, 1538, 1511, 825, 58, 321, 775, 942, 1243, 286, 1122, 1464, 1745, 939, 287, 1037, 401, 1831, 1499,
+ 1533, 892, 611, 217, 577
+ ]
+ },
+ {
+ "word": "le",
+ "duration": 0.17,
+ "codes": [1089, 1141, 1462, 461, 341, 1524, 95, 1755, 1581, 234, 1174, 1032, 903]
+ },
+ {
+ "word": "me",
+ "duration": 0.12,
+ "codes": [1542, 1593, 1764, 729, 1463, 847, 1218, 1312, 1183]
+ },
+ {
+ "word": "quanbu",
+ "duration": 0.39,
+ "codes": [
+ 1726, 1125, 1045, 1708, 731, 686, 178, 340, 1325, 434, 788, 1680, 1429, 498, 269, 65, 516, 1345, 1058,
+ 1503, 1535, 1006, 1756, 1737, 1340, 1335, 932, 348, 1124
+ ]
+ },
+ {
+ "word": "gu",
+ "duration": 0.19,
+ "codes": [1445, 1622, 1377, 950, 27, 1536, 1794, 1816, 1251, 1755, 1291, 1610, 1815, 1049]
+ },
+ {
+ "word": "fen",
+ "duration": 0.28,
+ "codes": [
+ 888, 923, 1433, 1771, 1740, 1436, 231, 719, 999, 1642, 1637, 1156, 1204, 1275, 882, 1235, 1471, 1355,
+ 1675, 387, 438
+ ]
+ }
+ ],
+ "language": "zh"
+}
diff --git a/examples/outetts/vc1.png b/examples/outetts/vc1.png
new file mode 100644
index 000000000..48fc50800
Binary files /dev/null and b/examples/outetts/vc1.png differ
diff --git a/examples/outetts/vc2.png b/examples/outetts/vc2.png
new file mode 100644
index 000000000..dae88a15d
Binary files /dev/null and b/examples/outetts/vc2.png differ
diff --git a/examples/outetts/voice_cloning.py b/examples/outetts/voice_cloning.py
new file mode 100644
index 000000000..804eacc33
--- /dev/null
+++ b/examples/outetts/voice_cloning.py
@@ -0,0 +1,22 @@
+import outetts
+print("Speaker JSON creation for Voice Cloning for OuteTTS...")
+
+model_config = outetts.HFModelConfig_v1(
+ model_path="OuteAI/OuteTTS-0.2-500M",
+ language="en", # Supported languages in v0.2: en, zh, ja, ko
+)
+
+interface = outetts.InterfaceHF(model_version="0.2", cfg=model_config)
+
+speaker = interface.create_speaker(
+ audio_path="/path/to/audio.wav",
+
+ # If transcript is not provided, it will be automatically transcribed using Whisper
+ transcript=None, # Set to None to use Whisper for transcription
+
+ whisper_model="turbo", # Optional: specify Whisper model (default: "turbo")
+ whisper_device=None, # Optional: specify device for Whisper (default: None)
+)
+
+interface.save_speaker(speaker, "speaker_output.json")
+print("Speaker JSON saved!")
\ No newline at end of file
diff --git a/expose.h b/expose.h
index 7cbe4a3a1..9a8b6e643 100644
--- a/expose.h
+++ b/expose.h
@@ -226,6 +226,8 @@ struct tts_generation_inputs
const char * prompt = nullptr;
const int speaker_seed = 0;
const int audio_seed = 0;
+ const char * custom_speaker_text = "";
+ const char * custom_speaker_data = "";
};
struct tts_generation_outputs
{
diff --git a/klite.embd b/klite.embd
index 2244e68c6..558ca405e 100644
--- a/klite.embd
+++ b/klite.embd
@@ -3077,6 +3077,7 @@ Current version indicated by LITEVER below.
var pending_storyjson_autosave = null;
var mainmenu_is_untab = false;
var websearch_in_progress = false;
+ var kcpp_tts_json = "";
var localsettings = {
my_api_key: "0000000000", //put here so it can be saved and loaded in persistent mode
@@ -3141,6 +3142,8 @@ Current version indicated by LITEVER below.
persist_session: true,
speech_synth: 0, //0 is disabled, 1000 is xtts
xtts_voice: "female_calm",
+ kcpp_tts_voice: "kobo",
+ kcpp_tts_json: "",
beep_on: false,
notify_on: false,
narrate_both_sides: false,
@@ -9656,6 +9659,7 @@ Current version indicated by LITEVER below.
kai_json_load(tmpstory, false);
}
}
+ update_for_sidepanel();
if(koboldcpp_has_multiplayer || koboldcpp_admin_type>0)
{
//force refresh
@@ -11094,6 +11098,8 @@ Current version indicated by LITEVER below.
}
document.getElementById("ttsselect").innerHTML = ttshtml;
document.getElementById("ttsselect").value = localsettings.speech_synth;
+ document.getElementById("kcpp_tts_voice").value = localsettings.kcpp_tts_voice;
+ kcpp_tts_json = localsettings.kcpp_tts_json;
toggle_tts_mode();
document.getElementById("beep_on").checked = localsettings.beep_on;
document.getElementById("notify_on").checked = localsettings.notify_on;
@@ -11473,6 +11479,8 @@ Current version indicated by LITEVER below.
localsettings.speech_synth = document.getElementById("ttsselect").value;
localsettings.xtts_voice = document.getElementById("xtts_voices").value;
+ localsettings.kcpp_tts_voice = document.getElementById("kcpp_tts_voice").value;
+ localsettings.kcpp_tts_json = kcpp_tts_json;
localsettings.beep_on = (document.getElementById("beep_on").checked?true:false);
localsettings.notify_on = (document.getElementById("notify_on").checked?true:false);
no_escape_html = (document.getElementById("no_escape_html").checked?true:false);
@@ -12279,6 +12287,27 @@ Current version indicated by LITEVER below.
});
}
+ function set_voice_clone()
+ {
+ inputBoxOkCancel("Set the Voice Clone JSON to clone an existing voice.
You can download existing voice clone JSONs, or make your own.
","Apply Voice Clone JSON",kcpp_tts_json,"Paste JSON Here",()=>{
+ let userinput = getInputBoxValue().trim();
+ try
+ {
+ kcpp_tts_json = "";
+ if(userinput!="")
+ {
+ kcpp_tts_json = JSON.stringify(JSON.parse(userinput));
+ }
+ } catch (e) {
+ console.log("Voice clone not correctly formatted!");
+ }
+
+ },
+ ()=>{
+ //do nothing on cancel
+ },true,true);
+ }
+
function restore_retried_text()
{
if(retry_in_progress)
@@ -13083,6 +13112,11 @@ Current version indicated by LITEVER below.
} else {
document.getElementById("kcpp_tts_voice_custom").classList.add("hidden");
}
+ if (document.getElementById("kcpp_tts_voice").value == "voiceclone") {
+ document.getElementById("kcpp_tts_voice_clone").classList.remove("hidden");
+ } else {
+ document.getElementById("kcpp_tts_voice_clone").classList.add("hidden");
+ }
}
@@ -13135,10 +13169,12 @@ Current version indicated by LITEVER below.
}
let ssval = localsettings.speech_synth;
let ssrate = localsettings.tts_speed;
+ let vcjson = localsettings.kcpp_tts_json;
if(speech_synth_override!=null)
{
ssval = speech_synth_override;
ssrate = document.getElementById("tts_speed").value;
+ vcjson = kcpp_tts_json;
}
if(localsettings.narrate_only_dialog)
{
@@ -13190,11 +13226,17 @@ Current version indicated by LITEVER below.
};
} else {
sub_endpt = apply_proxy_url(custom_kobold_endpoint + koboldcpp_tts_endpoint);
+ let is_voiceclone = (document.getElementById("kcpp_tts_voice").value == "voiceclone");
+ let is_custom = (document.getElementById("kcpp_tts_voice").value == "custom");
payload =
{
"input": text,
- "voice": (document.getElementById("kcpp_tts_voice").value == "custom")?document.getElementById("kcpp_tts_voice_custom").value:document.getElementById("kcpp_tts_voice").value
+ "voice": (is_custom)?document.getElementById("kcpp_tts_voice_custom").value:document.getElementById("kcpp_tts_voice").value
};
+ if(is_voiceclone && vcjson)
+ {
+ payload.speaker_json = vcjson;
+ }
ttsheaders = get_kobold_header();
}
@@ -14574,7 +14616,8 @@ Current version indicated by LITEVER below.
}
let is_using_o1 = custom_oai_model.toLowerCase().startsWith("o1-") || custom_oai_model.toLowerCase()=="o1" || custom_oai_model.toLowerCase().startsWith("o3-") || custom_oai_model.toLowerCase()=="o3";
- if(is_using_o1)
+ let is_using_4o_search = custom_oai_model.toLowerCase().includes("-search-preview");
+ if(is_using_o1 || is_using_4o_search)
{
//o1 does not support ANY customization
oai_payload =
@@ -21548,8 +21591,10 @@ Current version indicated by LITEVER below.
+
-