diff --git a/examples/outetts/readme.md b/examples/outetts/readme.md new file mode 100644 index 000000000..45b9d63cd --- /dev/null +++ b/examples/outetts/readme.md @@ -0,0 +1,23 @@ +## KoboldCpp now supports OuteTTS Voice Cloning + +However, it can be slightly challenging to set it up. +- The easiest way to get started is to use the pre-cloned voices, which you can find within the [speakers directory](speakers). +- Simply download the .json speaker file, open KoboldCpp with an OuteTTS model and WavTokenizer loaded. +- Paste the voice JSON into Kobold Lite UI's Settings > Media > TTS > voiceclone option. + +![Preview](vc1.png) +![Preview](vc2.png) + +- If you don't know how to get the OuteTTS and WavTokenizer models, [please read the wiki](https://github.com/LostRuins/koboldcpp/wiki#getting-an-ai-model-file). +- Once configured, you are good to go. Narrations will be done with that selected cloned speaker. + +### Cloning your own speaker +You can also create your own cloned speaker voice. +To do that, you will need Python 3.10+ with the `outetts` package installed. You can install it with these commands: +``` +pip install outetts --no-deps +pip install scipy einops pyyaml huggingface-hub encodec matplotlib transformers pytorch-lightning tensorboardX soundfile numpy jsonargparse torchcrepe librosa inflect loguru polars natsort tqdm sounddevice mecab-python3 unidic-lite uroman openai-whisper pygame +``` +Then after that, obtain a wav file containing a sample of the voice you want to clone. +Modify the [voice_cloning.py](voice_cloning.py) script with your input audio file, and let it generate the JSON. +Finally, use the JSON file in the same way as described above for the pre-cloned voices. 
\ No newline at end of file diff --git a/examples/outetts/speakers/david_attenborough.json b/examples/outetts/speakers/david_attenborough.json new file mode 100644 index 000000000..b6e4880b7 --- /dev/null +++ b/examples/outetts/speakers/david_attenborough.json @@ -0,0 +1,1093 @@ +{ + "text": "to a surprising degree nearly all the major events in this history can be told using living animals to represent the ancestral creatures which were the actual protagonists", + "words": [ + { + "word": "to", + "duration": 0.19, + "codes": [ + 561, + 585, + 1813, + 852, + 285, + 23, + 1408, + 366, + 231, + 759, + 51, + 1593, + 1332, + 1233 + ] + }, + { + "word": "a", + "duration": 0.25, + "codes": [ + 113, + 505, + 787, + 343, + 29, + 761, + 1588, + 1368, + 1415, + 1663, + 58, + 1735, + 504, + 567, + 740, + 1213, + 1358, + 1666, + 966 + ] + }, + { + "word": "surprising", + "duration": 0.64, + "codes": [ + 146, + 807, + 286, + 471, + 1441, + 245, + 767, + 1337, + 596, + 1310, + 1526, + 537, + 1026, + 1612, + 1628, + 1676, + 1040, + 1511, + 1761, + 1005, + 127, + 1420, + 1795, + 755, + 1152, + 1263, + 1097, + 1483, + 1047, + 687, + 1404, + 809, + 842, + 1247, + 1831, + 556, + 1752, + 328, + 796, + 1800, + 1785, + 546, + 1022, + 1507, + 328, + 1409, + 920, + 1048 + ] + }, + { + "word": "degree", + "duration": 0.55, + "codes": [ + 1319, + 1176, + 150, + 505, + 1772, + 769, + 1281, + 865, + 319, + 1493, + 1527, + 1440, + 1689, + 345, + 1236, + 866, + 1319, + 1037, + 1153, + 1644, + 347, + 1794, + 901, + 127, + 1515, + 514, + 568, + 788, + 716, + 1343, + 319, + 1545, + 428, + 1251, + 197, + 1451, + 1813, + 1527, + 1360, + 1755, + 1580 + ] + }, + { + "word": "nearly", + "duration": 0.88, + "codes": [ + 585, + 1759, + 34, + 1525, + 1563, + 1543, + 1680, + 731, + 1465, + 1756, + 1580, + 681, + 416, + 1338, + 1806, + 1379, + 762, + 1489, + 423, + 241, + 559, + 68, + 1705, + 964, + 331, + 495, + 762, + 627, + 1665, + 612, + 1754, + 866, + 538, + 1730, + 168, + 555, + 1359, + 612, + 686, + 
288, + 227, + 1242, + 765, + 258, + 464, + 1789, + 738, + 540, + 50, + 1604, + 1452, + 1296, + 952, + 1518, + 1563, + 1243, + 1732, + 1722, + 62, + 1535, + 1530, + 319, + 184, + 92, + 286, + 1534 + ] + }, + { + "word": "all", + "duration": 0.41, + "codes": [ + 1646, + 968, + 1037, + 1279, + 585, + 54, + 417, + 288, + 44, + 1274, + 1218, + 1558, + 321, + 532, + 1461, + 1296, + 492, + 1514, + 110, + 1060, + 1710, + 545, + 1570, + 107, + 1102, + 1536, + 700, + 545, + 377, + 1460, + 718 + ] + }, + { + "word": "the", + "duration": 0.16, + "codes": [ + 1233, + 1354, + 361, + 1409, + 1769, + 1597, + 1516, + 653, + 1304, + 931, + 1532, + 1217 + ] + }, + { + "word": "major", + "duration": 0.39, + "codes": [ + 935, + 1386, + 1345, + 78, + 1240, + 853, + 579, + 926, + 430, + 1014, + 440, + 1295, + 1711, + 1368, + 1061, + 807, + 1740, + 1362, + 1440, + 1500, + 980, + 1313, + 858, + 307, + 966, + 333, + 662, + 813, + 1416 + ] + }, + { + "word": "events", + "duration": 0.48, + "codes": [ + 1267, + 1638, + 1585, + 1079, + 1215, + 1722, + 1351, + 1312, + 1263, + 1470, + 1773, + 1811, + 1612, + 1773, + 1758, + 512, + 1344, + 688, + 1078, + 302, + 1393, + 608, + 1554, + 831, + 1791, + 1820, + 1815, + 1558, + 804, + 686, + 995, + 1821, + 1034, + 1621, + 870, + 1034 + ] + }, + { + "word": "in", + "duration": 0.13, + "codes": [ + 1797, + 178, + 1824, + 1257, + 1353, + 1810, + 1733, + 0, + 1768, + 1729 + ] + }, + { + "word": "this", + "duration": 0.2, + "codes": [ + 1554, + 593, + 501, + 1679, + 1323, + 1565, + 1027, + 1416, + 679, + 909, + 1685, + 947, + 1744, + 471, + 1410 + ] + }, + { + "word": "history", + "duration": 0.4, + "codes": [ + 1496, + 979, + 1681, + 1801, + 1656, + 895, + 1353, + 1444, + 719, + 1721, + 1340, + 796, + 1532, + 471, + 989, + 1669, + 1364, + 1016, + 1397, + 989, + 1667, + 1074, + 1700, + 1060, + 1215, + 917, + 1738, + 1542, + 1525, + 1078 + ] + }, + { + "word": "can", + "duration": 0.48, + "codes": [ + 1301, + 750, + 1750, + 1068, + 1681, + 694, + 761, + 257, 
+ 235, + 458, + 976, + 538, + 1486, + 1754, + 469, + 1360, + 509, + 538, + 694, + 559, + 851, + 445, + 21, + 20, + 187, + 288, + 457, + 1797, + 1027, + 1753, + 1002, + 996, + 480, + 1008, + 1659, + 1545 + ] + }, + { + "word": "be", + "duration": 0.11, + "codes": [ + 1734, + 1684, + 264, + 1637, + 1078, + 1053, + 1445, + 629 + ] + }, + { + "word": "told", + "duration": 0.48, + "codes": [ + 634, + 1512, + 559, + 551, + 603, + 639, + 570, + 117, + 599, + 1663, + 1280, + 658, + 427, + 679, + 1161, + 600, + 917, + 98, + 1506, + 1633, + 1660, + 1700, + 877, + 615, + 98, + 310, + 143, + 1706, + 1326, + 1439, + 1731, + 1410, + 1014, + 444, + 1016, + 1759 + ] + }, + { + "word": "using", + "duration": 0.73, + "codes": [ + 1259, + 979, + 1381, + 1205, + 257, + 464, + 1011, + 824, + 1108, + 1230, + 1163, + 1429, + 1823, + 1096, + 1432, + 1479, + 751, + 1495, + 673, + 1705, + 1665, + 624, + 1322, + 561, + 445, + 551, + 1791, + 1545, + 198, + 1530, + 1698, + 11, + 1715, + 1019, + 202, + 875, + 844, + 1249, + 882, + 1718, + 1445, + 889, + 1035, + 858, + 1700, + 556, + 1093, + 356, + 1715, + 1008, + 1518, + 1528, + 1804, + 1737, + 629 + ] + }, + { + "word": "living", + "duration": 0.48, + "codes": [ + 1756, + 1715, + 1501, + 1798, + 1362, + 1211, + 1798, + 1776, + 1323, + 1521, + 1518, + 1463, + 1605, + 1009, + 1661, + 127, + 1472, + 69, + 1460, + 158, + 1697, + 1777, + 1465, + 233, + 1778, + 1722, + 1217, + 395, + 1332, + 344, + 372, + 1357, + 143, + 546, + 22, + 123 + ] + }, + { + "word": "animals", + "duration": 0.6, + "codes": [ + 1787, + 605, + 813, + 1682, + 1110, + 1768, + 797, + 1393, + 1326, + 1126, + 1775, + 1437, + 975, + 1312, + 1751, + 892, + 1697, + 1342, + 804, + 1634, + 1019, + 1440, + 1644, + 1490, + 1463, + 1321, + 1321, + 1696, + 1620, + 662, + 1572, + 1704, + 1780, + 1729, + 1341, + 503, + 1019, + 1379, + 1742, + 617, + 1804, + 1537, + 736, + 833, + 1797 + ] + }, + { + "word": "to", + "duration": 0.27, + "codes": [ + 1002, + 1349, + 1790, + 1147, + 1683, + 
1385, + 820, + 567, + 668, + 551, + 670, + 1770, + 437, + 559, + 420, + 1178, + 24, + 1583, + 1423, + 903 + ] + }, + { + "word": "represent", + "duration": 0.67, + "codes": [ + 385, + 1312, + 160, + 1734, + 1717, + 1768, + 1496, + 1449, + 803, + 1498, + 178, + 1326, + 225, + 1498, + 1269, + 859, + 1570, + 68, + 194, + 1684, + 886, + 886, + 1472, + 926, + 1439, + 1673, + 1445, + 802, + 1735, + 519, + 382, + 22, + 863, + 1083, + 1487, + 1511, + 1443, + 1036, + 1345, + 607, + 1785, + 1627, + 1598, + 1676, + 1219, + 1152, + 786, + 1288, + 544, + 17 + ] + }, + { + "word": "the", + "duration": 0.24, + "codes": [ + 382, + 1088, + 1478, + 1813, + 1635, + 1333, + 1587, + 1792, + 1319, + 5, + 160, + 1681, + 368, + 1698, + 1394, + 952, + 836, + 175 + ] + }, + { + "word": "ancestral", + "duration": 0.69, + "codes": [ + 1761, + 398, + 1640, + 1758, + 1532, + 1526, + 1399, + 1426, + 203, + 1526, + 947, + 473, + 783, + 1647, + 1757, + 835, + 1607, + 1388, + 1558, + 746, + 1748, + 372, + 1379, + 1088, + 581, + 1565, + 1382, + 1124, + 621, + 1161, + 1633, + 1008, + 1355, + 1711, + 1633, + 499, + 1104, + 581, + 980, + 795, + 1244, + 343, + 1170, + 1332, + 1666, + 1690, + 583, + 438, + 1019, + 1666, + 1348, + 1491 + ] + }, + { + "word": "creatures", + "duration": 0.55, + "codes": [ + 1205, + 704, + 559, + 762, + 72, + 680, + 1397, + 1583, + 948, + 1440, + 973, + 890, + 1243, + 1278, + 1375, + 335, + 1249, + 1554, + 365, + 509, + 1465, + 1009, + 1611, + 522, + 919, + 719, + 1351, + 1392, + 1521, + 1563, + 55, + 1630, + 1349, + 1825, + 1345, + 361, + 1243, + 767, + 1422, + 848, + 318 + ] + }, + { + "word": "which", + "duration": 0.61, + "codes": [ + 1677, + 927, + 1360, + 534, + 1448, + 1763, + 1668, + 1074, + 1741, + 1331, + 1278, + 1814, + 1412, + 1476, + 1820, + 1036, + 1830, + 1404, + 1411, + 1704, + 1355, + 1637, + 1809, + 1395, + 1779, + 1489, + 1393, + 1233, + 1391, + 1668, + 986, + 54, + 311, + 464, + 987, + 1635, + 844, + 576, + 850, + 870, + 925, + 1491, + 889, + 1583, + 595, 
+ 292 + ] + }, + { + "word": "were", + "duration": 0.16, + "codes": [ + 1649, + 1005, + 1019, + 1502, + 482, + 1056, + 1287, + 545, + 1423, + 1492, + 1483, + 1603 + ] + }, + { + "word": "the", + "duration": 0.11, + "codes": [ + 332, + 1659, + 767, + 1415, + 856, + 92, + 1368, + 848 + ] + }, + { + "word": "actual", + "duration": 0.41, + "codes": [ + 1061, + 1141, + 1235, + 721, + 1309, + 800, + 1469, + 1086, + 1601, + 1407, + 987, + 1170, + 1772, + 32, + 930, + 257, + 1404, + 947, + 1677, + 535, + 782, + 1701, + 1466, + 1069, + 1388, + 1372, + 1572, + 614, + 1675, + 861, + 1537 + ] + }, + { + "word": "protagonists", + "duration": 0.99, + "codes": [ + 501, + 641, + 765, + 1525, + 711, + 1247, + 1736, + 1675, + 1008, + 1174, + 1717, + 1629, + 888, + 704, + 1233, + 715, + 1036, + 1494, + 1511, + 587, + 935, + 1229, + 901, + 1258, + 1110, + 1332, + 1758, + 50, + 1575, + 702, + 608, + 1634, + 1370, + 1280, + 1707, + 775, + 1787, + 1570, + 1219, + 1805, + 1775, + 1492, + 0, + 1497, + 1500, + 1833, + 1473, + 1459, + 1652, + 1364, + 715, + 1817, + 656, + 1355, + 1439, + 382, + 1632, + 870, + 1329, + 1669, + 1139, + 1322, + 1568, + 1053, + 325, + 1139, + 793, + 1763, + 767, + 976, + 1422, + 1329, + 1682, + 1410 + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/examples/outetts/speakers/en_female_1.json b/examples/outetts/speakers/en_female_1.json new file mode 100644 index 000000000..b4fe19459 --- /dev/null +++ b/examples/outetts/speakers/en_female_1.json @@ -0,0 +1,277 @@ +{ + "text": " Uhm, now being the one to say, I know the worst of you and I've been directly affected by people like you, but it's a clean slate with me, buddy. 
You know, like that's really powerful in and of itself.", + "words": [ + { + "word": "uhm", + "duration": 0.36, + "codes": [ + 447, 223, 967, 301, 965, 827, 393, 908, 764, 1167, 711, 1222, 324, 1318, 806, 498, 1198, 1127, 1178, + 916, 1234, 1411, 1428, 706, 427, 1605, 1578 + ] + }, + { + "word": "now", + "duration": 0.36, + "codes": [ + 1049, 327, 385, 1070, 732, 1480, 450, 1025, 1469, 174, 1013, 1710, 1674, 775, 771, 251, 778, 1400, 897, + 1487, 366, 441, 1000, 393, 271, 1000, 768 + ] + }, + { + "word": "being", + "duration": 0.27, + "codes": [ + 926, 406, 1457, 437, 1231, 672, 1785, 521, 1179, 1559, 198, 1086, 733, 122, 1344, 845, 348, 1389, 470, + 1773 + ] + }, + { + "word": "the", + "duration": 0.08, + "codes": [1775, 562, 768, 1222, 768, 963] + }, + { + "word": "one", + "duration": 0.21, + "codes": [1757, 744, 144, 1610, 655, 616, 1317, 225, 1325, 913, 1342, 992, 1018, 80, 1777, 883] + }, + { + "word": "to", + "duration": 0.08, + "codes": [487, 1363, 1682, 1426, 655, 1483] + }, + { + "word": "say", + "duration": 0.27, + "codes": [ + 1644, 1804, 731, 273, 1592, 731, 1523, 1404, 984, 1207, 430, 1132, 1123, 768, 1116, 829, 1082, 1095, + 440, 1162 + ] + }, + { + "word": "i", + "duration": 0.33, + "codes": [ + 1330, 335, 1162, 1155, 308, 1162, 1150, 1481, 612, 674, 712, 1745, 1188, 1787, 1135, 1275, 1237, 1143, + 408, 1063, 393, 927, 1298, 132, 1686 + ] + }, + { + "word": "know", + "duration": 0.27, + "codes": [ + 983, 1677, 586, 1528, 1435, 835, 1396, 706, 987, 22, 1172, 218, 1404, 1001, 521, 1389, 775, 1416, 877, + 120 + ] + }, + { + "word": "the", + "duration": 0.16, + "codes": [916, 1756, 513, 1245, 1392, 89, 1266, 12, 1045, 1075, 904, 35] + }, + { + "word": "worst", + "duration": 0.32, + "codes": [ + 1607, 174, 1231, 144, 932, 490, 771, 1504, 798, 674, 364, 80, 1314, 1636, 449, 1704, 713, 1795, 968, + 1527, 1302, 1529, 1176, 795 + ] + }, + { + "word": "of", + "duration": 0.12, + "codes": [1193, 1205, 390, 1128, 1091, 883, 322, 377, 1070] + }, + { + "word": 
"you", + "duration": 0.17, + "codes": [1016, 1332, 926, 281, 927, 1368, 1687, 918, 67, 1638, 1317, 1265, 1770] + }, + { + "word": "and", + "duration": 0.28, + "codes": [ + 1129, 1633, 1373, 1207, 405, 879, 1030, 1253, 1071, 612, 724, 1770, 665, 1046, 1351, 1450, 1541, 1384, + 111, 1477, 284 + ] + }, + { + "word": "ive", + "duration": 0.35, + "codes": [ + 674, 266, 89, 1333, 1183, 1526, 1143, 883, 1135, 732, 827, 1119, 594, 1261, 1024, 1347, 92, 1392, 825, + 1710, 1289, 1598, 1070, 1525, 1442, 555 + ] + }, + { + "word": "been", + "duration": 0.17, + "codes": [1461, 194, 337, 1128, 188, 892, 848, 1280, 959, 754, 231, 649, 1304] + }, + { + "word": "directly", + "duration": 0.87, + "codes": [ + 1030, 353, 570, 1331, 470, 1832, 1362, 1809, 1383, 101, 325, 1557, 1242, 1512, 180, 227, 1242, 643, 209, + 464, 171, 1219, 174, 1723, 734, 118, 1269, 643, 209, 187, 612, 1231, 68, 567, 1242, 505, 319, 1268, 794, + 678, 40, 1286, 470, 1454, 199, 965, 188, 300, 1234, 1125, 794, 1289, 1224, 257, 469, 1121, 101, 823, + 1769, 1683, 95, 255, 59, 67, 832 + ] + }, + { + "word": "affected", + "duration": 0.44, + "codes": [ + 510, 873, 787, 1228, 771, 1428, 501, 751, 696, 258, 845, 1818, 1112, 498, 1111, 985, 1073, 832, 1427, + 168, 163, 447, 119, 567, 1626, 1820, 903, 635, 1060, 10, 1632, 35, 1635 + ] + }, + { + "word": "by", + "duration": 0.19, + "codes": [144, 144, 460, 185, 1112, 1044, 498, 1192, 656, 1333, 1001, 1186, 1186, 454] + }, + { + "word": "people", + "duration": 0.48, + "codes": [ + 1260, 747, 351, 526, 612, 1151, 1262, 1791, 344, 1752, 1547, 930, 1302, 1703, 1289, 92, 1407, 1482, 508, + 1431, 355, 1696, 337, 199, 1157, 223, 464, 568, 845, 411, 826, 718, 1786, 545, 712, 580 + ] + }, + { + "word": "like", + "duration": 0.32, + "codes": [ + 630, 532, 526, 607, 526, 839, 1305, 660, 459, 339, 717, 1178, 1148, 687, 149, 1390, 229, 199, 513, 712, + 1451, 731, 582, 1551 + ] + }, + { + "word": "you", + "duration": 0.21, + "codes": [1389, 954, 1781, 1047, 1236, 930, 809, 1621, 1268, 
384, 242, 587, 869, 816, 1680, 405] + }, + { + "word": "but", + "duration": 0.59, + "codes": [ + 1089, 1590, 908, 80, 594, 1046, 1706, 1025, 1150, 405, 548, 893, 1285, 464, 301, 939, 643, 23, 285, 161, + 209, 453, 72, 167, 417, 244, 151, 643, 391, 199, 651, 1023, 337, 1010, 54, 331, 1167, 756, 388, 934, + 1060, 18, 1624, 1060 + ] + }, + { + "word": "its", + "duration": 0.16, + "codes": [1102, 183, 1199, 1258, 1285, 35, 659, 180, 426, 1587, 1733, 942] + }, + { + "word": "a", + "duration": 0.04, + "codes": [791, 1012, 818] + }, + { + "word": "clean", + "duration": 0.61, + "codes": [ + 1819, 976, 163, 447, 316, 223, 763, 457, 1208, 1808, 1697, 1162, 1660, 1833, 1054, 1734, 1121, 1309, + 1643, 924, 1677, 1548, 869, 1268, 223, 674, 111, 792, 1670, 912, 174, 1554, 90, 80, 1563, 1621, 1698, + 1544, 992, 988, 175, 793, 1661, 1026, 80, 1761 + ] + }, + { + "word": "slate", + "duration": 0.4, + "codes": [ + 1802, 322, 1689, 1577, 1302, 1552, 1529, 1722, 1580, 582, 1642, 1529, 1020, 582, 1538, 970, 437, 1141, + 1477, 988, 335, 1611, 922, 1558, 1120, 1189, 423, 188, 171, 562 + ] + }, + { + "word": "with", + "duration": 0.15, + "codes": [963, 1347, 1274, 747, 1230, 712, 1408, 1290, 957, 1279, 258] + }, + { + "word": "me", + "duration": 0.09, + "codes": [638, 1058, 174, 1452, 1038, 894, 1571] + }, + { + "word": "buddy", + "duration": 0.32, + "codes": [ + 1003, 130, 1341, 938, 40, 804, 167, 89, 1456, 1189, 1155, 1171, 1434, 1077, 1029, 1455, 1622, 1037, 163, + 1411, 1165, 1463, 837, 1202 + ] + }, + { + "word": "you", + "duration": 0.36, + "codes": [ + 1354, 1165, 615, 1588, 1192, 1445, 1033, 982, 401, 1079, 684, 1570, 266, 31, 420, 163, 893, 845, 905, + 1827, 1804, 153, 627, 243, 1179, 298, 1147 + ] + }, + { + "word": "know", + "duration": 0.19, + "codes": [163, 1542, 1366, 698, 1753, 206, 916, 1499, 245, 665, 600, 894, 587, 1741] + }, + { + "word": "like", + "duration": 0.24, + "codes": [1106, 1280, 1062, 1304, 945, 809, 598, 104, 1001, 822, 965, 189, 693, 1810, 1293, 199, 1277, 
44] + }, + { + "word": "thats", + "duration": 0.24, + "codes": [ + 121, 1789, 1443, 370, 1154, 393, 1178, 1200, 1264, 424, 1391, 381, 978, 1346, 704, 1808, 1579, 1492 + ] + }, + { + "word": "really", + "duration": 0.56, + "codes": [ + 1177, 1761, 1723, 1360, 1413, 830, 551, 193, 59, 332, 598, 734, 1684, 1802, 60, 1590, 353, 89, 1636, + 1396, 893, 143, 455, 1501, 435, 1082, 621, 1593, 677, 474, 971, 1513, 913, 828, 1381, 1148, 1798, 1186, + 1443, 38, 335, 883 + ] + }, + { + "word": "powerful", + "duration": 0.63, + "codes": [ + 1773, 458, 1070, 964, 826, 1220, 1012, 1738, 1125, 669, 490, 1169, 922, 958, 1204, 489, 1001, 886, 1045, + 675, 1471, 1652, 732, 698, 1124, 480, 897, 1484, 1028, 35, 594, 1465, 505, 1669, 436, 851, 1288, 31, + 1501, 1187, 394, 909, 1541, 1793, 1720, 922, 840 + ] + }, + { + "word": "in", + "duration": 0.16, + "codes": [1317, 523, 630, 1343, 1187, 719, 907, 636, 111, 1524, 188, 1382] + }, + { + "word": "and", + "duration": 0.13, + "codes": [1074, 922, 1280, 1496, 1050, 832, 133, 1435, 1049, 1774] + }, + { + "word": "of", + "duration": 0.12, + "codes": [960, 1052, 1192, 1303, 1112, 970, 417, 60, 1155] + }, + { + "word": "itself", + "duration": 0.47, + "codes": [ + 1682, 1209, 1410, 513, 1222, 861, 167, 406, 1551, 582, 634, 1529, 786, 1363, 1578, 1739, 873, 424, 1041, + 1328, 955, 1110, 1490, 1424, 1199, 988, 1162, 1133, 1193, 978, 470, 832, 963, 1251, 733 + ] + } + ], + "language": "en" +} diff --git a/examples/outetts/speakers/en_female_2.json b/examples/outetts/speakers/en_female_2.json new file mode 100644 index 000000000..db67aa1c8 --- /dev/null +++ b/examples/outetts/speakers/en_female_2.json @@ -0,0 +1,210 @@ +{ + "text": "So we have five words here, um, all to do with a plumber and water pipes. 
Now the first word they talked about that there was water leaking everywhere.", + "words": [ + { + "word": "so", + "duration": 0.15, + "codes": [391, 1319, 1478, 895, 1580, 533, 166, 1015, 1169, 1186, 380] + }, + { + "word": "we", + "duration": 0.12, + "codes": [1403, 1150, 80, 1187, 1230, 529, 182, 398, 853] + }, + { + "word": "have", + "duration": 0.17, + "codes": [243, 1049, 759, 1477, 371, 1158, 422, 1516, 203, 449, 1351, 1546, 1130] + }, + { + "word": "five", + "duration": 0.51, + "codes": [ + 1241, 719, 239, 886, 1795, 1160, 1412, 1638, 943, 1588, 497, 1162, 106, 1185, 1183, 477, 840, 164, 581, + 775, 474, 518, 361, 764, 628, 153, 234, 125, 584, 1029, 149, 276, 1243, 1800, 1468, 1728, 1439, 1772 + ] + }, + { + "word": "words", + "duration": 0.45, + "codes": [ + 1455, 1366, 1734, 920, 703, 111, 1497, 1521, 302, 111, 128, 747, 687, 747, 104, 66, 208, 1727, 545, 69, + 684, 380, 1666, 1368, 183, 145, 1771, 1613, 1807, 1238, 1549, 1393, 1027, 736 + ] + }, + { + "word": "here", + "duration": 0.36, + "codes": [ + 117, 399, 1325, 1447, 1226, 1761, 778, 1120, 1764, 47, 348, 1416, 921, 50, 372, 437, 104, 771, 218, 982, + 162, 754, 997, 162, 912, 907, 245 + ] + }, + { + "word": "um", + "duration": 0.39, + "codes": [ + 1234, 511, 81, 1270, 1475, 841, 595, 64, 1268, 1461, 680, 1320, 573, 818, 764, 1068, 637, 684, 402, + 1222, 877, 537, 1103, 779, 300, 1629, 177, 529, 1672 + ] + }, + { + "word": "all", + "duration": 0.41, + "codes": [ + 653, 1811, 131, 516, 1525, 727, 1338, 1749, 758, 1097, 173, 116, 102, 660, 693, 1177, 159, 799, 221, + 1187, 984, 152, 95, 942, 1046, 236, 756, 1042, 1159, 1159, 1297 + ] + }, + { + "word": "to", + "duration": 0.24, + "codes": [577, 1542, 1356, 1122, 58, 47, 1759, 566, 1478, 1814, 599, 276, 1403, 62, 822, 1568, 95, 691] + }, + { + "word": "do", + "duration": 0.23, + "codes": [65, 1619, 1606, 1584, 251, 85, 1802, 1250, 350, 714, 1814, 631, 510, 1463, 307, 202, 50] + }, + { + "word": "with", + "duration": 0.45, + "codes": [ + 853, 893, 213, 
4, 143, 120, 1141, 38, 1637, 182, 389, 747, 890, 1637, 1618, 843, 145, 1445, 381, 234, + 1274, 228, 1484, 374, 245, 1131, 254, 1062, 844, 1635, 1491, 1795, 345, 891 + ] + }, + { + "word": "a", + "duration": 0.19, + "codes": [1693, 1462, 874, 1511, 1408, 1284, 450, 274, 1466, 1682, 1173, 1062, 1073, 1675] + }, + { + "word": "plumber", + "duration": 0.6, + "codes": [ + 935, 1389, 1178, 1803, 1006, 1419, 1614, 1495, 1711, 1779, 1423, 874, 1407, 1626, 1351, 1748, 1145, + 1768, 555, 1389, 252, 1274, 1356, 717, 2, 1147, 1076, 1333, 234, 23, 1453, 704, 143, 1243, 1174, 1483, + 1247, 157, 37, 393, 988, 1141, 206, 1052, 818 + ] + }, + { + "word": "and", + "duration": 0.43, + "codes": [ + 329, 1801, 926, 756, 1469, 1040, 1313, 516, 1166, 1129, 1369, 263, 1144, 1026, 318, 1809, 803, 364, + 1646, 383, 1074, 109, 113, 775, 1415, 863, 1352, 1369, 1796, 1385, 1765, 1618 + ] + }, + { + "word": "water", + "duration": 0.48, + "codes": [ + 315, 1075, 1675, 86, 970, 679, 1491, 1635, 1833, 60, 1619, 1595, 414, 126, 908, 425, 277, 1159, 798, + 186, 1179, 321, 349, 497, 1611, 157, 1480, 1502, 1075, 1162, 993, 74, 520, 113, 1731, 1314 + ] + }, + { + "word": "pipes", + "duration": 0.53, + "codes": [ + 1415, 150, 1705, 1813, 401, 1456, 1325, 1671, 1820, 1170, 1489, 1328, 931, 410, 359, 836, 1253, 1074, + 1192, 1283, 1055, 298, 1711, 937, 1375, 1144, 1321, 1672, 630, 1623, 1828, 1041, 230, 519, 1139, 749, + 1638, 1139, 1641, 1393 + ] + }, + { + "word": "now", + "duration": 0.63, + "codes": [ + 231, 1688, 788, 333, 1830, 938, 1461, 1668, 1355, 1666, 1412, 1701, 1705, 1088, 1746, 1397, 1002, 1793, + 1259, 1459, 1803, 927, 1790, 1720, 985, 1803, 1259, 1400, 1803, 927, 1638, 1438, 1411, 1010, 933, 851, + 1764, 1493, 485, 74, 1794, 1181, 208, 1492, 1046, 1193, 774 + ] + }, + { + "word": "the", + "duration": 0.08, + "codes": [1657, 1022, 76, 1116, 1272, 95] + }, + { + "word": "first", + "duration": 0.49, + "codes": [ + 1730, 1823, 1447, 1217, 1751, 1098, 1287, 1795, 1342, 1711, 1735, 788, 1383, 
1532, 1252, 173, 1173, 661, + 695, 412, 1139, 913, 113, 1668, 1437, 848, 1364, 962, 132, 719, 1018, 863, 1379, 1535, 696, 1486, 715 + ] + }, + { + "word": "word", + "duration": 0.43, + "codes": [ + 1364, 1287, 1698, 1100, 526, 443, 199, 1644, 302, 98, 1625, 1025, 682, 629, 307, 170, 671, 254, 1218, + 574, 565, 788, 993, 427, 626, 1398, 738, 587, 1755, 1453, 1617, 406 + ] + }, + { + "word": "they", + "duration": 0.2, + "codes": [969, 1400, 1491, 486, 1693, 426, 1287, 569, 1168, 550, 1563, 278, 79, 1718, 1321] + }, + { + "word": "talked", + "duration": 0.4, + "codes": [ + 286, 1023, 673, 1419, 1776, 599, 1041, 1629, 1125, 1671, 1687, 1320, 1473, 1260, 1254, 297, 1004, 1611, + 840, 969, 1759, 772, 1556, 1809, 869, 1704, 986, 1450, 335, 1477 + ] + }, + { + "word": "about", + "duration": 0.24, + "codes": [839, 376, 1553, 301, 426, 1346, 1671, 215, 1194, 971, 25, 838, 152, 1501, 1665, 1757, 1092, 710] + }, + { + "word": "that", + "duration": 0.12, + "codes": [436, 1166, 53, 1153, 324, 576, 1689, 1717, 566] + }, + { + "word": "there", + "duration": 0.16, + "codes": [1108, 1338, 1634, 903, 1355, 228, 813, 881, 102, 396, 1415, 518] + }, + { + "word": "was", + "duration": 0.12, + "codes": [793, 87, 1252, 911, 83, 1793, 634, 183, 124] + }, + { + "word": "water", + "duration": 0.59, + "codes": [ + 110, 1301, 1639, 520, 1219, 509, 1662, 748, 363, 224, 1753, 877, 1802, 1185, 500, 1148, 1435, 128, 951, + 942, 531, 1075, 324, 1402, 1159, 1202, 711, 218, 901, 441, 571, 1317, 1753, 1722, 1001, 157, 1137, 254, + 607, 808, 125, 661, 51, 798 + ] + }, + { + "word": "leaking", + "duration": 0.96, + "codes": [ + 1171, 1743, 1260, 1331, 91, 1118, 1782, 555, 1385, 505, 121, 1234, 998, 1360, 836, 214, 1048, 423, 13, + 439, 686, 303, 1352, 864, 727, 107, 1038, 1767, 479, 1685, 806, 1639, 719, 125, 365, 1237, 1732, 298, + 1471, 485, 1141, 1734, 1803, 971, 1798, 691, 1648, 1166, 1082, 1535, 1645, 1154, 388, 11, 110, 890, 449, + 736, 276, 1103, 714, 407, 1749, 243, 1694, 479, 426, 1368, 1348, 
700, 562, 519 + ] + }, + { + "word": "everywhere", + "duration": 1.0, + "codes": [ + 4, 952, 856, 755, 1686, 372, 570, 1816, 1675, 227, 1675, 1145, 674, 1419, 933, 346, 956, 1739, 1519, + 192, 1778, 346, 1106, 1174, 1403, 175, 1491, 1758, 1451, 1777, 1411, 1773, 1716, 1407, 1743, 1186, 1031, + 902, 1174, 7, 231, 737, 1106, 1189, 592, 481, 1659, 1339, 1155, 110, 384, 583, 1656, 626, 279, 1172, + 326, 560, 326, 755, 1097, 1105, 1203, 675, 208, 1002, 1158, 1204, 1132, 1063, 595, 1102, 1216, 855, 1229 + ] + } + ], + "language": "en" +} diff --git a/examples/outetts/speakers/en_male_1.json b/examples/outetts/speakers/en_male_1.json new file mode 100644 index 000000000..122ed9ae7 --- /dev/null +++ b/examples/outetts/speakers/en_male_1.json @@ -0,0 +1,207 @@ +{ + "text": "The overall package from just two people is pretty remarkable. Sure I have some critiques about some of the gameplay aspects, but it's still really enjoyable and it looks lovely.", + "words": [ + { + "word": "the", + "duration": 0.08, + "codes": [257, 740, 636, 913, 788, 1703] + }, + { + "word": "overall", + "duration": 0.36, + "codes": [ + 127, 201, 191, 774, 700, 532, 1056, 557, 798, 298, 1741, 747, 1662, 1617, 1702, 1527, 368, 1588, 1049, + 1008, 1625, 747, 1576, 728, 1019, 1696, 1765 + ] + }, + { + "word": "package", + "duration": 0.56, + "codes": [ + 935, 584, 1319, 627, 1016, 1491, 1344, 1117, 1526, 1040, 239, 1435, 951, 498, 723, 1180, 535, 789, 1649, + 1637, 78, 465, 1668, 901, 595, 1675, 117, 1009, 1667, 320, 840, 79, 507, 1762, 1508, 1228, 1768, 802, + 1450, 1457, 232, 639 + ] + }, + { + "word": "from", + "duration": 0.19, + "codes": [604, 782, 1682, 872, 1532, 1600, 1036, 1761, 647, 1554, 1371, 653, 1595, 950] + }, + { + "word": "just", + "duration": 0.25, + "codes": [ + 1782, 1670, 317, 786, 1748, 631, 599, 1155, 1364, 1524, 36, 1591, 889, 1535, 541, 440, 1532, 50, 870 + ] + }, + { + "word": "two", + "duration": 0.24, + "codes": [1681, 1510, 673, 799, 805, 1342, 330, 519, 62, 640, 1138, 565, 
1552, 1497, 1552, 572, 1715, 1732] + }, + { + "word": "people", + "duration": 0.39, + "codes": [ + 593, 274, 136, 740, 691, 633, 1484, 1061, 1138, 1485, 344, 428, 397, 1562, 645, 917, 1035, 1449, 1669, + 487, 442, 1484, 1329, 1832, 1704, 600, 761, 653, 269 + ] + }, + { + "word": "is", + "duration": 0.16, + "codes": [566, 583, 1755, 646, 1337, 709, 802, 1008, 485, 1583, 652, 10] + }, + { + "word": "pretty", + "duration": 0.32, + "codes": [ + 1818, 1747, 692, 733, 1010, 534, 406, 1697, 1053, 1521, 1355, 1274, 816, 1398, 211, 1218, 817, 1472, + 1703, 686, 13, 822, 445, 1068 + ] + }, + { + "word": "remarkable", + "duration": 0.68, + "codes": [ + 230, 1048, 1705, 355, 706, 1149, 1535, 1787, 1356, 1396, 835, 1583, 486, 1249, 286, 937, 1076, 1150, + 614, 42, 1058, 705, 681, 798, 934, 490, 514, 1399, 572, 1446, 1703, 1346, 1040, 1426, 1304, 664, 171, + 1530, 625, 64, 1708, 1830, 1030, 443, 1509, 1063, 1605, 1785, 721, 1440, 923 + ] + }, + { + "word": "sure", + "duration": 0.36, + "codes": [ + 792, 1780, 923, 1640, 265, 261, 1525, 567, 1491, 1250, 1730, 362, 919, 1766, 543, 1, 333, 113, 970, 252, + 1606, 133, 302, 1810, 1046, 1190, 1675 + ] + }, + { + "word": "i", + "duration": 0.08, + "codes": [123, 439, 1074, 705, 1799, 637] + }, + { + "word": "have", + "duration": 0.16, + "codes": [1509, 599, 518, 1170, 552, 1029, 1267, 864, 419, 143, 1061, 0] + }, + { + "word": "some", + "duration": 0.16, + "codes": [619, 400, 1270, 62, 1370, 1832, 917, 1661, 167, 269, 1366, 1508] + }, + { + "word": "critiques", + "duration": 0.6, + "codes": [ + 559, 584, 1163, 1129, 1313, 1728, 721, 1146, 1093, 577, 928, 27, 630, 1080, 1346, 1337, 320, 1382, 1175, + 1682, 1556, 990, 1683, 860, 1721, 110, 786, 376, 1085, 756, 1523, 234, 1334, 1506, 1578, 659, 612, 1108, + 1466, 1647, 308, 1470, 746, 556, 1061 + ] + }, + { + "word": "about", + "duration": 0.29, + "codes": [ + 26, 1649, 545, 1367, 1263, 1728, 450, 859, 1434, 497, 1220, 1285, 179, 755, 1154, 779, 179, 1229, 1213, + 922, 1774, 1408 + ] + }, 
+ { + "word": "some", + "duration": 0.23, + "codes": [986, 28, 1649, 778, 858, 1519, 1, 18, 26, 1042, 1174, 1309, 1499, 1712, 1692, 1516, 1574] + }, + { + "word": "of", + "duration": 0.07, + "codes": [197, 716, 1039, 1662, 64] + }, + { + "word": "the", + "duration": 0.08, + "codes": [1811, 1568, 569, 886, 1025, 1374] + }, + { + "word": "gameplay", + "duration": 0.48, + "codes": [ + 1269, 1092, 933, 1362, 1762, 1700, 1675, 215, 781, 1086, 461, 838, 1022, 759, 649, 1416, 1004, 551, 909, + 787, 343, 830, 1391, 1040, 1622, 1779, 1360, 1231, 1187, 1317, 76, 997, 989, 978, 737, 189 + ] + }, + { + "word": "aspects", + "duration": 0.56, + "codes": [ + 1423, 797, 1316, 1222, 147, 719, 1347, 386, 1390, 1558, 154, 440, 634, 592, 1097, 1718, 712, 763, 1118, + 1721, 1311, 868, 580, 362, 1435, 868, 247, 221, 886, 1145, 1274, 1284, 457, 1043, 1459, 1818, 62, 599, + 1035, 62, 1649, 778 + ] + }, + { + "word": "but", + "duration": 0.2, + "codes": [780, 1825, 1681, 1007, 861, 710, 702, 939, 1669, 1491, 613, 1739, 823, 1469, 648] + }, + { + "word": "its", + "duration": 0.09, + "codes": [92, 688, 1623, 962, 1670, 527, 599] + }, + { + "word": "still", + "duration": 0.27, + "codes": [ + 636, 10, 1217, 344, 713, 957, 823, 154, 1649, 1286, 508, 214, 1760, 1250, 456, 1352, 1368, 921, 615, 5 + ] + }, + { + "word": "really", + "duration": 0.36, + "codes": [ + 55, 420, 1008, 1659, 27, 644, 1266, 617, 761, 1712, 109, 1465, 1587, 503, 1541, 619, 197, 1019, 817, + 269, 377, 362, 1381, 507, 1488, 4, 1695 + ] + }, + { + "word": "enjoyable", + "duration": 0.49, + "codes": [ + 678, 501, 864, 319, 288, 1472, 1341, 686, 562, 1463, 619, 1563, 471, 911, 730, 1811, 1006, 520, 861, + 1274, 125, 1431, 638, 621, 153, 876, 1770, 437, 987, 1653, 1109, 898, 1285, 80, 593, 1709, 843 + ] + }, + { + "word": "and", + "duration": 0.15, + "codes": [1285, 987, 303, 1037, 730, 1164, 502, 120, 1737, 1655, 1318] + }, + { + "word": "it", + "duration": 0.09, + "codes": [848, 1366, 395, 1601, 1513, 593, 1302] + }, + { + 
"word": "looks", + "duration": 0.27, + "codes": [ + 1281, 1266, 1755, 572, 248, 1751, 1257, 695, 1380, 457, 659, 585, 1315, 1105, 1776, 736, 24, 736, 654, + 1027 + ] + }, + { + "word": "lovely", + "duration": 0.56, + "codes": [ + 634, 596, 1766, 1556, 1306, 1285, 1481, 1721, 1123, 438, 1246, 1251, 795, 659, 1381, 1658, 217, 1772, + 562, 952, 107, 1129, 1112, 467, 550, 1079, 840, 1615, 1469, 1380, 168, 917, 836, 1827, 437, 583, 67, + 595, 1087, 1646, 1493, 1677 + ] + } + ], + "language": "en" +} diff --git a/examples/outetts/speakers/en_male_2.json b/examples/outetts/speakers/en_male_2.json new file mode 100644 index 000000000..6d69ddddf --- /dev/null +++ b/examples/outetts/speakers/en_male_2.json @@ -0,0 +1,246 @@ +{ + "text": "Difficult action game. While you have your melee attacks, the majority of combat takes place using your guns. From long rifles to shotguns and my trusty magnum. There is an emphasis on cooperative play.", + "words": [ + { + "word": "difficult", + "duration": 0.44, + "codes": [ + 1567, 714, 1238, 243, 810, 104, 1826, 1081, 865, 1627, 1176, 1535, 1238, 1832, 346, 647, 36, 631, 1750, + 1273, 1806, 861, 1429, 1332, 1633, 326, 1530, 1661, 503, 1597, 326, 733, 1715 + ] + }, + { + "word": "action", + "duration": 0.48, + "codes": [ + 1319, 1792, 1238, 939, 1765, 1282, 847, 621, 1147, 595, 514, 1180, 1235, 1438, 1057, 1472, 1489, 1822, + 1092, 1178, 796, 1511, 290, 1018, 687, 290, 1403, 253, 1755, 1107, 780, 1376, 341, 1614, 176, 317 + ] + }, + { + "word": "game", + "duration": 0.4, + "codes": [ + 1617, 703, 1617, 662, 101, 1530, 1678, 782, 1829, 146, 1175, 1771, 422, 370, 1079, 1069, 282, 996, 1134, + 814, 430, 296, 1534, 1742, 947, 1285, 1710, 1465, 1152, 640 + ] + }, + { + "word": "while", + "duration": 0.32, + "codes": [ + 1506, 1765, 1451, 1590, 1637, 1390, 727, 1010, 766, 1007, 1585, 1341, 48, 600, 1715, 1226, 887, 840, + 310, 362, 972, 1765, 67, 1572 + ] + }, + { + "word": "you", + "duration": 0.11, + "codes": [1581, 527, 1513, 1267, 774, 108, 
29, 1366] + }, + { + "word": "have", + "duration": 0.21, + "codes": [1591, 434, 949, 1407, 117, 907, 874, 1105, 1132, 498, 1161, 13, 742, 1528, 1653, 1710] + }, + { + "word": "your", + "duration": 0.15, + "codes": [1337, 990, 234, 831, 4, 616, 1820, 1083, 1710, 1564, 529] + }, + { + "word": "melee", + "duration": 0.33, + "codes": [ + 75, 682, 311, 455, 786, 834, 804, 292, 492, 947, 474, 1720, 142, 1628, 614, 758, 789, 117, 1372, 402, + 896, 1537, 128, 927, 1108 + ] + }, + { + "word": "attacks", + "duration": 0.56, + "codes": [ + 1218, 218, 907, 1005, 1793, 1520, 1694, 458, 1428, 276, 315, 639, 231, 1455, 347, 1447, 1361, 370, 1224, + 498, 1509, 499, 588, 1188, 993, 775, 1004, 1591, 716, 999, 1548, 1464, 1720, 681, 1043, 639, 639, 543, + 592, 546, 356, 1217 + ] + }, + { + "word": "the", + "duration": 0.24, + "codes": [654, 335, 1524, 350, 493, 1644, 1724, 591, 799, 1176, 1023, 694, 1585, 806, 1344, 596, 503, 1557] + }, + { + "word": "majority", + "duration": 0.52, + "codes": [ + 1416, 240, 62, 443, 1660, 549, 160, 1729, 567, 32, 1703, 1553, 1337, 45, 631, 335, 184, 395, 576, 1068, + 759, 548, 587, 427, 1455, 775, 673, 983, 1791, 647, 398, 1595, 1664, 592, 1310, 356, 440, 495, 812 + ] + }, + { + "word": "of", + "duration": 0.11, + "codes": [1415, 455, 1452, 821, 1300, 1048, 1485, 1626] + }, + { + "word": "combat", + "duration": 0.44, + "codes": [ + 1588, 1649, 861, 727, 1671, 609, 847, 1004, 0, 1535, 1801, 1100, 1494, 1178, 1224, 623, 1620, 742, 1507, + 1794, 932, 1634, 1285, 1192, 764, 1245, 587, 521, 55, 1230, 1532, 1469, 1758 + ] + }, + { + "word": "takes", + "duration": 0.32, + "codes": [ + 1408, 681, 1385, 1737, 858, 46, 1649, 1091, 1320, 1015, 756, 269, 787, 230, 1777, 911, 91, 1152, 1412, + 1710, 232, 46, 1337, 782 + ] + }, + { + "word": "place", + "duration": 0.4, + "codes": [ + 1496, 1821, 1681, 1725, 1324, 1088, 1668, 1568, 1801, 1275, 1776, 741, 621, 1749, 901, 829, 640, 940, + 1464, 541, 1008, 1747, 631, 1653, 7, 202, 416, 198, 1337, 202 + ] + }, + { + 
"word": "using", + "duration": 0.4, + "codes": [ + 290, 1459, 778, 1819, 1689, 175, 1569, 182, 1512, 361, 778, 716, 231, 1446, 1240, 1763, 1734, 189, 631, + 654, 440, 1280, 1439, 986, 753, 1387, 805, 338, 1606, 1679 + ] + }, + { + "word": "your", + "duration": 0.16, + "codes": [1520, 1250, 1440, 47, 1374, 1731, 963, 1368, 1426, 1506, 1545, 1730] + }, + { + "word": "guns", + "duration": 0.48, + "codes": [ + 450, 1553, 1570, 651, 1648, 899, 971, 1573, 1684, 28, 107, 1744, 236, 1067, 1312, 412, 1110, 1112, 1014, + 1241, 1726, 875, 1830, 1792, 1219, 1565, 1534, 751, 1726, 1598, 1600, 320, 26, 1018, 1337, 1270 + ] + }, + { + "word": "from", + "duration": 0.32, + "codes": [ + 372, 1238, 6, 1436, 1654, 1637, 799, 1244, 1525, 1285, 1390, 1657, 1790, 1061, 634, 493, 972, 977, 1036, + 158, 540, 749, 317, 1552 + ] + }, + { + "word": "long", + "duration": 0.24, + "codes": [875, 529, 710, 63, 1822, 158, 1647, 809, 1022, 1155, 774, 1118, 63, 669, 1069, 1513, 1469, 47] + }, + { + "word": "rifles", + "duration": 0.59, + "codes": [ + 1007, 1650, 1015, 954, 843, 1075, 803, 1078, 839, 1382, 424, 1674, 800, 657, 1487, 382, 95, 41, 979, + 1619, 1485, 362, 1646, 1400, 333, 1761, 1827, 806, 389, 1562, 789, 481, 1606, 647, 1597, 1340, 593, + 1627, 1401, 1791, 202, 368, 213, 356 + ] + }, + { + "word": "to", + "duration": 0.12, + "codes": [1485, 6, 1817, 1623, 440, 631, 1366, 547, 1479] + }, + { + "word": "shotguns", + "duration": 0.64, + "codes": [ + 987, 62, 154, 790, 290, 1161, 430, 543, 1332, 24, 1270, 1700, 412, 462, 1629, 595, 260, 1819, 1435, + 1828, 1752, 1380, 1803, 1465, 1321, 1300, 1330, 1459, 547, 913, 719, 303, 1811, 595, 716, 344, 909, 784, + 203, 1269, 1394, 734, 415, 1633, 233, 202, 1649, 409 + ] + }, + { + "word": "and", + "duration": 0.36, + "codes": [ + 858, 416, 328, 1793, 1390, 957, 1262, 1601, 1628, 1346, 1466, 764, 621, 1459, 844, 158, 626, 827, 1466, + 1110, 18, 1540, 1515, 615, 593, 1627, 1145 + ] + }, + { + "word": "my", + "duration": 0.21, + "codes": [1445, 481, 
1035, 1593, 1718, 1464, 1808, 644, 1344, 1381, 466, 936, 123, 604, 1545, 1340] + }, + { + "word": "trusty", + "duration": 0.44, + "codes": [ + 1583, 572, 1749, 1114, 1462, 308, 1332, 471, 863, 1410, 993, 1524, 1100, 1758, 958, 1453, 768, 1638, + 1625, 1002, 117, 1364, 213, 631, 202, 454, 957, 820, 1748, 822, 1335, 295, 1039 + ] + }, + { + "word": "magnum", + "duration": 0.48, + "codes": [ + 1285, 1679, 1626, 1257, 694, 1339, 678, 1815, 271, 1656, 535, 607, 383, 550, 1372, 769, 815, 1763, 1525, + 513, 1637, 1782, 1439, 539, 867, 297, 1476, 1620, 65, 1361, 804, 577, 1700, 91, 1583, 1745 + ] + }, + { + "word": "there", + "duration": 0.36, + "codes": [ + 761, 1532, 32, 1828, 950, 761, 1037, 1293, 1023, 1510, 1657, 1821, 1284, 603, 1669, 634, 757, 940, 782, + 955, 18, 296, 1636, 455, 108, 1629, 333 + ] + }, + { + "word": "is", + "duration": 0.23, + "codes": [1571, 1517, 399, 863, 1649, 302, 103, 213, 1409, 1733, 356, 736, 104, 184, 1778, 198, 298] + }, + { + "word": "an", + "duration": 0.12, + "codes": [654, 276, 1462, 104, 1388, 815, 736, 889, 1069] + }, + { + "word": "emphasis", + "duration": 0.51, + "codes": [ + 695, 678, 319, 262, 1396, 583, 473, 1728, 1706, 865, 1616, 1270, 1078, 1487, 911, 319, 1586, 335, 519, + 507, 356, 26, 1, 937, 1833, 335, 909, 990, 132, 616, 1814, 1, 1563, 38, 276, 368, 7, 335 + ] + }, + { + "word": "on", + "duration": 0.24, + "codes": [183, 844, 1477, 1720, 1751, 1302, 1743, 1435, 1789, 997, 644, 1828, 1253, 688, 1672, 473, 835, 26] + }, + { + "word": "cooperative", + "duration": 0.61, + "codes": [ + 856, 22, 1425, 1461, 901, 357, 1174, 1175, 1691, 1661, 626, 1776, 1286, 429, 1702, 757, 1702, 730, 1015, + 86, 321, 1628, 763, 1792, 1760, 1499, 1053, 1825, 1661, 1364, 1553, 1609, 409, 315, 166, 1027, 1295, + 927, 1231, 113, 1513, 1747, 1753, 1617, 1614, 274 + ] + }, + { + "word": "play", + "duration": 0.32, + "codes": [ + 1669, 1764, 1298, 1411, 1487, 454, 1035, 1814, 1295, 973, 787, 834, 798, 617, 1720, 1331, 768, 252, + 1757, 318, 695, 884, 
775, 631 + ] + } + ], + "language": "en" +} diff --git a/examples/outetts/speakers/en_male_3.json b/examples/outetts/speakers/en_male_3.json new file mode 100644 index 000000000..7506e166c --- /dev/null +++ b/examples/outetts/speakers/en_male_3.json @@ -0,0 +1,185 @@ +{ + "text": "Uhm, your way of celebrating your mum, but also I think, you know, spreading awareness, which is so important. And so, you know, I think you've taken.", + "words": [ + { + "word": "uhm", + "duration": 0.31, + "codes": [ + 119, 548, 926, 164, 142, 214, 979, 142, 1497, 498, 616, 1160, 359, 1812, 571, 1336, 498, 456, 1741, + 1708, 1371, 1646, 878 + ] + }, + { + "word": "your", + "duration": 0.49, + "codes": [ + 615, 1470, 853, 1607, 685, 1640, 1717, 1626, 1052, 1467, 1536, 845, 257, 180, 1657, 1586, 659, 54, 666, + 235, 1747, 1405, 935, 714, 1665, 1618, 340, 286, 211, 1485, 1018, 83, 280, 1650, 1278, 569, 439 + ] + }, + { + "word": "way", + "duration": 0.2, + "codes": [1609, 1701, 395, 748, 1696, 1001, 280, 1517, 306, 803, 499, 373, 947, 179, 1449] + }, + { + "word": "of", + "duration": 0.08, + "codes": [232, 472, 796, 825, 1299, 1225] + }, + { + "word": "celebrating", + "duration": 0.84, + "codes": [ + 1650, 821, 117, 1436, 330, 687, 979, 407, 886, 596, 430, 1139, 253, 69, 777, 370, 1072, 755, 81, 222, + 1804, 1216, 1506, 945, 1013, 520, 179, 1272, 683, 1812, 564, 1570, 1664, 959, 791, 306, 413, 966, 252, + 326, 349, 492, 570, 948, 789, 24, 1129, 1407, 781, 99, 1633, 522, 63, 190, 753, 683, 368, 644, 1679, + 1168, 1295, 788, 1793 + ] + }, + { + "word": "your", + "duration": 0.35, + "codes": [ + 1350, 4, 515, 1520, 317, 1573, 1383, 615, 815, 896, 1157, 1684, 392, 641, 733, 1538, 1725, 530, 154, + 1785, 297, 463, 1743, 407, 623, 1371 + ] + }, + { + "word": "mum", + "duration": 0.21, + "codes": [1009, 519, 211, 98, 1101, 364, 106, 429, 827, 1143, 367, 221, 211, 1709, 1024, 515] + }, + { + "word": "but", + "duration": 0.16, + "codes": [183, 1069, 539, 1796, 1151, 1676, 525, 1127, 366, 618, 
1695, 127] + }, + { + "word": "also", + "duration": 0.28, + "codes": [ + 850, 393, 413, 1673, 868, 291, 1673, 1344, 462, 385, 1073, 1398, 906, 592, 519, 448, 592, 348, 1258, + 711, 115 + ] + }, + { + "word": "i", + "duration": 0.16, + "codes": [539, 1519, 390, 389, 441, 413, 715, 390, 410, 294, 726, 269] + }, + { + "word": "think", + "duration": 0.32, + "codes": [ + 1791, 1781, 1619, 1393, 1649, 1741, 1535, 1703, 1721, 1134, 1435, 112, 1660, 232, 98, 654, 593, 485, + 593, 368, 530, 1584, 379, 338 + ] + }, + { + "word": "you", + "duration": 0.4, + "codes": [ + 379, 387, 1767, 543, 1410, 546, 1117, 614, 65, 793, 414, 284, 396, 538, 1039, 226, 1277, 1632, 1152, + 1796, 888, 953, 888, 464, 1762, 1446, 10, 876, 184, 1009 + ] + }, + { + "word": "know", + "duration": 0.11, + "codes": [473, 623, 383, 352, 1814, 147, 1289, 164] + }, + { + "word": "spreading", + "duration": 0.44, + "codes": [ + 482, 796, 134, 1387, 232, 1310, 308, 330, 400, 162, 535, 197, 1039, 274, 1442, 1701, 1138, 723, 39, 280, + 410, 412, 160, 645, 225, 1519, 581, 815, 415, 835, 884, 312, 1779 + ] + }, + { + "word": "awareness", + "duration": 0.48, + "codes": [ + 173, 988, 93, 1727, 374, 1442, 312, 436, 482, 1752, 373, 1727, 173, 1706, 522, 909, 543, 590, 773, 210, + 1057, 215, 1020, 623, 1379, 147, 1427, 1182, 1816, 1221, 877, 1626, 178, 175, 780, 985 + ] + }, + { + "word": "which", + "duration": 0.19, + "codes": [1214, 1031, 698, 1820, 547, 211, 605, 1152, 442, 837, 1774, 232, 522, 876] + }, + { + "word": "is", + "duration": 0.16, + "codes": [1649, 581, 211, 884, 110, 1530, 1217, 1409, 4, 1027, 1624, 849] + }, + { + "word": "so", + "duration": 0.53, + "codes": [ + 1337, 1034, 656, 1410, 979, 790, 1436, 471, 1403, 750, 782, 1697, 1748, 1380, 288, 668, 17, 585, 866, + 457, 975, 534, 401, 1570, 401, 1474, 1762, 1352, 1545, 178, 362, 356, 206, 276, 263, 356, 253, 492, 441, + 1101 + ] + }, + { + "word": "important", + "duration": 0.64, + "codes": [ + 474, 211, 1797, 626, 1396, 571, 593, 1160, 530, 1624, 
317, 1568, 835, 1570, 1327, 892, 1208, 1463, 1577, + 895, 1378, 897, 1781, 82, 539, 1748, 238, 416, 390, 1732, 306, 312, 1832, 445, 338, 418, 60, 1502, 1808, + 503, 1334, 1581, 1554, 1781, 506, 1519, 1681, 269 + ] + }, + { + "word": "and", + "duration": 0.24, + "codes": [989, 1121, 932, 1393, 1103, 1164, 1185, 32, 1459, 671, 874, 872, 1056, 539, 1626, 1091, 539, 1569] + }, + { + "word": "so", + "duration": 0.16, + "codes": [1620, 1594, 10, 599, 746, 172, 1175, 980, 1317, 1402, 115, 148] + }, + { + "word": "you", + "duration": 0.63, + "codes": [ + 1094, 248, 1384, 393, 611, 522, 425, 947, 326, 930, 503, 1215, 1813, 99, 284, 1070, 447, 1168, 732, + 1120, 1749, 900, 743, 1684, 1360, 1225, 490, 743, 1268, 673, 793, 490, 620, 1279, 469, 649, 717, 986, + 669, 174, 555, 284, 139, 1479, 1463, 79, 214 + ] + }, + { + "word": "know", + "duration": 0.08, + "codes": [368, 471, 112, 1077, 299, 472] + }, + { + "word": "i", + "duration": 0.08, + "codes": [1170, 210, 1526, 499, 615, 1287] + }, + { + "word": "think", + "duration": 0.17, + "codes": [1654, 1709, 1057, 1817, 1012, 1076, 1470, 768, 539, 778, 1698, 1507, 888] + }, + { + "word": "youve", + "duration": 0.16, + "codes": [1797, 62, 605, 1800, 576, 1444, 434, 952, 485, 512, 1751, 691] + }, + { + "word": "taken", + "duration": 0.39, + "codes": [ + 1806, 822, 537, 700, 999, 1462, 791, 159, 1477, 558, 291, 1797, 901, 1336, 599, 1408, 1555, 707, 1677, + 919, 1411, 1381, 512, 525, 1583, 847, 78, 1596, 810 + ] + } + ], + "language": "en" +} diff --git a/examples/outetts/speakers/en_male_4.json b/examples/outetts/speakers/en_male_4.json new file mode 100644 index 000000000..c3333099c --- /dev/null +++ b/examples/outetts/speakers/en_male_4.json @@ -0,0 +1,254 @@ +{ + "text": "Corrosion by running water opens a window to a subterranean world. 
Jijin Cave covers an area of more than 700,000 square meters, which is equivalent to 100 football fields.", + "words": [ + { + "word": "corrosion", + "duration": 0.56, + "codes": [ + 1231, 1183, 393, 1069, 773, 1394, 217, 200, 211, 783, 1616, 814, 1680, 1381, 1372, 648, 211, 433, 57, + 679, 375, 1496, 1149, 328, 289, 748, 300, 1692, 1286, 318, 1633, 463, 596, 1436, 454, 1417, 1414, 593, + 848, 1422, 738, 1412 + ] + }, + { + "word": "by", + "duration": 0.31, + "codes": [ + 1526, 1674, 1411, 1048, 1829, 1473, 804, 1355, 1453, 1396, 1390, 78, 1722, 1113, 1206, 1063, 703, 609, + 1129, 1373, 983, 271, 668 + ] + }, + { + "word": "running", + "duration": 0.33, + "codes": [ + 514, 239, 304, 1008, 673, 850, 1506, 1315, 768, 280, 1403, 206, 112, 1336, 1436, 1801, 52, 46, 200, 503, + 1773, 1305, 1663, 348, 312 + ] + }, + { + "word": "water", + "duration": 0.44, + "codes": [ + 742, 1782, 1176, 1352, 1597, 1365, 662, 1614, 1281, 90, 1692, 843, 397, 891, 1219, 1347, 644, 1415, + 1816, 1581, 1153, 1702, 1139, 357, 175, 780, 1372, 1361, 1063, 876, 1160, 1795, 1013 + ] + }, + { + "word": "opens", + "duration": 0.71, + "codes": [ + 1404, 1680, 1745, 1125, 1357, 1774, 1126, 1392, 1314, 758, 5, 1738, 516, 1199, 1393, 1758, 1330, 1212, + 550, 1488, 384, 439, 1712, 587, 389, 1768, 32, 59, 795, 1319, 1331, 349, 1427, 1124, 1278, 753, 192, + 539, 1818, 1228, 1732, 439, 1472, 1215, 473, 211, 1800, 790, 143, 1376, 202, 6, 430 + ] + }, + { + "word": "a", + "duration": 0.13, + "codes": [996, 1245, 1790, 1010, 669, 1741, 1431, 997, 225, 1005] + }, + { + "word": "window", + "duration": 0.55, + "codes": [ + 239, 92, 539, 1446, 1786, 909, 633, 1727, 150, 1244, 385, 1781, 1535, 55, 539, 1240, 1310, 966, 640, + 1590, 230, 455, 1597, 755, 97, 200, 719, 144, 952, 1546, 1605, 1125, 1222, 892, 1400, 1368, 1077, 1393, + 1427, 1058, 1738 + ] + }, + { + "word": "to", + "duration": 0.2, + "codes": [1830, 653, 1638, 1227, 355, 852, 566, 559, 1447, 206, 1300, 989, 1823, 1052, 1456] + }, + { + "word": "a", + 
"duration": 0.09, + "codes": [703, 1612, 810, 657, 27, 102, 1261] + }, + { + "word": "subterranean", + "duration": 0.91, + "codes": [ + 1315, 882, 276, 183, 769, 452, 493, 471, 844, 454, 596, 870, 889, 592, 1018, 596, 328, 1358, 626, 179, + 95, 214, 1536, 1827, 1239, 1775, 32, 850, 1145, 1732, 1215, 986, 187, 1360, 1498, 206, 1139, 1769, 874, + 148, 1104, 877, 487, 1506, 414, 359, 131, 569, 1039, 876, 637, 423, 63, 844, 316, 264, 183, 1401, 1734, + 1489, 1013, 263, 1485, 1488, 1707, 363, 987, 1741 + ] + }, + { + "word": "world", + "duration": 0.45, + "codes": [ + 1813, 738, 343, 859, 1006, 406, 893, 716, 1762, 909, 973, 1684, 834, 1078, 1813, 1013, 1320, 1435, 1663, + 1282, 361, 1584, 19, 1454, 1708, 1489, 1454, 1647, 1505, 1312, 1225, 1433, 1741, 581 + ] + }, + { + "word": "jijin", + "duration": 1.05, + "codes": [ + 1382, 927, 1426, 744, 1735, 1525, 1294, 1699, 1684, 1414, 1543, 1025, 1478, 1543, 1301, 1428, 1655, + 1433, 1479, 1248, 805, 1273, 17, 862, 899, 1440, 1293, 1348, 1684, 1010, 1231, 1348, 1007, 1231, 957, + 1293, 1236, 869, 810, 585, 964, 954, 1601, 670, 861, 1601, 729, 1479, 1348, 870, 434, 117, 614, 398, + 395, 764, 343, 200, 863, 627, 91, 691, 1727, 943, 769, 502, 599, 1781, 367, 968, 217, 335, 1674, 500, + 1121, 150, 1345, 1742, 850 + ] + }, + { + "word": "cave", + "duration": 0.64, + "codes": [ + 1799, 1084, 1488, 1034, 987, 1491, 1486, 850, 686, 1352, 944, 596, 1671, 1451, 1393, 1404, 829, 512, + 402, 801, 1370, 822, 518, 1713, 328, 877, 333, 1346, 529, 938, 1285, 1708, 1528, 1490, 416, 1639, 1014, + 231, 1581, 1666, 1432, 1406, 1496, 1422, 1811, 1012, 1399, 1256 + ] + }, + { + "word": "covers", + "duration": 0.52, + "codes": [ + 535, 1344, 958, 1259, 1285, 1305, 1465, 727, 1390, 1767, 958, 658, 1700, 1651, 910, 570, 146, 222, 825, + 1098, 1789, 314, 1715, 506, 1710, 532, 688, 1563, 588, 117, 1397, 58, 129, 214, 1695, 687, 473, 778, 18 + ] + }, + { + "word": "an", + "duration": 0.13, + "codes": [176, 1002, 466, 827, 1138, 297, 399, 1467, 1490, 
1031] + }, + { + "word": "area", + "duration": 0.39, + "codes": [ + 1295, 1811, 1215, 761, 793, 1705, 1476, 632, 1501, 262, 173, 225, 434, 599, 146, 280, 1224, 821, 1678, + 280, 1801, 350, 429, 231, 402, 686, 212, 522, 1271 + ] + }, + { + "word": "of", + "duration": 0.49, + "codes": [ + 705, 535, 438, 261, 929, 355, 1303, 1412, 1736, 1098, 1820, 1801, 1170, 1785, 1393, 1104, 1152, 1130, + 1448, 1096, 1153, 1433, 1361, 19, 1380, 1752, 950, 131, 465, 1613, 635, 860, 277, 618, 1712, 639, 1610 + ] + }, + { + "word": "more", + "duration": 0.23, + "codes": [1779, 820, 302, 1564, 1688, 1401, 74, 395, 268, 683, 644, 397, 1482, 397, 834, 629, 332] + }, + { + "word": "than", + "duration": 0.16, + "codes": [501, 1604, 607, 1755, 549, 222, 1773, 300, 835, 556, 593, 863] + }, + { + "word": "seven", + "duration": 0.44, + "codes": [ + 617, 1240, 1449, 1387, 202, 858, 796, 844, 1519, 471, 1166, 796, 543, 0, 1124, 147, 768, 836, 324, 361, + 1350, 1824, 763, 392, 1796, 684, 1698, 1091, 1790, 1371, 1670, 662, 1516 + ] + }, + { + "word": "hundred", + "duration": 0.48, + "codes": [ + 1810, 1640, 1741, 1689, 1540, 1355, 1723, 1282, 1171, 1794, 1651, 944, 248, 1281, 535, 616, 1510, 203, + 1550, 678, 64, 123, 551, 716, 1667, 1614, 757, 1631, 617, 76, 838, 653, 1769, 505, 1803, 1139 + ] + }, + { + "word": "zero", + "duration": 0.61, + "codes": [ + 1769, 1752, 1400, 1718, 1040, 1175, 1724, 1832, 525, 1040, 352, 1034, 411, 570, 825, 271, 624, 87, 461, + 1282, 706, 877, 782, 1433, 1627, 782, 1606, 778, 1436, 1817, 1256, 617, 1787, 999, 1048, 1720, 1277, + 1357, 1808, 1731, 1756, 328, 1518, 1061, 400, 416 + ] + }, + { + "word": "square", + "duration": 0.33, + "codes": [ + 463, 1166, 640, 1292, 659, 1277, 1429, 1285, 921, 664, 148, 320, 635, 1241, 1497, 147, 869, 1013, 330, + 1372, 1427, 1212, 1427, 1492, 878 + ] + }, + { + "word": "meters", + "duration": 0.44, + "codes": [ + 663, 1185, 1084, 1463, 1660, 659, 682, 290, 1267, 667, 1312, 428, 986, 1625, 823, 1734, 999, 889, 782, + 809, 985, 935, 
243, 1281, 1449, 1476, 1109, 1638, 1002, 1542, 308, 67, 927 + ] + }, + { + "word": "which", + "duration": 1.84, + "codes": [ + 581, 1766, 669, 1573, 824, 707, 567, 551, 1038, 68, 559, 1154, 445, 680, 563, 235, 516, 378, 275, 967, + 68, 513, 378, 94, 516, 526, 68, 1154, 223, 566, 1137, 167, 717, 378, 241, 967, 223, 445, 1137, 167, 513, + 526, 241, 717, 223, 275, 1137, 187, 516, 316, 445, 1003, 551, 1601, 1741, 257, 693, 995, 5, 735, 669, + 729, 976, 799, 171, 1589, 484, 1114, 1510, 5, 1322, 633, 638, 669, 562, 1114, 820, 1070, 995, 1080, 832, + 995, 458, 957, 1634, 1157, 1003, 676, 486, 1305, 169, 735, 1390, 470, 795, 795, 548, 1652, 169, 751, + 1033, 538, 1251, 651, 486, 1118, 624, 681, 751, 845, 735, 939, 651, 1033, 1080, 1652, 1600, 1157, 981, + 933, 655, 838, 1076, 171, 459, 548, 1327, 427, 1406, 858, 87, 211, 1468, 533, 1603, 96, 844, 520 + ] + }, + { + "word": "is", + "duration": 0.09, + "codes": [152, 92, 47, 132, 1452, 432, 631] + }, + { + "word": "equivalent", + "duration": 0.55, + "codes": [ + 1473, 364, 522, 783, 1508, 1543, 1023, 964, 1285, 1260, 1122, 1663, 495, 1710, 1392, 557, 276, 389, 48, + 1732, 1564, 1538, 1370, 105, 1481, 443, 1533, 1770, 1019, 1041, 1102, 1494, 1247, 1388, 1637, 1736, 733, + 1735, 1381, 1364, 656 + ] + }, + { + "word": "to", + "duration": 0.24, + "codes": [1748, 1082, 901, 1568, 1329, 1231, 666, 1822, 486, 1305, 1813, 822, 358, 368, 1823, 547, 539, 775] + }, + { + "word": "one", + "duration": 0.36, + "codes": [ + 193, 126, 1717, 338, 1807, 121, 807, 758, 59, 395, 1321, 909, 836, 690, 1741, 339, 443, 623, 1542, 1230, + 1357, 485, 1625, 455, 317, 1826, 1282 + ] + }, + { + "word": "hundred", + "duration": 0.44, + "codes": [ + 1365, 1502, 1126, 1709, 1053, 1125, 1466, 1041, 1117, 1459, 1034, 955, 291, 797, 1177, 1635, 1599, 197, + 312, 1564, 636, 1520, 1695, 1320, 1765, 959, 142, 1496, 810, 926, 325, 240, 1671 + ] + }, + { + "word": "football", + "duration": 0.41, + "codes": [ + 1379, 1807, 1313, 1681, 1788, 1355, 1825, 1088, 1337, 
941, 798, 1224, 806, 1450, 850, 240, 1011, 627, + 1033, 1779, 1532, 599, 1490, 1014, 1311, 1384, 1574, 354, 264, 1758, 738 + ] + }, + { + "word": "fields", + "duration": 0.4, + "codes": [ + 1439, 895, 1748, 715, 1535, 1034, 919, 770, 1035, 36, 784, 217, 380, 121, 1257, 384, 1141, 385, 1258, + 1312, 1732, 1820, 1605, 1724, 1812, 318, 858, 28, 858, 1342 + ] + } + ], + "language": "en" +} diff --git a/examples/outetts/speakers/gordon_ramsay.json b/examples/outetts/speakers/gordon_ramsay.json new file mode 100644 index 000000000..f20208776 --- /dev/null +++ b/examples/outetts/speakers/gordon_ramsay.json @@ -0,0 +1,1229 @@ +{ + "text": " because it made me ignite. I could see this incredible sort of beginning of something unique. And I think when we look at individuals, figureheads in our lives, and we want to aspire to become them.", + "words": [ + { + "word": "because", + "duration": 0.63, + "codes": [ + 257, + 704, + 765, + 1011, + 692, + 94, + 544, + 1163, + 400, + 806, + 769, + 455, + 183, + 1699, + 5, + 1334, + 1713, + 1212, + 375, + 1797, + 901, + 961, + 1400, + 88, + 1825, + 212, + 1790, + 248, + 1700, + 705, + 1056, + 1502, + 422, + 1763, + 142, + 1710, + 376, + 521, + 1655, + 57, + 416, + 22, + 273, + 6, + 1679, + 323, + 440 + ] + }, + { + "word": "it", + "duration": 0.28, + "codes": [ + 1400, + 1166, + 1065, + 933, + 509, + 313, + 544, + 258, + 669, + 154, + 1245, + 581, + 102, + 1726, + 11, + 39, + 1492, + 1217, + 1456, + 976, + 311 + ] + }, + { + "word": "made", + "duration": 0.41, + "codes": [ + 351, + 16, + 23, + 1346, + 1775, + 1784, + 455, + 1553, + 382, + 742, + 719, + 932, + 76, + 1303, + 215, + 318, + 303, + 769, + 937, + 296, + 697, + 1642, + 407, + 1537, + 296, + 1456, + 1712, + 56, + 900, + 1444, + 954 + ] + }, + { + "word": "me", + "duration": 0.15, + "codes": [ + 363, + 806, + 193, + 647, + 1120, + 1396, + 236, + 914, + 806, + 230, + 1621 + ] + }, + { + "word": "ignite", + "duration": 1.44, + "codes": [ + 113, + 527, + 1407, + 295, + 1039, + 13, 
+ 335, + 795, + 57, + 510, + 649, + 1427, + 46, + 1649, + 1730, + 1384, + 70, + 1775, + 1710, + 1621, + 1775, + 166, + 1815, + 1756, + 1349, + 1092, + 415, + 1600, + 1629, + 867, + 1251, + 725, + 884, + 1092, + 1485, + 1512, + 331, + 257, + 3, + 417, + 1011, + 54, + 766, + 696, + 360, + 1284, + 360, + 766, + 387, + 287, + 325, + 696, + 509, + 325, + 391, + 1244, + 692, + 606, + 1011, + 765, + 257, + 274, + 360, + 1244, + 285, + 288, + 1728, + 1113, + 62, + 321, + 11, + 1025, + 1778, + 781, + 970, + 1543, + 1806, + 1509, + 1422, + 770, + 1145, + 98, + 510, + 93, + 656, + 106, + 738, + 1483, + 1356, + 1042, + 969, + 127, + 557, + 1361, + 658, + 1645, + 1002, + 582, + 1321, + 1730, + 1319, + 31, + 285, + 509, + 778, + 50, + 1270, + 350 + ] + }, + { + "word": "i", + "duration": 0.36, + "codes": [ + 1436, + 533, + 581, + 1637, + 746, + 872, + 392, + 919, + 578, + 561, + 351, + 241, + 1474, + 1285, + 805, + 1242, + 288, + 669, + 360, + 23, + 343, + 1438, + 1229, + 1113, + 179, + 231, + 1620 + ] + }, + { + "word": "could", + "duration": 0.17, + "codes": [ + 1468, + 1510, + 1266, + 689, + 1283, + 1668, + 1041, + 695, + 1825, + 925, + 1750, + 778, + 554 + ] + }, + { + "word": "see", + "duration": 0.19, + "codes": [ + 533, + 350, + 1238, + 507, + 10, + 750, + 383, + 1324, + 36, + 1415, + 323, + 789, + 605, + 127 + ] + }, + { + "word": "this", + "duration": 0.17, + "codes": [ + 678, + 275, + 562, + 1393, + 1407, + 665, + 1804, + 1004, + 4, + 1542, + 812, + 556, + 344 + ] + }, + { + "word": "incredible", + "duration": 1.19, + "codes": [ + 24, + 400, + 1094, + 1642, + 391, + 465, + 445, + 765, + 1011, + 265, + 1244, + 766, + 487, + 325, + 54, + 704, + 274, + 287, + 862, + 765, + 257, + 325, + 360, + 1244, + 899, + 287, + 1284, + 765, + 765, + 1016, + 287, + 686, + 766, + 606, + 325, + 351, + 351, + 141, + 1471, + 440, + 254, + 395, + 1552, + 1488, + 110, + 923, + 1341, + 1721, + 12, + 1114, + 1259, + 1110, + 573, + 1229, + 1457, + 375, + 595, + 1160, + 1300, + 1392, + 955, + 
388, + 84, + 705, + 993, + 1093, + 859, + 1354, + 545, + 1119, + 1823, + 120, + 343, + 830, + 1239, + 476, + 1381, + 1341, + 1224, + 894, + 1581, + 754, + 1540, + 1467, + 946, + 1440, + 250, + 988, + 1619 + ] + }, + { + "word": "sort", + "duration": 0.92, + "codes": [ + 1266, + 893, + 1236, + 1246, + 1315, + 505, + 131, + 1236, + 1255, + 692, + 692, + 585, + 75, + 1288, + 1064, + 131, + 1064, + 1067, + 351, + 1031, + 1114, + 950, + 1031, + 567, + 1104, + 1130, + 387, + 872, + 1205, + 1707, + 872, + 274, + 1059, + 1059, + 795, + 1117, + 724, + 762, + 1007, + 391, + 766, + 710, + 54, + 1244, + 445, + 313, + 930, + 333, + 778, + 596, + 1287, + 645, + 430, + 1061, + 543, + 656, + 919, + 596, + 1229, + 965, + 847, + 553, + 1579, + 352, + 1007, + 738, + 1466, + 449, + 607 + ] + }, + { + "word": "of", + "duration": 0.12, + "codes": [ + 146, + 752, + 1502, + 972, + 1437, + 832, + 671, + 533, + 1694 + ] + }, + { + "word": "beginning", + "duration": 1.48, + "codes": [ + 1653, + 867, + 1242, + 1617, + 1085, + 1016, + 943, + 1375, + 979, + 800, + 1701, + 985, + 1411, + 1496, + 715, + 1565, + 1519, + 1175, + 1749, + 1370, + 1476, + 1762, + 1543, + 490, + 54, + 281, + 311, + 1244, + 696, + 765, + 1284, + 360, + 975, + 387, + 765, + 1011, + 606, + 544, + 862, + 445, + 704, + 287, + 391, + 1752, + 933, + 1639, + 993, + 751, + 1091, + 1130, + 1418, + 347, + 1279, + 1379, + 1031, + 1652, + 1170, + 1448, + 1033, + 1130, + 265, + 606, + 975, + 360, + 1016, + 986, + 401, + 1016, + 606, + 704, + 692, + 287, + 862, + 606, + 257, + 1244, + 287, + 862, + 766, + 287, + 325, + 241, + 360, + 54, + 1261, + 364, + 254, + 631, + 312, + 677, + 845, + 1595, + 780, + 782, + 521, + 502, + 657, + 157, + 1387, + 1103, + 1329, + 1505, + 521, + 202, + 1607, + 759, + 536, + 1589, + 1230, + 663, + 1745 + ] + }, + { + "word": "of", + "duration": 0.12, + "codes": [ + 1408, + 550, + 1462, + 1630, + 610, + 1767, + 343, + 1585, + 911 + ] + }, + { + "word": "something", + "duration": 0.29, + "codes": [ + 1354, 
+ 350, + 178, + 636, + 1434, + 53, + 813, + 791, + 269, + 753, + 31, + 1014, + 630, + 59, + 1768, + 362, + 1344, + 375, + 1773, + 577, + 1512, + 1526 + ] + }, + { + "word": "unique", + "duration": 0.4, + "codes": [ + 1642, + 1568, + 1715, + 276, + 1763, + 236, + 178, + 1469, + 1463, + 257, + 277, + 520, + 822, + 1139, + 1224, + 473, + 1376, + 1499, + 1545, + 233, + 1799, + 325, + 1713, + 458, + 1551, + 1571, + 1011, + 340, + 1623, + 1034 + ] + }, + { + "word": "and", + "duration": 0.67, + "codes": [ + 943, + 1114, + 383, + 1432, + 672, + 989, + 360, + 544, + 606, + 509, + 1474, + 606, + 766, + 1244, + 391, + 1244, + 54, + 1128, + 1134, + 603, + 1379, + 1458, + 866, + 1074, + 1007, + 1214, + 1355, + 1601, + 1074, + 1752, + 995, + 872, + 1601, + 1215, + 1496, + 710, + 1011, + 287, + 509, + 766, + 360, + 257, + 851, + 20, + 1825, + 951, + 791, + 84, + 1827, + 65 + ] + }, + { + "word": "i", + "duration": 0.05, + "codes": [ + 616, + 1139, + 376, + 1634 + ] + }, + { + "word": "think", + "duration": 0.19, + "codes": [ + 1626, + 1569, + 1781, + 874, + 422, + 189, + 63, + 1791, + 914, + 1263, + 804, + 1550, + 1285, + 1660 + ] + }, + { + "word": "when", + "duration": 0.2, + "codes": [ + 1248, + 612, + 925, + 696, + 469, + 1595, + 646, + 1811, + 757, + 1500, + 252, + 1748, + 616, + 1250, + 302 + ] + }, + { + "word": "we", + "duration": 0.09, + "codes": [ + 1383, + 877, + 1310, + 1819, + 138, + 1383, + 110 + ] + }, + { + "word": "look", + "duration": 0.28, + "codes": [ + 619, + 1596, + 648, + 933, + 1056, + 1563, + 442, + 1653, + 985, + 1684, + 774, + 1492, + 1633, + 553, + 1473, + 768, + 831, + 1788, + 1348, + 712, + 872 + ] + }, + { + "word": "at", + "duration": 0.2, + "codes": [ + 1747, + 383, + 1270, + 1556, + 142, + 1833, + 212, + 1078, + 1133, + 756, + 79, + 1333, + 1456, + 1526, + 1327 + ] + }, + { + "word": "individuals", + "duration": 0.75, + "codes": [ + 21, + 423, + 686, + 265, + 1244, + 509, + 765, + 1284, + 54, + 766, + 765, + 285, + 281, + 505, + 1215, + 1450, + 
300, + 730, + 812, + 55, + 1805, + 1326, + 64, + 1460, + 522, + 1832, + 83, + 1510, + 1081, + 254, + 1650, + 2, + 1765, + 1473, + 668, + 1783, + 14, + 330, + 1494, + 1324, + 644, + 1075, + 1591, + 174, + 814, + 1715, + 921, + 1084, + 752, + 1093, + 1678, + 1790, + 1561, + 335, + 1748, + 404 + ] + }, + { + "word": "figureheads", + "duration": 0.61, + "codes": [ + 1082, + 1290, + 10, + 1436, + 731, + 780, + 865, + 802, + 1720, + 1525, + 1248, + 272, + 436, + 744, + 136, + 584, + 209, + 504, + 1151, + 10, + 1161, + 492, + 498, + 1410, + 200, + 201, + 1048, + 722, + 1683, + 1228, + 1141, + 870, + 949, + 756, + 933, + 236, + 827, + 1505, + 1044, + 592, + 1778, + 1071, + 436, + 1603, + 273, + 166 + ] + }, + { + "word": "in", + "duration": 0.08, + "codes": [ + 1731, + 1212, + 83, + 1259, + 921, + 1542 + ] + }, + { + "word": "our", + "duration": 0.11, + "codes": [ + 1674, + 1384, + 1256, + 1190, + 1149, + 903, + 320, + 1127 + ] + }, + { + "word": "lives", + "duration": 0.33, + "codes": [ + 1697, + 1497, + 205, + 466, + 1378, + 212, + 1097, + 73, + 1771, + 697, + 1402, + 446, + 1622, + 364, + 1278, + 1740, + 1429, + 231, + 1650, + 995, + 1510, + 1802, + 340, + 782, + 817 + ] + }, + { + "word": "and", + "duration": 0.11, + "codes": [ + 62, + 1724, + 1119, + 1183, + 1224, + 774, + 1727, + 331 + ] + }, + { + "word": "we", + "duration": 0.05, + "codes": [ + 598, + 1139, + 1388, + 547 + ] + }, + { + "word": "want", + "duration": 0.15, + "codes": [ + 458, + 191, + 1670, + 1697, + 437, + 1267, + 248, + 1079, + 647, + 836, + 1634 + ] + }, + { + "word": "to", + "duration": 0.05, + "codes": [ + 60, + 1494, + 1253, + 809 + ] + }, + { + "word": "aspire", + "duration": 0.59, + "codes": [ + 1426, + 1052, + 1252, + 858, + 1497, + 1281, + 1423, + 1167, + 1155, + 901, + 1743, + 1118, + 1479, + 986, + 551, + 23, + 1754, + 1206, + 1174, + 1308, + 1300, + 1374, + 507, + 96, + 546, + 1, + 1479, + 16, + 501, + 824, + 1258, + 723, + 267, + 1731, + 210, + 1785, + 321, + 1361, + 789, + 474, + 1716, 
+ 152, + 609, + 1806 + ] + }, + { + "word": "to", + "duration": 0.08, + "codes": [ + 1352, + 823, + 1532, + 889, + 1721, + 118 + ] + }, + { + "word": "become", + "duration": 0.36, + "codes": [ + 1343, + 861, + 1356, + 1513, + 269, + 870, + 1642, + 1467, + 1085, + 712, + 318, + 870, + 1459, + 901, + 83, + 1680, + 764, + 159, + 825, + 1126, + 1075, + 312, + 363, + 795, + 1809, + 503, + 1578 + ] + }, + { + "word": "them", + "duration": 0.16, + "codes": [ + 1263, + 27, + 1584, + 1829, + 1077, + 1439, + 1340, + 1817, + 1671, + 1732, + 1684, + 1614 + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/examples/outetts/speakers/ja_female_1.json b/examples/outetts/speakers/ja_female_1.json new file mode 100644 index 000000000..644705926 --- /dev/null +++ b/examples/outetts/speakers/ja_female_1.json @@ -0,0 +1,190 @@ +{ + "text": "\u4e0b\u30cd\u30bf\u3001\u3053\u308c\u306f\u5c11\u3057\u30a8\u30c3\u30c1\u306a\u3053\u3068\u3068\u304b\u5c11\u3057\u4e0b\u54c1\u306a\u3053\u3068\u3067\u3059\u306d\u3002\u5f7c\u3089\u3067\u3059\u304b\u3089\u7537\u306e\u4eba\u3067\u3059\u306d\u3002", + "words": [ + { + "word": "xianeta", + "duration": 0.75, + "codes": [ + 651, 343, 975, 1703, 1561, 1250, 1527, 176, 1568, 1450, 1287, 1375, 1238, 485, 416, 1435, 1634, 521, + 1068, 377, 1457, 662, 265, 158, 60, 149, 1424, 1810, 775, 1417, 1172, 326, 1278, 961, 1753, 905, 1509, + 1038, 1124, 29, 1403, 1265, 910, 182, 1447, 971, 1003, 1181, 630, 1719, 841, 610, 142, 110, 1177, 1044 + ] + }, + { + "word": "kore", + "duration": 0.65, + "codes": [ + 697, 841, 1429, 723, 1304, 1121, 1276, 1123, 915, 1505, 693, 1162, 338, 717, 1328, 674, 1367, 1227, 334, + 1309, 651, 964, 1223, 445, 1108, 334, 612, 751, 31, 458, 651, 223, 670, 331, 651, 1697, 1219, 1159, 149, + 152, 523, 794, 807, 995, 934, 1205, 408, 950, 868 + ] + }, + { + "word": "ha", + "duration": 0.19, + "codes": [516, 794, 1198, 1430, 1389, 261, 981, 1289, 632, 1137, 1136, 951, 1277, 446] + }, + { + "word": "shaoshi", + 
"duration": 0.85, + "codes": [ + 1076, 847, 1368, 1479, 1758, 761, 1043, 1076, 169, 1162, 845, 967, 1486, 331, 1121, 1071, 169, 1071, + 538, 673, 627, 445, 555, 542, 1669, 1617, 1602, 1681, 1568, 1602, 1329, 863, 1439, 1495, 1523, 1244, + 1461, 696, 1739, 41, 978, 1058, 1156, 1189, 55, 753, 1815, 1738, 1238, 749, 1059, 416, 22, 1638, 749, 0, + 636, 176, 1600, 1690, 1431, 1774, 1172, 1172 + ] + }, + { + "word": "etchi", + "duration": 0.67, + "codes": [ + 1492, 1344, 1761, 707, 65, 1205, 996, 1318, 1620, 1639, 879, 1432, 1255, 717, 1731, 287, 936, 1346, 337, + 1023, 287, 1437, 642, 1421, 110, 826, 1492, 1075, 1753, 1710, 1741, 1649, 1719, 1154, 1771, 591, 1289, + 1553, 601, 681, 859, 950, 1734, 634, 415, 4, 1710, 973, 1045, 1341 + ] + }, + { + "word": "na", + "duration": 0.12, + "codes": [945, 905, 1335, 1525, 1392, 321, 1216, 1172, 1264] + }, + { + "word": "koto", + "duration": 0.37, + "codes": [ + 658, 755, 1112, 693, 1344, 1006, 301, 1505, 486, 1448, 1655, 744, 818, 699, 1330, 510, 1487, 457, 1199, + 1373, 447, 1144, 606, 1824, 1253, 321, 443, 238 + ] + }, + { + "word": "to", + "duration": 0.51, + "codes": [ + 1257, 687, 723, 1096, 1443, 1092, 1199, 1277, 526, 1199, 845, 1038, 1311, 68, 1065, 744, 601, 1433, 612, + 651, 1023, 316, 1163, 458, 275, 1248, 68, 633, 923, 601, 566, 925, 1725, 1743, 907, 93, 188, 955 + ] + }, + { + "word": "ka", + "duration": 0.37, + "codes": [ + 1494, 91, 681, 1464, 526, 1065, 381, 1080, 1651, 1402, 1065, 271, 916, 1097, 1222, 1180, 1222, 1206, + 1112, 997, 984, 628, 1038, 408, 971, 446, 1044, 1306 + ] + }, + { + "word": "shaoshi", + "duration": 1.03, + "codes": [ + 142, 813, 973, 303, 811, 1517, 565, 1258, 1291, 1204, 1138, 1181, 1666, 193, 1162, 568, 674, 1347, 331, + 1108, 1333, 334, 1397, 1683, 630, 1226, 727, 869, 1438, 566, 1223, 712, 670, 301, 21, 1080, 686, 101, + 1250, 1552, 1641, 1329, 183, 1403, 1225, 85, 1010, 686, 1514, 680, 763, 676, 1726, 1234, 717, 93, 31, + 1015, 396, 1457, 1100, 867, 428, 1642, 1600, 1580, 1519, 1524, 
1711, 1638, 368, 1568, 867, 1753, 1179, + 1645, 1317 + ] + }, + { + "word": "xiapin", + "duration": 0.84, + "codes": [ + 1726, 416, 1475, 286, 1372, 663, 1783, 572, 936, 1684, 351, 523, 1550, 458, 936, 1572, 337, 534, 1299, + 223, 1157, 633, 316, 1413, 538, 566, 591, 401, 1354, 1287, 1636, 252, 143, 1647, 473, 890, 828, 935, + 1633, 1278, 1651, 319, 1438, 1653, 1470, 1654, 634, 1259, 1528, 1450, 1513, 1641, 1424, 127, 679, 1209, + 310, 508, 1224, 1509, 1281, 1427, 594 + ] + }, + { + "word": "na", + "duration": 0.17, + "codes": [1497, 1103, 1133, 1401, 594, 1341, 1601, 1112, 95, 1220, 743, 441, 1438] + }, + { + "word": "koto", + "duration": 0.36, + "codes": [ + 545, 1067, 931, 160, 1486, 1767, 762, 1745, 594, 1499, 529, 73, 709, 1457, 940, 17, 1185, 1762, 378, + 1181, 505, 1654, 700, 424, 883, 319, 868 + ] + }, + { + "word": "desu", + "duration": 0.55, + "codes": [ + 489, 1052, 1075, 1347, 169, 1486, 670, 1148, 1464, 171, 1260, 923, 54, 1071, 538, 624, 1373, 54, 651, + 486, 285, 923, 1677, 957, 1502, 1083, 1152, 1027, 1785, 1776, 1524, 1522, 1752, 1603, 1634, 1677, 1586, + 1724, 1556, 1273, 1060 + ] + }, + { + "word": "ne", + "duration": 0.17, + "codes": [729, 1800, 338, 1772, 987, 449, 1624, 1761, 774, 678, 789, 376, 303] + }, + { + "word": "bi", + "duration": 1.31, + "codes": [ + 616, 1083, 1187, 1511, 1057, 1784, 1494, 240, 1291, 1079, 337, 1505, 566, 301, 1499, 445, 717, 1251, + 285, 735, 727, 168, 891, 287, 54, 1346, 334, 121, 933, 566, 1721, 1338, 534, 1176, 54, 673, 785, 464, + 696, 933, 331, 933, 31, 275, 939, 336, 676, 601, 469, 686, 1428, 824, 1362, 1461, 710, 1037, 490, 724, + 680, 316, 1142, 612, 981, 727, 464, 1233, 1080, 458, 676, 171, 538, 287, 484, 585, 31, 643, 54, 31, 950, + 603, 445, 735, 1820, 1278, 1769, 1117, 1720, 697, 1108, 441, 1621, 973, 603, 609, 879, 407, 708, 823 + ] + }, + { + "word": "ra", + "duration": 0.19, + "codes": [524, 1187, 1028, 1310, 1087, 1015, 1804, 1052, 847, 1423, 565, 660, 1103, 393] + }, + { + "word": "desu", + 
"duration": 0.48, + "codes": [ + 1356, 874, 955, 1384, 1195, 1438, 727, 1201, 1289, 5, 1195, 776, 1271, 1414, 336, 1121, 824, 1658, 700, + 492, 926, 1441, 1216, 1671, 1728, 1731, 1729, 1759, 1714, 1602, 1360, 1649, 1577, 1720, 1511, 377 + ] + }, + { + "word": "kara", + "duration": 0.25, + "codes": [ + 1154, 1725, 151, 555, 1738, 1511, 466, 294, 1140, 349, 993, 1005, 777, 494, 1462, 499, 797, 754, 874 + ] + }, + { + "word": "nan", + "duration": 0.32, + "codes": [ + 386, 970, 1258, 1258, 891, 1328, 1251, 627, 1318, 1045, 361, 9, 1190, 239, 1534, 338, 1486, 420, 121, + 1308, 1307, 300, 82, 724 + ] + }, + { + "word": "no", + "duration": 0.27, + "codes": [ + 1245, 381, 1108, 1023, 555, 1111, 1033, 115, 1052, 1214, 408, 322, 885, 1022, 747, 724, 985, 545, 1142, + 535 + ] + }, + { + "word": "ren", + "duration": 0.2, + "codes": [1055, 666, 642, 603, 1468, 1598, 1412, 1091, 1519, 1820, 121, 334, 1233, 1338, 1141] + }, + { + "word": "desu", + "duration": 0.36, + "codes": [ + 1105, 1191, 962, 115, 1716, 1410, 1799, 663, 1745, 1211, 1187, 1411, 1121, 1486, 1784, 1065, 1820, 450, + 1739, 1691, 1578, 1738, 1685, 16, 1478, 987, 405 + ] + }, + { + "word": "ne", + "duration": 0.13, + "codes": [1486, 1240, 77, 1504, 452, 1429, 1353, 1204, 1424, 1331] + } + ], + "language": "ja" +} diff --git a/examples/outetts/speakers/ja_female_2.json b/examples/outetts/speakers/ja_female_2.json new file mode 100644 index 000000000..1b3f5775d --- /dev/null +++ b/examples/outetts/speakers/ja_female_2.json @@ -0,0 +1,266 @@ +{ + "text": "\u306a\u306e\u306b\u5f1f\u304c\u90aa\u9b54\u3092\u3057\u3066\u304d\u307e\u3059\u3002\u306d\u3048\u3001\u304a\u5144\u3061\u3083\u3093\u904a\u307c\u3046\u3088\u3002\u306d\u3048\u3001\u304a\u5144\u3061\u3083\u3093\u51fa\u304b\u3051\u3088\u3046\u3088\u3002\u90aa\u9b54\u3092\u3057\u3066\u304d\u307e\u3059\u3002\u306a\u306e\u3067\u3001\u5168\u7136\u96c6\u4e2d\u304c\u3067\u304d\u307e\u305b\u3093\u3002", + "words": [ + { + "word": "na", + "duration": 0.19, + "codes": [1176, 
566, 278, 584, 336, 394, 601, 1403, 269, 961, 1347, 1313, 466, 1352] + }, + { + "word": "no", + "duration": 0.28, + "codes": [ + 1204, 1007, 637, 1502, 984, 1264, 1690, 1240, 1078, 1316, 1051, 1227, 1780, 1509, 1670, 1767, 642, 1318, + 978, 1191, 1065 + ] + }, + { + "word": "ni", + "duration": 0.32, + "codes": [ + 1489, 794, 1506, 80, 523, 565, 1656, 169, 565, 287, 1047, 226, 1743, 706, 1244, 532, 1010, 1698, 759, + 1148, 1731, 549, 1393, 1494 + ] + }, + { + "word": "di", + "duration": 0.81, + "codes": [ + 1462, 1200, 1743, 915, 1317, 1468, 1675, 774, 1808, 1051, 1280, 1162, 301, 153, 1777, 355, 1766, 1043, + 1793, 1067, 259, 1780, 486, 490, 1640, 316, 169, 1408, 171, 633, 799, 336, 692, 601, 458, 981, 31, 805, + 603, 209, 673, 638, 690, 516, 474, 894, 1754, 1075, 830, 923, 32, 523, 1277, 27, 287, 1679, 1075, 446, + 149, 152, 449 + ] + }, + { + "word": "ga", + "duration": 0.55, + "codes": [ + 13, 470, 321, 259, 83, 526, 95, 1042, 526, 186, 883, 1037, 1388, 396, 1199, 136, 405, 1684, 377, 1470, + 1308, 997, 1137, 236, 630, 466, 1063, 1038, 768, 953, 1142, 1575, 1003, 1447, 1656, 306, 826, 1178, 254, + 1202, 879 + ] + }, + { + "word": "xiemo", + "duration": 0.64, + "codes": [ + 1256, 324, 9, 1207, 1046, 1259, 658, 1647, 1196, 1194, 1179, 1200, 1264, 1059, 595, 1245, 1356, 1132, + 1307, 1265, 1265, 1367, 858, 1811, 1070, 1484, 78, 42, 1711, 34, 362, 1821, 1261, 82, 809, 892, 1267, + 1497, 1156, 935, 1792, 385, 935, 1368, 728, 724, 339, 729 + ] + }, + { + "word": "o", + "duration": 0.12, + "codes": [1180, 994, 1324, 1189, 1136, 1137, 521, 1207, 1003] + }, + { + "word": "shi", + "duration": 0.13, + "codes": [565, 1435, 1144, 319, 1599, 1400, 1296, 1818, 1739, 1595] + }, + { + "word": "te", + "duration": 0.08, + "codes": [933, 1746, 1543, 910, 1297, 1328] + }, + { + "word": "ki", + "duration": 0.16, + "codes": [104, 1154, 416, 86, 1096, 1418, 168, 1832, 1552, 1686, 495, 660] + }, + { + "word": "masu", + "duration": 0.39, + "codes": [ + 781, 907, 316, 1235, 66, 1303, 904, 
1188, 1124, 1171, 978, 791, 1213, 791, 1181, 565, 1137, 1226, 741, + 229, 1268, 1753, 1548, 719, 1615, 1821, 1466, 1413, 1038 + ] + }, + { + "word": "nee", + "duration": 0.24, + "codes": [1421, 458, 1123, 1142, 360, 1346, 771, 548, 1807, 1323, 143, 1280, 1045, 852, 947, 777, 1471, 781] + }, + { + "word": "o", + "duration": 0.01, + "codes": [764] + }, + { + "word": "xiong", + "duration": 0.35, + "codes": [ + 1055, 1360, 979, 660, 1179, 1028, 628, 963, 1411, 829, 669, 1069, 681, 655, 1097, 1714, 1043, 1492, + 1136, 677, 1066, 1818, 1429, 1389, 1788, 241 + ] + }, + { + "word": "chan", + "duration": 0.24, + "codes": [ + 1762, 1602, 101, 1374, 1431, 1132, 1309, 1203, 1178, 1197, 1477, 1105, 1063, 1204, 1391, 902, 206, 1816 + ] + }, + { + "word": "youbou", + "duration": 0.48, + "codes": [ + 1095, 1106, 1041, 1351, 729, 1137, 1468, 1619, 1735, 1773, 274, 1749, 1454, 1192, 19, 248, 1668, 1195, + 1295, 1771, 490, 266, 1453, 1013, 774, 1232, 839, 217, 1280, 973, 1324, 942, 798, 969, 1108, 818 + ] + }, + { + "word": "yo", + "duration": 0.21, + "codes": [903, 1277, 1332, 816, 743, 1254, 1308, 1633, 242, 1265, 1141, 1411, 829, 1447, 829, 1399] + }, + { + "word": "nee", + "duration": 0.79, + "codes": [ + 1192, 13, 1195, 910, 983, 1275, 123, 1314, 424, 1746, 569, 1417, 1172, 705, 13, 1202, 1154, 446, 792, + 1483, 1315, 627, 1317, 226, 1193, 1505, 396, 1331, 1144, 548, 1259, 674, 1489, 1489, 674, 1395, 772, + 1079, 1259, 630, 1226, 924, 601, 1770, 360, 490, 387, 119, 954, 40, 1179, 1468, 1090, 1351, 1090, 1232, + 1426, 1276, 1423 + ] + }, + { + "word": "o", + "duration": 0.01, + "codes": [1089] + }, + { + "word": "xiong", + "duration": 0.28, + "codes": [ + 1051, 1208, 1309, 1651, 1498, 1499, 1210, 1193, 1131, 1645, 1233, 887, 995, 1090, 789, 1349, 1100, 1583, + 829, 976, 1096 + ] + }, + { + "word": "chan", + "duration": 0.2, + "codes": [1092, 1474, 379, 1588, 1753, 1066, 19, 1184, 755, 1097, 1195, 1229, 1333, 1165, 1738] + }, + { + "word": "chukakeyou", + "duration": 0.72, + 
"codes": [ + 1451, 1526, 594, 89, 1374, 1784, 1055, 1477, 1090, 202, 886, 405, 1622, 1763, 624, 1761, 1144, 1122, + 1254, 1203, 79, 1332, 661, 1038, 1745, 1239, 717, 1279, 526, 1827, 1478, 1042, 793, 386, 1685, 1153, + 825, 640, 887, 1215, 632, 1230, 829, 1735, 965, 1421, 998, 1803, 839, 1745, 839, 1770, 545, 964 + ] + }, + { + "word": "yo", + "duration": 0.23, + "codes": [1107, 1252, 459, 1227, 966, 175, 1108, 1407, 1500, 1265, 1492, 661, 1223, 95, 1487, 918, 359] + }, + { + "word": "xiemo", + "duration": 0.76, + "codes": [ + 1304, 1297, 411, 967, 1196, 730, 1065, 1106, 1119, 775, 825, 814, 1378, 916, 548, 41, 921, 1414, 1352, + 338, 1277, 275, 717, 1414, 612, 1123, 1092, 674, 1251, 285, 486, 659, 241, 1321, 151, 765, 0, 1529, + 1832, 1045, 1256, 1320, 1100, 353, 1314, 1817, 1392, 111, 1213, 1510, 830, 1218, 327, 1131, 1126, 1682, + 1113 + ] + }, + { + "word": "o", + "duration": 0.16, + "codes": [580, 353, 1265, 1298, 1671, 1022, 721, 394, 982, 1690, 1803, 1324] + }, + { + "word": "shi", + "duration": 0.16, + "codes": [679, 1060, 1378, 1619, 1808, 1217, 1586, 1459, 1826, 496, 1038, 1246] + }, + { + "word": "te", + "duration": 0.11, + "codes": [241, 1455, 274, 1487, 63, 518, 1068, 22] + }, + { + "word": "ki", + "duration": 0.17, + "codes": [924, 1717, 1070, 1736, 265, 523, 1572, 766, 1786, 1478, 1497, 1460, 813] + }, + { + "word": "masu", + "duration": 0.43, + "codes": [ + 747, 1311, 1430, 1079, 1331, 1219, 698, 1131, 1029, 1772, 1100, 353, 1264, 1410, 1745, 1113, 1150, 941, + 1505, 1266, 1314, 416, 1634, 1298, 1609, 1669, 1331, 1578, 1713, 1766, 1557, 1739 + ] + }, + { + "word": "na", + "duration": 0.48, + "codes": [ + 1763, 258, 1289, 762, 419, 1112, 1168, 209, 1148, 1205, 54, 1328, 1699, 458, 1276, 995, 905, 1367, 54, + 1227, 724, 235, 603, 534, 31, 933, 551, 68, 747, 1141, 1736, 1447, 665, 913, 671, 1174 + ] + }, + { + "word": "no", + "duration": 0.17, + "codes": [1333, 1226, 1476, 1425, 1493, 114, 66, 1393, 1766, 813, 111, 1347, 1466] + }, + { + "word": "de", 
+ "duration": 0.24, + "codes": [1330, 327, 729, 1382, 1000, 984, 1403, 174, 469, 1750, 35, 1746, 882, 1159, 900, 6, 9, 531] + }, + { + "word": "quanran", + "duration": 1.03, + "codes": [ + 1027, 870, 1159, 1032, 780, 1282, 376, 511, 1017, 870, 1116, 1097, 844, 1196, 376, 1344, 1058, 529, + 1471, 1280, 1651, 1148, 1774, 1319, 1285, 1744, 20, 1504, 1729, 497, 982, 546, 178, 977, 1133, 440, + 1320, 1017, 1112, 1082, 1303, 1022, 489, 908, 1057, 724, 1317, 1826, 792, 1389, 1784, 1052, 1758, 1236, + 1464, 1608, 1569, 1487, 1416, 1191, 1671, 1116, 400, 934, 200, 1206, 1051, 689, 764, 910, 1305, 927, + 998, 447, 1332, 970, 1429 + ] + }, + { + "word": "jizhong", + "duration": 0.97, + "codes": [ + 266, 560, 1736, 149, 1204, 1646, 1172, 1298, 1806, 1127, 1457, 1106, 661, 1107, 1281, 649, 1149, 1275, + 387, 1792, 1439, 1561, 1496, 546, 1524, 1638, 546, 1496, 1364, 328, 938, 654, 1179, 1111, 1682, 1329, + 202, 839, 353, 454, 822, 937, 40, 1364, 454, 1136, 754, 362, 1428, 1701, 702, 405, 1525, 445, 1154, + 1608, 1654, 865, 415, 888, 1600, 1102, 1277, 202, 756, 894, 1622, 440, 844, 737, 655, 1061, 1166 + ] + }, + { + "word": "ga", + "duration": 0.23, + "codes": [974, 655, 822, 931, 80, 1433, 859, 316, 1466, 1671, 547, 303, 972, 973, 497, 1258, 847] + }, + { + "word": "deki", + "duration": 0.49, + "codes": [ + 1200, 1256, 524, 825, 83, 705, 1122, 1216, 347, 179, 260, 1187, 999, 461, 1109, 1462, 761, 729, 1801, + 1092, 1470, 700, 718, 1381, 440, 1448, 905, 1746, 119, 1065, 1176, 1145, 1587, 1819, 1087, 957, 1091 + ] + }, + { + "word": "mase", + "duration": 0.43, + "codes": [ + 628, 1257, 284, 1351, 904, 460, 1468, 728, 1108, 1076, 741, 651, 339, 693, 1361, 370, 905, 971, 679, + 1613, 1500, 1606, 1634, 1687, 1750, 1202, 1265, 1203, 1139, 318, 1129, 816 + ] + }, + { + "word": "n", + "duration": 0.13, + "codes": [1169, 1397, 245, 1126, 1154, 642, 999, 961, 1742, 1542] + } + ], + "language": "ja" +} diff --git a/examples/outetts/speakers/ja_female_3.json 
b/examples/outetts/speakers/ja_female_3.json new file mode 100644 index 000000000..1a3982dce --- /dev/null +++ b/examples/outetts/speakers/ja_female_3.json @@ -0,0 +1,181 @@ +{ + "text": "\u5168\u529b\u3092\u5c3d\u304f\u3057\u305f\u306e\u306a\u3089\u3001\u7d50\u679c\u306f\u3069\u3046\u3067\u3042\u308c\u80f8\u3092\u5f35\u3063\u3066\u3044\u304d\u307e\u3057\u3087\u3046\u3002\u305d\u3046\u3059\u308c\u3070\u304d\u3063\u3068\u6b21\u306b\u7e4b\u304c\u308b\u306f\u305a\u3067\u3059\u3002", + "words": [ + { + "word": "quanli", + "duration": 0.4, + "codes": [ + 1007, 673, 1788, 101, 1354, 1539, 1337, 1742, 137, 225, 1521, 359, 189, 699, 1464, 508, 1298, 1458, + 1546, 1335, 78, 1251, 526, 1350, 469, 1010, 295, 1174, 188, 1426 + ] + }, + { + "word": "o", + "duration": 0.27, + "codes": [ + 254, 717, 1733, 59, 151, 1305, 954, 976, 1382, 1828, 1118, 367, 908, 923, 138, 1218, 548, 113, 1030, 674 + ] + }, + { + "word": "jinkushi", + "duration": 0.44, + "codes": [ + 587, 364, 1433, 1748, 1635, 544, 976, 1644, 1451, 906, 22, 512, 882, 1212, 1432, 1246, 1142, 1721, 1754, + 927, 95, 676, 2, 1210, 1189, 1647, 927, 122, 319, 11, 62, 1535, 1760 + ] + }, + { + "word": "ta", + "duration": 0.08, + "codes": [686, 1805, 1237, 944, 215, 1434] + }, + { + "word": "no", + "duration": 0.12, + "codes": [1203, 1127, 416, 1574, 1605, 1187, 1038, 927, 270] + }, + { + "word": "nara", + "duration": 0.24, + "codes": [509, 1409, 343, 98, 1632, 107, 1361, 1820, 43, 1286, 1389, 1187, 1671, 380, 560, 1553, 147, 1821] + }, + { + "word": "jieguo", + "duration": 0.57, + "codes": [ + 402, 1785, 581, 200, 1769, 1278, 1612, 1293, 1343, 627, 1448, 799, 1071, 1752, 406, 1380, 1305, 1016, + 175, 1345, 934, 508, 1439, 108, 63, 518, 1019, 906, 1215, 555, 976, 1114, 1185, 1809, 673, 1385, 1669, + 1748, 1001, 1395, 210, 1130, 215 + ] + }, + { + "word": "ha", + "duration": 0.15, + "codes": [1199, 271, 1140, 1059, 475, 717, 364, 492, 1367, 215, 718] + }, + { + "word": "dou", + "duration": 0.28, + "codes": [ + 1448, 236, 660, 
1511, 1717, 1783, 160, 1557, 759, 1124, 248, 1287, 756, 1150, 981, 215, 988, 1198, 1119, + 1358, 678 + ] + }, + { + "word": "de", + "duration": 0.16, + "codes": [1415, 1245, 1173, 1043, 1604, 1602, 969, 1195, 1735, 157, 1016, 388] + }, + { + "word": "are", + "duration": 0.28, + "codes": [ + 1433, 4, 466, 1695, 1313, 57, 1482, 1156, 1760, 581, 205, 1747, 560, 1284, 293, 1223, 1755, 277, 1066, + 794, 309 + ] + }, + { + "word": "xiong", + "duration": 0.33, + "codes": [ + 1647, 1678, 795, 1163, 1448, 712, 1803, 1694, 1023, 1652, 1465, 1262, 1280, 251, 1549, 203, 954, 113, + 454, 36, 633, 362, 1672, 1335, 1607 + ] + }, + { + "word": "o", + "duration": 0.16, + "codes": [1247, 1172, 1342, 759, 1059, 95, 1002, 1149, 695, 637, 1487, 1042] + }, + { + "word": "zhangtsu", + "duration": 0.27, + "codes": [ + 1136, 1323, 679, 1556, 1623, 1031, 1415, 1195, 1110, 1280, 1423, 1059, 104, 1300, 1446, 696, 805, 555, + 1205, 1474 + ] + }, + { + "word": "te", + "duration": 0.05, + "codes": [1641, 931, 1330, 183] + }, + { + "word": "iki", + "duration": 0.2, + "codes": [552, 1557, 422, 1346, 510, 983, 1608, 1390, 606, 1028, 1724, 335, 546, 422, 568] + }, + { + "word": "mashou", + "duration": 0.32, + "codes": [ + 319, 134, 1674, 229, 1412, 537, 1468, 1437, 1297, 1366, 198, 333, 527, 276, 1395, 113, 1325, 1258, 1423, + 1258, 1280, 1075, 1156, 1066 + ] + }, + { + "word": "sou", + "duration": 1.11, + "codes": [ + 1202, 271, 41, 450, 1305, 1157, 1652, 1824, 17, 1279, 1478, 891, 1754, 1288, 1648, 1669, 1023, 1458, + 1419, 939, 1707, 1157, 1648, 1707, 1163, 1458, 274, 1288, 976, 1480, 824, 1322, 1231, 957, 1390, 1231, + 1652, 1007, 1262, 1632, 1080, 1749, 1707, 591, 1760, 1233, 1262, 1707, 659, 1652, 1652, 17, 1632, 899, + 933, 1489, 986, 1705, 1525, 710, 1707, 591, 799, 1632, 555, 1023, 1255, 1293, 1772, 1451, 1664, 1600, + 399, 485, 408, 1309, 106, 1120, 260, 918, 88, 548, 609 + ] + }, + { + "word": "sure", + "duration": 0.28, + "codes": [ + 626, 1147, 826, 519, 1606, 1302, 1681, 1617, 1641, 
1691, 1127, 860, 1264, 1223, 1574, 1367, 1105, 1135, + 1183, 1286, 1031 + ] + }, + { + "word": "ba", + "duration": 0.12, + "codes": [324, 1462, 240, 1381, 1271, 1177, 282, 764, 977] + }, + { + "word": "kitto", + "duration": 0.41, + "codes": [ + 1181, 270, 870, 1584, 505, 591, 691, 1495, 1568, 308, 714, 1785, 1552, 234, 615, 85, 858, 1353, 1562, + 1699, 287, 1305, 891, 513, 1639, 1728, 1353, 1400, 260, 1292, 83 + ] + }, + { + "word": "ci", + "duration": 0.4, + "codes": [ + 1305, 221, 1707, 550, 1338, 756, 1530, 1145, 1705, 659, 459, 1023, 981, 1749, 1233, 636, 1363, 298, + 1487, 344, 152, 1341, 1615, 1721, 1582, 1221, 1740, 239, 1428, 243 + ] + }, + { + "word": "ni", + "duration": 0.15, + "codes": [1043, 1624, 1286, 612, 1493, 523, 1807, 47, 1601, 47, 918] + }, + { + "word": "jigaru", + "duration": 0.52, + "codes": [ + 293, 1767, 1578, 939, 1273, 1028, 1693, 1688, 10, 863, 18, 725, 1369, 91, 1580, 1712, 60, 1756, 324, + 992, 1764, 452, 521, 843, 1635, 906, 1575, 1669, 115, 42, 461, 9, 471, 730, 1302, 466, 1188, 1437, 637 + ] + }, + { + "word": "hazu", + "duration": 0.21, + "codes": [1723, 1160, 1047, 793, 697, 1391, 1746, 254, 1433, 1679, 1608, 634, 328, 93, 964, 65] + }, + { + "word": "desu", + "duration": 0.24, + "codes": [1695, 1561, 408, 967, 479, 658, 1229, 1205, 474, 890, 176, 1718, 440, 701, 719, 782, 1724, 1313] + } + ], + "language": "ja" +} diff --git a/examples/outetts/speakers/ja_male_1.json b/examples/outetts/speakers/ja_male_1.json new file mode 100644 index 000000000..ab9c2097a --- /dev/null +++ b/examples/outetts/speakers/ja_male_1.json @@ -0,0 +1,101 @@ +{ + "text": "\u3053\u306e\u8fba\u308a\u3067\u306f\u975e\u5e38\u306b\u51ac\u3001\u96ea\u304c\u591a\u3044\u305f\u3081\u306b", + "words": [ + { + "word": "kono", + "duration": 1.84, + "codes": [ + 866, 1242, 169, 820, 16, 1071, 636, 1499, 782, 1368, 701, 664, 1638, 1667, 256, 1284, 231, 1411, 62, + 535, 1342, 24, 715, 1818, 979, 1004, 945, 714, 1413, 1727, 98, 730, 787, 1200, 1356, 376, 475, 616, + 
1396, 202, 1755, 1419, 1241, 1145, 714, 1490, 1683, 69, 700, 735, 243, 1749, 626, 67, 713, 83, 1751, + 927, 1499, 471, 1811, 1227, 1331, 58, 1264, 1638, 1433, 696, 1246, 1331, 1034, 1079, 1738, 1370, 700, + 1270, 1549, 0, 767, 895, 1057, 333, 1760, 1565, 1151, 406, 1016, 751, 1205, 1519, 1123, 1379, 1714, + 1079, 1329, 1185, 1303, 1370, 1504, 1088, 1735, 1391, 1175, 1003, 19, 1343, 169, 1465, 16, 735, 1215, + 1505, 1077, 1024, 951, 1824, 1422, 1509, 1053, 1805, 1266, 1262, 799, 1333, 1484, 1798, 1558, 1189, 123, + 1204, 120, 153, 484, 1664, 286, 1150, 754, 931 + ] + }, + { + "word": "bianri", + "duration": 0.35, + "codes": [ + 282, 777, 970, 208, 179, 1032, 208, 966, 121, 1007, 691, 620, 1743, 1200, 467, 366, 708, 39, 138, 738, + 431, 1034, 809, 398, 966, 888 + ] + }, + { + "word": "de", + "duration": 0.09, + "codes": [845, 925, 1792, 1091, 809, 750, 806] + }, + { + "word": "ha", + "duration": 0.16, + "codes": [1665, 961, 1404, 959, 1761, 840, 1755, 635, 639, 525, 1738, 147] + }, + { + "word": "feichang", + "duration": 0.96, + "codes": [ + 475, 1259, 147, 657, 1282, 370, 876, 1432, 1822, 442, 1064, 1465, 1254, 1040, 1504, 1153, 1459, 1421, + 1153, 1832, 1064, 1355, 513, 1309, 1255, 673, 1360, 343, 984, 1215, 1007, 1716, 1320, 1307, 328, 1402, + 1332, 1037, 1308, 1349, 1742, 1438, 1091, 1775, 1091, 1214, 1663, 1642, 1282, 1503, 895, 1245, 1800, + 454, 1635, 34, 833, 368, 844, 843, 295, 102, 912, 376, 47, 730, 130, 63, 110, 811, 1019, 2 + ] + }, + { + "word": "ni", + "duration": 0.16, + "codes": [1218, 1240, 1388, 1387, 638, 1403, 735, 1350, 202, 1443, 1554, 409] + }, + { + "word": "dong", + "duration": 1.16, + "codes": [ + 1175, 1427, 182, 1332, 1718, 640, 548, 1319, 862, 316, 1412, 924, 1223, 1091, 1079, 1735, 1215, 717, + 1230, 694, 624, 1273, 275, 1118, 1228, 1414, 1476, 997, 1642, 1247, 58, 1286, 1565, 1402, 1436, 1816, + 1477, 1024, 32, 1429, 928, 1158, 1752, 584, 1172, 820, 351, 1293, 625, 601, 1430, 641, 516, 1383, 490, + 924, 1570, 651, 1639, 572, 967, 1510, 
603, 1418, 1293, 986, 438, 696, 710, 1338, 258, 1003, 1144, 712, + 1276, 1803, 1421, 1129, 1668, 1720, 1697, 1013, 1651, 1697, 679, 296, 1732 + ] + }, + { + "word": "xue", + "duration": 1.19, + "codes": [ + 952, 1633, 6, 469, 535, 510, 1618, 787, 853, 1827, 409, 1485, 752, 41, 1750, 555, 1118, 1242, 630, 1614, + 659, 1262, 1601, 696, 1360, 899, 585, 1763, 487, 274, 686, 287, 1176, 824, 712, 1176, 1080, 957, 1338, + 680, 1284, 505, 566, 928, 568, 1067, 786, 490, 1818, 1462, 1127, 1146, 1330, 1496, 1818, 1666, 788, + 1425, 1671, 1375, 1181, 1496, 1639, 1118, 333, 692, 1573, 1035, 1725, 1270, 1372, 1789, 605, 191, 1680, + 1434, 1483, 1554, 1787, 78, 1244, 1821, 440, 947, 1777, 279, 1098, 1735, 1337 + ] + }, + { + "word": "ga", + "duration": 0.16, + "codes": [277, 465, 67, 1590, 281, 1568, 1700, 82, 860, 1066, 43, 660] + }, + { + "word": "duoi", + "duration": 0.4, + "codes": [ + 324, 1072, 1129, 864, 1365, 1779, 1347, 1705, 934, 1357, 1633, 613, 1555, 1015, 456, 1605, 1793, 203, + 488, 439, 1674, 1279, 775, 214, 1639, 493, 1388, 308, 1418, 319 + ] + }, + { + "word": "tame", + "duration": 0.32, + "codes": [ + 1528, 1666, 1238, 1746, 1832, 1536, 442, 899, 1419, 1261, 1426, 1378, 1429, 1318, 1358, 1098, 1464, + 1792, 1614, 921, 1677, 1404, 1245, 1519 + ] + }, + { + "word": "ni", + "duration": 0.17, + "codes": [58, 759, 1639, 1025, 1672, 633, 1315, 325, 1471, 1717, 665, 1401, 1524] + } + ], + "language": "ja" +} diff --git a/examples/outetts/speakers/ko_female_1.json b/examples/outetts/speakers/ko_female_1.json new file mode 100644 index 000000000..7918dc375 --- /dev/null +++ b/examples/outetts/speakers/ko_female_1.json @@ -0,0 +1,175 @@ +{ + "text": " \uc544\ubb34\ub798\ub3c4 \uccab \ub370\uc774\ud2b8\ub77c \ub2e4\ub4e4 \uc798 \ubcf4\uc774\uace0 \uc2f6\uc740 \uac83 \uac19\uc740\ub370\uc694. \uc544\uc9c1 \uaf42\ud78c \uc0ac\ub78c \uc5c6\ub2e4\ub354\ub2c8. 
\uc774\ub807\uac8c \uc5f4\uc2ec\ud788\uc778 \uac78 \ubcf4\uba74 \uc18d\ub9c8\uc74c\uc740 \uc880 \ub2e4\ub978\uac00 \ubd10\uc694.", + "words": [ + { + "word": "amuraedo", + "duration": 0.52, + "codes": [ + 1707, 1788, 1072, 1484, 1325, 25, 475, 456, 474, 441, 1331, 1639, 1439, 214, 572, 996, 1063, 1223, 208, + 1492, 1070, 1117, 104, 1271, 1378, 170, 1154, 245, 695, 1469, 1798, 1321, 1322, 28, 1718, 467, 912, + 1340, 774 + ] + }, + { + "word": "ceos", + "duration": 0.36, + "codes": [ + 272, 890, 443, 728, 682, 1732, 1271, 1596, 1658, 57, 133, 1581, 864, 1246, 1817, 1451, 1822, 178, 407, + 1524, 1423, 797, 1318, 1397, 1438, 1725, 1730 + ] + }, + { + "word": "deiteura", + "duration": 0.55, + "codes": [ + 516, 1707, 1783, 513, 1687, 1822, 1324, 931, 412, 714, 1101, 1105, 849, 461, 1147, 1705, 510, 1421, + 1655, 459, 693, 1037, 957, 1572, 1829, 308, 1535, 1434, 1313, 705, 1517, 898, 335, 1595, 446, 531, 498, + 658, 29, 248, 613 + ] + }, + { + "word": "dadeul", + "duration": 0.52, + "codes": [ + 946, 570, 560, 1324, 629, 697, 1136, 1463, 827, 1647, 1455, 1404, 1514, 1421, 1418, 1721, 1770, 719, + 1459, 1040, 595, 1542, 868, 1107, 998, 172, 1077, 317, 1819, 1319, 939, 608, 1161, 1347, 282, 1133, 79, + 1212, 770 + ] + }, + { + "word": "jal", + "duration": 0.24, + "codes": [749, 1616, 1624, 335, 1261, 719, 1177, 1483, 347, 1438, 901, 1187, 1030, 340, 1331, 635, 679, 340] + }, + { + "word": "boigo", + "duration": 0.36, + "codes": [ + 1189, 514, 802, 1352, 86, 691, 1233, 913, 754, 678, 1563, 182, 506, 770, 69, 990, 592, 775, 921, 648, + 712, 1525, 1829, 832, 1037, 1089, 732 + ] + }, + { + "word": "sipeun", + "duration": 0.28, + "codes": [ + 787, 1710, 1463, 14, 384, 767, 512, 1421, 1510, 1808, 1380, 1568, 1667, 1721, 947, 1393, 1792, 664, 414, + 756, 805 + ] + }, + { + "word": "geos", + "duration": 0.12, + "codes": [1371, 1583, 1700, 1196, 690, 813, 1650, 702, 1479] + }, + { + "word": "gateundeyo", + "duration": 0.55, + "codes": [ + 1525, 1623, 1332, 1330, 800, 723, 354, 1731, 
1628, 735, 1205, 1672, 1771, 1261, 1364, 1583, 1546, 1640, + 1048, 28, 1590, 530, 1828, 134, 1706, 865, 1150, 1015, 1698, 361, 1563, 1726, 1272, 26, 1278, 1150, 408, + 1771, 764, 1542, 825 + ] + }, + { + "word": "ajig", + "duration": 0.96, + "codes": [ + 1107, 58, 1424, 1487, 1300, 1078, 1775, 109, 1671, 1780, 1727, 1099, 1251, 1118, 1669, 1595, 1699, 1176, + 729, 743, 970, 1037, 878, 655, 878, 1738, 1486, 347, 1801, 1397, 1329, 1736, 1059, 1495, 1433, 1130, + 1118, 1289, 1037, 1231, 1478, 1811, 1298, 1502, 847, 147, 248, 571, 595, 705, 809, 1677, 702, 1683, + 1711, 22, 1364, 6, 546, 1363, 1331, 1427, 1670, 1804, 1004, 527, 1671, 1267, 1585, 1792, 1832, 1724 + ] + }, + { + "word": "ggojhin", + "duration": 0.45, + "codes": [ + 1704, 1623, 1474, 1595, 1478, 516, 1289, 1509, 883, 1415, 682, 716, 217, 1396, 555, 1717, 799, 1079, + 438, 923, 1007, 1832, 1166, 10, 844, 62, 178, 368, 1372, 1211, 1600, 1341, 184, 1558 + ] + }, + { + "word": "saram", + "duration": 0.32, + "codes": [ + 1071, 1498, 1657, 1775, 906, 1718, 1215, 1005, 13, 497, 953, 570, 522, 810, 1775, 579, 1133, 498, 1060, + 901, 970, 1243, 1175, 932 + ] + }, + { + "word": "eobsdadeoni", + "duration": 0.75, + "codes": [ + 644, 1742, 1683, 1330, 1486, 1222, 1816, 1689, 1507, 1717, 1669, 1621, 344, 1646, 1573, 1751, 1346, + 1385, 799, 1010, 823, 1366, 1042, 1136, 318, 1229, 326, 550, 1015, 1300, 1763, 91, 1752, 1802, 1682, + 1088, 818, 716, 1156, 503, 1804, 1330, 1013, 924, 1294, 363, 1771, 615, 1594, 1502, 1646, 1766, 949, + 1312, 1655, 1783 + ] + }, + { + "word": "ireohge", + "duration": 1.35, + "codes": [ + 1584, 1545, 1768, 1811, 1267, 1640, 1782, 1488, 1388, 1762, 1828, 1520, 1396, 1593, 1605, 1284, 1339, + 1657, 1769, 1540, 1605, 1709, 450, 1712, 734, 1461, 1616, 1635, 1536, 1592, 1321, 325, 1771, 406, 1657, + 1163, 585, 1016, 17, 1390, 1693, 758, 1284, 1474, 862, 1176, 1346, 426, 1293, 1338, 1713, 1346, 1284, + 1176, 939, 1430, 758, 1176, 1543, 1440, 862, 785, 1248, 1721, 1348, 1408, 1672, 735, 1763, 
1672, 624, + 1525, 555, 923, 1528, 1721, 519, 1368, 529, 407, 1177, 50, 1708, 137, 553, 1268, 186, 626, 1770, 1794, + 1301, 1735, 976, 1391, 1139, 1160, 1677, 479, 200, 51, 1093 + ] + }, + { + "word": "yeolsimhiin", + "duration": 0.44, + "codes": [ + 49, 1103, 137, 1298, 61, 181, 117, 133, 181, 1819, 856, 50, 844, 507, 356, 1382, 166, 356, 1366, 1358, + 639, 1435, 1445, 1365, 1304, 202, 1270, 1056, 99, 844, 1427, 1575, 57 + ] + }, + { + "word": "geol", + "duration": 0.13, + "codes": [1299, 1516, 1399, 721, 346, 600, 374, 549, 1500, 583] + }, + { + "word": "bomyeon", + "duration": 0.37, + "codes": [ + 1765, 1626, 1724, 1737, 1656, 1149, 635, 617, 1502, 935, 385, 1580, 1556, 912, 780, 282, 775, 1015, 695, + 143, 110, 532, 1138, 330, 1372, 782, 1487, 703 + ] + }, + { + "word": "sogmaeumeun", + "duration": 0.64, + "codes": [ + 1333, 1783, 1315, 1754, 1673, 1667, 176, 1535, 554, 1139, 1654, 1627, 1525, 959, 516, 965, 957, 910, + 1010, 1213, 1272, 447, 1221, 1260, 1213, 1324, 565, 1121, 521, 1174, 42, 768, 1149, 1230, 95, 1666, + 1831, 1550, 1452, 709, 1006, 329, 1640, 1485, 734, 1530, 333, 1604 + ] + }, + { + "word": "jom", + "duration": 0.13, + "codes": [50, 927, 1147, 1110, 647, 1625, 1740, 481, 1811, 1784] + }, + { + "word": "dareunga", + "duration": 0.44, + "codes": [ + 1644, 1163, 1408, 1116, 971, 872, 958, 1483, 996, 400, 1281, 973, 1127, 1610, 1532, 1234, 1206, 796, + 1491, 317, 1501, 481, 1594, 1528, 1610, 1583, 1073, 1281, 951, 1281, 1335, 1423, 1444 + ] + }, + { + "word": "bwayo", + "duration": 0.28, + "codes": [ + 702, 1405, 1266, 1669, 1770, 1089, 887, 1443, 1314, 1468, 966, 1087, 1059, 1436, 966, 1332, 1335, 1353, + 1423, 1019, 1073 + ] + } + ], + "language": "ko" +} diff --git a/examples/outetts/speakers/ko_female_2.json b/examples/outetts/speakers/ko_female_2.json new file mode 100644 index 000000000..a03661db5 --- /dev/null +++ b/examples/outetts/speakers/ko_female_2.json @@ -0,0 +1,162 @@ +{ + "text": " \uc81c\uac00 \uc9c0\ub09c 1\ub144\uac04\uc740 
\uac70\uc758 \ub2e4\ub978 \ud5a5\uc218 \uc548 \uc4f0\uace0 \uc774\uac83\ub9cc \uc37c\uc5b4\uc694. \ub0a8\ud3b8\ud558\uace0 \uc791\ub144\uc5d0 \ub3c4\ucfc4 \uc5ec\ud589 \uac14\ub2e4\uac00 \uc2dc\ud5a5\uc744 \ud574\ubcf4\uace0 \ubc18\ud55c \uc81c\ud488\uc778\ub370", + "words": [ + { + "word": "jega", + "duration": 0.2, + "codes": [1571, 1145, 1430, 1169, 721, 529, 1307, 117, 1770, 1095, 1342, 1182, 613, 657, 73] + }, + { + "word": "jinan", + "duration": 0.64, + "codes": [ + 777, 637, 1177, 1173, 604, 1202, 875, 1211, 250, 661, 42, 693, 1704, 1713, 1451, 1376, 1287, 767, 802, + 546, 1487, 1127, 502, 1317, 1554, 1065, 470, 823, 65, 553, 951, 857, 522, 260, 433, 467, 991, 860, 1105, + 880, 384, 880, 997, 461, 1194, 648, 1079, 1541 + ] + }, + { + "word": "one", + "duration": 0.43, + "codes": [ + 1403, 1025, 945, 779, 111, 1730, 1298, 66, 1636, 470, 1452, 978, 480, 1833, 674, 1740, 1450, 826, 673, + 1108, 259, 1157, 1067, 1775, 234, 732, 117, 717, 848, 1165, 1630, 882 + ] + }, + { + "word": "nyeonganeun", + "duration": 0.49, + "codes": [ + 1420, 1382, 661, 1043, 367, 700, 1064, 1132, 106, 1078, 1165, 1106, 1046, 1192, 1813, 1211, 1753, 987, + 894, 1230, 81, 248, 433, 373, 1291, 290, 665, 754, 923, 1638, 1062, 1452, 628, 203, 47, 1742, 1698 + ] + }, + { + "word": "geoyi", + "duration": 0.36, + "codes": [ + 771, 150, 1802, 484, 1318, 1210, 751, 1759, 1791, 1775, 1509, 1827, 922, 389, 770, 953, 863, 977, 133, + 1518, 769, 80, 201, 781, 112, 34, 125 + ] + }, + { + "word": "dareun", + "duration": 0.43, + "codes": [ + 4, 583, 189, 1207, 1540, 166, 604, 721, 1594, 1623, 674, 1820, 1226, 1089, 985, 1823, 1278, 1160, 996, + 1454, 186, 1260, 128, 322, 155, 955, 793, 208, 1052, 344, 1115, 281 + ] + }, + { + "word": "hyangsu", + "duration": 0.28, + "codes": [ + 798, 964, 1372, 581, 699, 976, 579, 972, 1369, 978, 1255, 1344, 629, 1401, 1831, 1641, 440, 1711, 1786, + 1331, 777 + ] + }, + { + "word": "an", + "duration": 0.15, + "codes": [1210, 427, 1331, 1226, 1686, 1278, 998, 970, 838, 
1767, 1384] + }, + { + "word": "sseugo", + "duration": 0.36, + "codes": [ + 1339, 1339, 1510, 1552, 1583, 1527, 1748, 1637, 1546, 1751, 1290, 34, 1664, 1794, 977, 473, 1361, 804, + 633, 1346, 1360, 1490, 819, 17, 321, 1534, 811 + ] + }, + { + "word": "igeosman", + "duration": 0.88, + "codes": [ + 606, 700, 511, 317, 1560, 1060, 1625, 1406, 747, 729, 1425, 1433, 1619, 1239, 1294, 1545, 744, 1321, + 1595, 555, 1765, 1480, 751, 1740, 630, 805, 1248, 601, 1321, 1768, 1007, 1655, 710, 1023, 1525, 630, + 1242, 1100, 840, 1460, 1400, 198, 1237, 71, 1588, 1706, 590, 1720, 159, 1588, 613, 288, 1457, 803, 1312, + 1801, 1650, 617, 970, 412, 172, 294, 576, 1431, 1816, 1565 + ] + }, + { + "word": "sseosseoyo", + "duration": 0.4, + "codes": [ + 1757, 1667, 1535, 1709, 1213, 864, 1066, 61, 1678, 1810, 1724, 1701, 1615, 1714, 1737, 1673, 1788, 1786, + 1783, 1731, 907, 411, 803, 1730, 726, 9, 970, 1168, 994, 905 + ] + }, + { + "word": "nampyeonhago", + "duration": 0.76, + "codes": [ + 1162, 671, 516, 86, 861, 957, 758, 1114, 1750, 1157, 1321, 1144, 555, 1704, 1461, 591, 1623, 824, 1233, + 1385, 275, 584, 1045, 622, 254, 329, 294, 95, 430, 37, 794, 912, 1806, 1746, 1231, 872, 622, 707, 25, + 1305, 25, 1209, 802, 864, 662, 1100, 1203, 535, 115, 373, 1409, 1061, 1601, 1008, 130, 1308, 282 + ] + }, + { + "word": "jagnyeone", + "duration": 0.36, + "codes": [ + 809, 1548, 18, 1337, 1622, 1041, 1119, 913, 1005, 1267, 1723, 1799, 709, 1824, 1032, 1336, 1755, 818, + 1460, 1036, 1333, 1146, 1207, 328, 299, 429, 1759 + ] + }, + { + "word": "dokyo", + "duration": 0.32, + "codes": [ + 1707, 144, 1134, 1051, 1170, 1333, 437, 1492, 698, 935, 1798, 1299, 1248, 1628, 223, 1268, 1043, 1172, + 769, 1774, 1114, 422, 531, 1478 + ] + }, + { + "word": "yeohaeng", + "duration": 0.27, + "codes": [ + 65, 316, 192, 403, 1588, 991, 1200, 7, 537, 390, 1103, 45, 723, 1833, 407, 752, 1310, 732, 1581, 1495 + ] + }, + { + "word": "gassdaga", + "duration": 0.44, + "codes": [ + 1084, 1130, 955, 497, 1252, 902, 58, 
1096, 1405, 1346, 58, 1144, 89, 578, 1137, 1437, 339, 291, 123, + 755, 907, 790, 1330, 776, 852, 1123, 1207, 1314, 958, 466, 588, 1736, 1101 + ] + }, + { + "word": "sihyangeul", + "duration": 0.68, + "codes": [ + 1245, 847, 809, 203, 997, 721, 1344, 1040, 1040, 1135, 354, 1156, 1132, 1397, 347, 797, 770, 1809, 1313, + 1726, 231, 1238, 1175, 1018, 1059, 368, 1061, 1565, 749, 1127, 1431, 586, 577, 498, 1199, 309, 1172, + 147, 772, 816, 336, 1450, 477, 1158, 878, 498, 621, 932, 344, 1082, 115 + ] + }, + { + "word": "haebogo", + "duration": 0.21, + "codes": [353, 181, 1032, 1775, 1694, 1114, 1731, 1222, 494, 1540, 1725, 1062, 1584, 1245, 991, 109] + }, + { + "word": "banhan", + "duration": 0.32, + "codes": [ + 1740, 469, 1621, 1420, 1297, 1178, 1831, 1178, 1459, 1620, 9, 1197, 1208, 1814, 1294, 1660, 1232, 904, + 366, 699, 1798, 282, 771, 1791 + ] + }, + { + "word": "jepuminde", + "duration": 0.6, + "codes": [ + 1645, 1655, 1518, 1743, 723, 592, 1173, 835, 1343, 693, 459, 1739, 378, 1701, 1226, 1359, 1511, 1032, + 1804, 1259, 698, 1249, 1697, 1530, 1678, 1140, 1590, 421, 1489, 909, 761, 1749, 1417, 1388, 1213, 663, + 8, 1065, 1187, 137, 723, 628, 1638, 958, 1086 + ] + } + ], + "language": "ko" +} diff --git a/examples/outetts/speakers/ko_male_1.json b/examples/outetts/speakers/ko_male_1.json new file mode 100644 index 000000000..30736ba95 --- /dev/null +++ b/examples/outetts/speakers/ko_male_1.json @@ -0,0 +1,137 @@ +{ + "text": " \uba85\ub2e8\uc5d0 \uc788\ub294 \ud559\uc0dd\ub4e4\uc740 \uc2e4\uc81c\ub85c \uc9c0\ub2a5\uc774 \ub192\uc9c0 \uc54a\uc558\uace0 \ubb34\uc791\uc704\ub85c \ubf51\ud78c \ud559\uc0dd\ub4e4\uc774\uc5c8\uae30 \ub54c\ubb38\uc785\ub2c8\ub2e4. 
\uc0ac\uc2e4\uc744 \ubab0\ub790\ub358 \uad50\uc0ac\ub4e4\uc740", + "words": [ + { + "word": "myeongdane", + "duration": 0.48, + "codes": [ + 151, 1274, 1665, 1231, 205, 713, 1368, 1078, 1155, 1015, 1301, 297, 1007, 297, 1765, 927, 593, 1364, + 653, 1664, 1613, 1563, 910, 944, 847, 39, 152, 248, 321, 1027, 318, 1093, 146, 1745, 254, 1103 + ] + }, + { + "word": "issneun", + "duration": 0.35, + "codes": [ + 69, 1001, 1744, 479, 1781, 536, 631, 1451, 1596, 1636, 503, 41, 1214, 1417, 1286, 1824, 1069, 1366, + 1690, 430, 1113, 611, 658, 761, 775, 1025 + ] + }, + { + "word": "hagsaengdeuleun", + "duration": 0.73, + "codes": [ + 13, 299, 607, 1633, 1447, 1756, 872, 1743, 1037, 1589, 1538, 1230, 713, 1691, 980, 344, 1375, 1061, 485, + 1013, 1147, 979, 809, 822, 1308, 783, 28, 1435, 1089, 1024, 1526, 440, 98, 1093, 786, 1689, 28, 787, + 1175, 205, 1708, 349, 763, 1326, 1120, 595, 211, 1415, 579, 600, 917, 178, 663, 940, 776 + ] + }, + { + "word": "siljero", + "duration": 0.92, + "codes": [ + 678, 59, 1345, 60, 1756, 776, 744, 501, 762, 606, 766, 227, 1011, 1157, 1080, 1669, 487, 762, 1479, 227, + 1305, 1248, 538, 1327, 673, 696, 544, 241, 1302, 1348, 1667, 919, 1707, 962, 1139, 1797, 596, 1677, 767, + 434, 1525, 1178, 644, 1488, 305, 191, 1761, 1241, 735, 785, 423, 538, 1681, 943, 1250, 1061, 1088, 532, + 1638, 282, 575, 1342, 1002, 935, 1344, 1280, 303, 108, 1286 + ] + }, + { + "word": "jineungi", + "duration": 0.55, + "codes": [ + 120, 143, 1700, 770, 1584, 1667, 423, 1510, 1652, 231, 581, 1583, 596, 1053, 1459, 769, 1225, 1825, 595, + 877, 750, 779, 1802, 1726, 1336, 178, 614, 1651, 549, 783, 1450, 882, 607, 1808, 1687, 1015, 940, 1470, + 543, 853, 195 + ] + }, + { + "word": "nopji", + "duration": 0.4, + "codes": [ + 1731, 856, 1654, 559, 1538, 1796, 1069, 1825, 876, 1463, 32, 443, 1408, 1218, 1764, 287, 538, 1760, + 1359, 566, 1631, 1313, 1035, 543, 788, 24, 1317, 1620, 263, 1312 + ] + }, + { + "word": "anhassgo", + "duration": 0.41, + "codes": [ + 222, 282, 958, 816, 
800, 221, 361, 573, 1509, 864, 578, 374, 958, 1541, 467, 1110, 1063, 1013, 1410, + 1010, 151, 676, 256, 559, 1293, 1831, 1454, 1401, 319, 225, 217 + ] + }, + { + "word": "mujagwiro", + "duration": 1.41, + "codes": [ + 326, 1789, 1347, 1554, 1255, 861, 1246, 334, 624, 1595, 445, 1080, 1273, 458, 1319, 567, 241, 975, 538, + 496, 417, 94, 325, 538, 151, 765, 285, 1362, 673, 401, 975, 445, 1242, 275, 391, 862, 445, 975, 633, + 551, 325, 487, 766, 933, 673, 1652, 824, 567, 1623, 555, 799, 1428, 603, 899, 799, 566, 1151, 287, 420, + 401, 1352, 686, 567, 458, 343, 1672, 1524, 1272, 1683, 1346, 836, 1830, 1346, 376, 1078, 1805, 1674, + 817, 938, 1, 471, 536, 1280, 1171, 1344, 254, 367, 102, 116, 1161, 745, 1781, 363, 1782, 999, 1330, 232, + 318, 536, 1366, 1493, 583, 1394, 83, 946, 890 + ] + }, + { + "word": "bbobhin", + "duration": 0.45, + "codes": [ + 1674, 1684, 401, 316, 986, 379, 1474, 1401, 1453, 217, 1014, 557, 1217, 1218, 1249, 727, 199, 171, 795, + 31, 325, 1236, 1556, 854, 1726, 1827, 128, 1637, 455, 769, 831, 882, 177, 1221 + ] + }, + { + "word": "hagsaengdeulieossgi", + "duration": 1.03, + "codes": [ + 355, 1416, 1701, 709, 1636, 1639, 1239, 1448, 1309, 1752, 1135, 1794, 1238, 1160, 1786, 1031, 1796, 676, + 878, 1737, 849, 10, 1349, 817, 911, 1226, 684, 206, 970, 682, 308, 994, 1081, 962, 1833, 506, 1022, + 1302, 1467, 968, 1710, 903, 364, 268, 1004, 677, 1802, 232, 443, 949, 796, 1217, 575, 1403, 248, 1811, + 657, 1686, 992, 1753, 1121, 285, 1176, 1246, 1793, 561, 31, 325, 119, 265, 796, 1680, 750, 55, 645, 983, + 527 + ] + }, + { + "word": "ddaemunibnida", + "duration": 0.64, + "codes": [ + 203, 581, 681, 1798, 766, 223, 1262, 337, 337, 1528, 1416, 657, 549, 1399, 741, 581, 1056, 364, 617, + 1696, 1379, 961, 425, 1434, 1410, 617, 1748, 989, 326, 1370, 1212, 593, 1225, 898, 1590, 1422, 514, 154, + 711, 1488, 1644, 1433, 1136, 339, 1001, 1245, 1103, 1668 + ] + }, + { + "word": "sasileul", + "duration": 1.91, + "codes": [ + 1203, 814, 1421, 1203, 355, 1397, 
1193, 91, 1481, 484, 585, 899, 391, 345, 257, 241, 20, 187, 20, 692, + 224, 16, 187, 851, 360, 224, 544, 187, 23, 360, 209, 16, 187, 23, 509, 94, 311, 94, 194, 417, 187, 311, + 285, 23, 417, 94, 311, 360, 72, 417, 187, 311, 285, 194, 417, 187, 311, 285, 72, 417, 187, 311, 360, 23, + 544, 187, 23, 285, 224, 417, 187, 311, 285, 224, 417, 94, 851, 445, 194, 16, 204, 851, 287, 209, 694, + 285, 16, 765, 209, 1151, 285, 311, 257, 224, 281, 445, 3, 1244, 288, 820, 1293, 896, 231, 895, 527, 714, + 1088, 782, 1238, 719, 1330, 756, 164, 291, 425, 346, 841, 611, 254, 841, 429, 276, 1088, 1587, 340, + 1519, 398, 1139, 248, 1598, 375, 1693, 593, 1241, 346, 443, 482, 384, 154, 768, 783, 37, 546 + ] + }, + { + "word": "molrassdeon", + "duration": 0.65, + "codes": [ + 1663, 272, 1482, 1513, 720, 1069, 368, 870, 757, 214, 856, 556, 1498, 539, 1221, 253, 898, 617, 631, + 1457, 472, 57, 206, 424, 462, 768, 382, 1506, 1419, 866, 601, 680, 566, 401, 1510, 1691, 364, 994, 217, + 790, 583, 182, 579, 770, 1394, 1766, 564, 1374, 1820 + ] + }, + { + "word": "gyosadeuleun", + "duration": 0.67, + "codes": [ + 444, 1439, 1375, 482, 1828, 1592, 1411, 605, 1828, 1423, 653, 1807, 1690, 557, 1625, 1286, 116, 1607, + 1253, 1607, 654, 990, 884, 1247, 1506, 1073, 248, 302, 656, 88, 1372, 639, 591, 1538, 1354, 374, 1147, + 383, 716, 1781, 609, 456, 927, 664, 1285, 1345, 1301, 1674, 1114, 1780 + ] + } + ], + "language": "ko" +} diff --git a/examples/outetts/speakers/ko_male_2.json b/examples/outetts/speakers/ko_male_2.json new file mode 100644 index 000000000..45a2d86fc --- /dev/null +++ b/examples/outetts/speakers/ko_male_2.json @@ -0,0 +1,128 @@ +{ + "text": " \uba70\uce60 \ud6c4, \ud654\uac00\ub09c \ubd80\uc790\uc5d0\uac8c \uadf8\ub9bc\uc744 \ubcf4\uc5ec\uc8fc\uc5c8\ub2e4. 
\uae30\ub2e4\ub9ac\ub358 \uadf8\ub9bc\uc744 \ubc1b\uc740 \ubd80\uc790\ub294 \ub108\ubb34\ub098 \uae30\ubed0\ud558\uba70 \uadf8\ub9bc\uc744 \ubcf4\uc558\ub2e4.", + "words": [ + { + "word": "myeocil", + "duration": 0.32, + "codes": [ + 561, 27, 1809, 1516, 268, 479, 181, 657, 407, 711, 642, 18, 1449, 1609, 18, 790, 135, 206, 780, 1389, + 628, 543, 1616, 506 + ] + }, + { + "word": "hu", + "duration": 0.08, + "codes": [1351, 595, 211, 1625, 532, 29] + }, + { + "word": "hwaganan", + "duration": 1.07, + "codes": [ + 1311, 1388, 682, 1435, 1323, 1373, 1063, 1594, 1746, 1684, 1765, 1716, 548, 301, 1121, 735, 1348, 710, + 601, 1719, 457, 465, 636, 1555, 691, 986, 765, 1536, 227, 824, 1244, 169, 459, 704, 548, 1214, 136, + 1181, 707, 396, 929, 1730, 1648, 1276, 1023, 676, 627, 1550, 1774, 1500, 1688, 1468, 961, 705, 271, 339, + 587, 565, 112, 320, 1180, 1650, 1608, 1267, 547, 452, 386, 407, 1823, 63, 282, 472, 245, 380, 876, 1590, + 1345, 1048, 184, 1263 + ] + }, + { + "word": "bujaege", + "duration": 0.57, + "codes": [ + 1575, 787, 1770, 1084, 1267, 1826, 1092, 193, 765, 1715, 985, 430, 1382, 1493, 1272, 184, 268, 182, 455, + 1712, 541, 399, 45, 678, 864, 430, 660, 276, 1073, 466, 263, 1136, 759, 178, 1581, 1617, 1711, 930, 407, + 768, 557, 190, 45 + ] + }, + { + "word": "geurimeul", + "duration": 0.48, + "codes": [ + 600, 79, 45, 599, 326, 1238, 895, 936, 1703, 624, 516, 736, 1492, 948, 790, 1389, 637, 1596, 245, 882, + 515, 198, 143, 395, 479, 262, 1663, 742, 1026, 1591, 218, 600, 289, 49, 621, 112 + ] + }, + { + "word": "boyeojueossda", + "duration": 0.59, + "codes": [ + 124, 1812, 1285, 1681, 1649, 1804, 1492, 952, 48, 47, 683, 143, 616, 886, 546, 303, 1618, 1734, 1217, + 358, 882, 1439, 621, 1674, 1113, 1081, 864, 1770, 234, 1160, 1622, 766, 1654, 1422, 1711, 275, 287, + 1781, 547, 1498, 76, 1143, 1200, 1330 + ] + }, + { + "word": "gidarideon", + "duration": 1.96, + "codes": [ + 1117, 1629, 1829, 1197, 1224, 1276, 1506, 1721, 1038, 223, 1108, 670, 727, 1142, 762, 1246, 
1273, 458, + 694, 696, 275, 686, 287, 417, 465, 275, 704, 696, 606, 325, 538, 401, 766, 54, 1242, 487, 487, 862, 391, + 257, 795, 275, 1151, 287, 401, 325, 275, 1242, 712, 360, 1016, 54, 1151, 257, 445, 1570, 559, 401, 1016, + 534, 686, 585, 606, 325, 54, 704, 387, 360, 325, 487, 606, 1016, 534, 704, 567, 765, 862, 445, 975, 274, + 534, 686, 824, 538, 1244, 54, 1390, 585, 487, 1623, 567, 585, 1016, 566, 325, 766, 287, 1390, 54, 274, + 1244, 538, 1242, 986, 680, 862, 566, 1293, 995, 1288, 784, 1375, 694, 1474, 1278, 581, 1027, 592, 934, + 107, 780, 1728, 1692, 1588, 135, 116, 152, 153, 945, 137, 1544, 152, 189, 1420, 736, 1669, 1554, 1407, + 756, 135, 614, 536, 46, 1497, 748, 319, 807 + ] + }, + { + "word": "geurimeul", + "duration": 0.39, + "codes": [ + 1350, 1351, 1620, 820, 1783, 1481, 604, 1278, 979, 1645, 157, 529, 429, 213, 1518, 1733, 263, 181, 280, + 500, 911, 1778, 1335, 211, 433, 315, 357, 983, 536 + ] + }, + { + "word": "badeun", + "duration": 0.25, + "codes": [ + 1567, 1756, 1382, 791, 341, 682, 107, 536, 1334, 1522, 1633, 521, 200, 2, 1599, 1493, 1004, 1612, 1368 + ] + }, + { + "word": "bujaneun", + "duration": 0.52, + "codes": [ + 1542, 1594, 197, 949, 1444, 1293, 1273, 1721, 1247, 782, 1395, 1611, 1252, 1537, 1341, 268, 1753, 1018, + 836, 654, 10, 1287, 282, 456, 1519, 300, 853, 1439, 1251, 844, 291, 1287, 430, 374, 1336, 1830, 1751, + 917, 1750 + ] + }, + { + "word": "neomuna", + "duration": 0.92, + "codes": [ + 521, 1023, 1257, 239, 1819, 1464, 1481, 1454, 1406, 1479, 250, 313, 758, 1486, 1436, 572, 259, 669, 832, + 301, 869, 163, 1028, 717, 523, 1168, 470, 1305, 1305, 470, 1168, 717, 967, 1164, 396, 1367, 1343, 681, + 1282, 584, 1123, 1466, 1465, 1440, 766, 1408, 1782, 920, 125, 200, 609, 934, 1004, 1077, 123, 945, 1520, + 1503, 127, 98, 303, 1340, 1732, 52, 95, 429, 100, 1281, 100 + ] + }, + { + "word": "gibbeohamyeo", + "duration": 0.52, + "codes": [ + 903, 29, 344, 1683, 1011, 1500, 328, 340, 473, 104, 557, 104, 1549, 1564, 1114, 1142, 936, 
1578, 1810, + 775, 940, 664, 268, 13, 112, 289, 65, 488, 29, 1416, 815, 1290, 1485, 455, 51, 488, 79, 687, 1161 + ] + }, + { + "word": "geurimeul", + "duration": 0.44, + "codes": [ + 794, 569, 214, 297, 716, 1273, 1428, 603, 1799, 1075, 1814, 650, 1492, 1497, 110, 143, 324, 350, 1620, + 384, 1217, 903, 863, 1729, 515, 803, 1492, 1690, 725, 153, 575, 1646, 1696 + ] + }, + { + "word": "boassda", + "duration": 0.32, + "codes": [ + 758, 1642, 1468, 1437, 53, 1369, 95, 1397, 753, 560, 1355, 1708, 1639, 1262, 603, 1289, 68, 975, 1300, + 1073, 179, 1126, 1252, 1206 + ] + } + ], + "language": "ko" +} diff --git a/examples/outetts/speakers/picard.json b/examples/outetts/speakers/picard.json new file mode 100644 index 000000000..2ba54196c --- /dev/null +++ b/examples/outetts/speakers/picard.json @@ -0,0 +1,1329 @@ +{ + "text": "also believes that the surviving officers are deliberately withholding vital information from this inquiry. Further investigation is recommended. Will, there was no further investigation. 
This report was classified and then it was quietly buried.", + "words": [ + { + "word": "also", + "duration": 0.43, + "codes": [ + 1746, + 1117, + 1408, + 1434, + 1397, + 1010, + 1671, + 1648, + 1210, + 1794, + 1347, + 922, + 203, + 1563, + 1517, + 225, + 949, + 1060, + 994, + 700, + 1793, + 1345, + 596, + 599, + 100, + 522, + 556, + 366, + 1777, + 1295, + 829, + 1002 + ] + }, + { + "word": "believes", + "duration": 0.68, + "codes": [ + 341, + 1640, + 1147, + 1000, + 1755, + 681, + 936, + 1767, + 1719, + 91, + 642, + 1407, + 1368, + 1718, + 653, + 1790, + 1130, + 987, + 1415, + 1794, + 1087, + 290, + 1762, + 1178, + 1082, + 1138, + 454, + 1313, + 1630, + 471, + 1289, + 1447, + 1139, + 729, + 1253, + 878, + 1312, + 1308, + 1393, + 1049, + 1497, + 1247, + 733, + 1677, + 1511, + 1353, + 932, + 756, + 1178, + 1651, + 69 + ] + }, + { + "word": "that", + "duration": 0.21, + "codes": [ + 292, + 1287, + 1439, + 438, + 1665, + 1631, + 1622, + 1001, + 798, + 155, + 1607, + 1275, + 239, + 1071, + 1405, + 1806 + ] + }, + { + "word": "the", + "duration": 0.19, + "codes": [ + 555, + 1556, + 1023, + 861, + 1699, + 1821, + 1146, + 368, + 1738, + 609, + 1081, + 931, + 533, + 1100 + ] + }, + { + "word": "surviving", + "duration": 0.72, + "codes": [ + 596, + 1745, + 1156, + 863, + 883, + 1700, + 1410, + 943, + 135, + 522, + 46, + 245, + 592, + 232, + 1256, + 1505, + 1042, + 1457, + 1454, + 1704, + 1500, + 1806, + 1088, + 28, + 1439, + 1001, + 1192, + 590, + 367, + 1042, + 1123, + 260, + 571, + 1051, + 475, + 324, + 387, + 93, + 687, + 470, + 1356, + 1681, + 627, + 1370, + 1716, + 1263, + 736, + 684, + 577, + 1494, + 1411, + 971, + 1454, + 1422 + ] + }, + { + "word": "officers", + "duration": 0.61, + "codes": [ + 464, + 1397, + 1412, + 1279, + 1407, + 115, + 880, + 88, + 907, + 1045, + 525, + 1821, + 1063, + 1801, + 0, + 1788, + 985, + 1422, + 1754, + 1126, + 1304, + 1607, + 375, + 1241, + 69, + 206, + 202, + 245, + 430, + 290, + 570, + 1815, + 1263, + 635, + 687, + 1444, + 1429, + 
1004, + 1499, + 1281, + 1342, + 1060, + 1532, + 1180, + 1450, + 357 + ] + }, + { + "word": "are", + "duration": 0.35, + "codes": [ + 549, + 452, + 1402, + 1426, + 1423, + 891, + 1397, + 1380, + 1735, + 19, + 1262, + 1256, + 1126, + 1829, + 1331, + 1066, + 777, + 147, + 303, + 1420, + 1218, + 138, + 1286, + 737, + 1511, + 1076 + ] + }, + { + "word": "deliberately", + "duration": 0.68, + "codes": [ + 1489, + 1404, + 1062, + 1714, + 1788, + 1400, + 364, + 1392, + 79, + 1468, + 1274, + 1633, + 1630, + 803, + 920, + 663, + 1457, + 437, + 1710, + 1726, + 875, + 179, + 1468, + 898, + 1542, + 548, + 1144, + 1677, + 1440, + 1671, + 1820, + 959, + 1132, + 1789, + 600, + 755, + 1618, + 839, + 1306, + 1489, + 1010, + 1700, + 329, + 886, + 1646, + 1364, + 1767, + 1297, + 388, + 703, + 388 + ] + }, + { + "word": "withholding", + "duration": 0.63, + "codes": [ + 1607, + 594, + 1541, + 1809, + 31, + 1726, + 909, + 1077, + 901, + 894, + 1781, + 1826, + 1489, + 901, + 1805, + 1556, + 1203, + 1823, + 1677, + 1308, + 613, + 1280, + 1184, + 777, + 1398, + 894, + 893, + 1623, + 1115, + 1368, + 1817, + 994, + 1344, + 988, + 1803, + 1273, + 1032, + 346, + 1138, + 836, + 1287, + 1062, + 1462, + 1500, + 792, + 1743, + 1658 + ] + }, + { + "word": "vital", + "duration": 0.41, + "codes": [ + 86, + 1462, + 1809, + 1823, + 1002, + 1781, + 1436, + 1485, + 1258, + 569, + 1158, + 492, + 642, + 1186, + 1167, + 147, + 1565, + 922, + 549, + 1803, + 984, + 1714, + 1248, + 1333, + 400, + 1139, + 1287, + 1511, + 525, + 1593, + 1636 + ] + }, + { + "word": "information", + "duration": 0.6, + "codes": [ + 771, + 1758, + 1724, + 1037, + 599, + 1263, + 218, + 616, + 1824, + 1432, + 1179, + 1417, + 719, + 1760, + 493, + 22, + 1325, + 462, + 1356, + 1325, + 425, + 1396, + 1259, + 472, + 1462, + 382, + 291, + 907, + 796, + 1193, + 452, + 1368, + 1347, + 989, + 58, + 347, + 595, + 1253, + 463, + 400, + 1797, + 1320, + 792, + 1280, + 1743 + ] + }, + { + "word": "from", + "duration": 0.23, + "codes": [ + 804, + 
1716, + 1160, + 1800, + 985, + 117, + 1313, + 1588, + 294, + 1356, + 801, + 352, + 1483, + 1032, + 798, + 1028, + 1381 + ] + }, + { + "word": "this", + "duration": 0.16, + "codes": [ + 926, + 1822, + 484, + 1651, + 1353, + 437, + 940, + 1407, + 1711, + 335, + 1088, + 901 + ] + }, + { + "word": "inquiry", + "duration": 0.45, + "codes": [ + 1736, + 664, + 640, + 248, + 1, + 974, + 1638, + 998, + 891, + 1511, + 970, + 1492, + 1658, + 923, + 827, + 1079, + 1720, + 1682, + 1291, + 941, + 79, + 1312, + 1395, + 583, + 951, + 870, + 894, + 1126, + 117, + 910, + 1459, + 400, + 1076, + 324 + ] + }, + { + "word": "further", + "duration": 0.87, + "codes": [ + 1731, + 58, + 1002, + 1071, + 1434, + 1671, + 1380, + 1177, + 1504, + 1683, + 1153, + 1092, + 1797, + 1714, + 805, + 1496, + 1321, + 1658, + 1632, + 1092, + 1652, + 1419, + 681, + 1621, + 1425, + 805, + 1665, + 792, + 1665, + 1809, + 406, + 1329, + 861, + 1822, + 1735, + 32, + 1632, + 805, + 681, + 1428, + 32, + 1777, + 1370, + 1737, + 927, + 1342, + 1718, + 1455, + 179, + 280, + 1235, + 1161, + 88, + 234, + 456, + 532, + 1411, + 1481, + 1705, + 1431, + 818, + 604, + 1791, + 225, + 1426 + ] + }, + { + "word": "investigation", + "duration": 0.73, + "codes": [ + 1705, + 1089, + 407, + 55, + 1506, + 1130, + 1010, + 1477, + 1805, + 987, + 841, + 324, + 39, + 326, + 1313, + 1448, + 1677, + 135, + 372, + 67, + 1476, + 862, + 1370, + 1636, + 855, + 208, + 319, + 1509, + 823, + 638, + 788, + 972, + 299, + 512, + 664, + 138, + 959, + 773, + 1720, + 1186, + 1345, + 1285, + 1074, + 715, + 1651, + 604, + 886, + 985, + 318, + 1830, + 1820, + 1795, + 1357, + 1638, + 1806 + ] + }, + { + "word": "is", + "duration": 0.23, + "codes": [ + 1598, + 1665, + 792, + 1683, + 1466, + 1062, + 1171, + 495, + 1063, + 430, + 1477, + 1423, + 1677, + 1801, + 430, + 440, + 1287 + ] + }, + { + "word": "recommended", + "duration": 0.6, + "codes": [ + 872, + 1819, + 1224, + 613, + 1235, + 222, + 371, + 721, + 1259, + 1478, + 717, + 1098, + 1769, + 1770, + 
1358, + 552, + 1420, + 484, + 936, + 1686, + 1219, + 147, + 910, + 499, + 492, + 1134, + 1511, + 905, + 1415, + 1553, + 307, + 1831, + 1244, + 5, + 1399, + 1160, + 992, + 466, + 860, + 1304, + 1434, + 1421, + 651, + 1795, + 1408 + ] + }, + { + "word": "will", + "duration": 0.44, + "codes": [ + 985, + 1738, + 744, + 1205, + 121, + 1556, + 1053, + 1759, + 1143, + 1325, + 1808, + 1126, + 1822, + 1822, + 1752, + 891, + 1832, + 1699, + 891, + 1667, + 1373, + 326, + 1526, + 143, + 1323, + 1342, + 553, + 1697, + 483, + 720, + 1670, + 1219, + 1666 + ] + }, + { + "word": "there", + "duration": 0.16, + "codes": [ + 899, + 845, + 1100, + 1110, + 371, + 607, + 212, + 516, + 818, + 41, + 1011, + 1070 + ] + }, + { + "word": "was", + "duration": 0.16, + "codes": [ + 1605, + 626, + 105, + 1410, + 267, + 1295, + 797, + 770, + 519, + 1088, + 1061, + 863 + ] + }, + { + "word": "no", + "duration": 0.12, + "codes": [ + 1465, + 1301, + 1739, + 1662, + 647, + 570, + 352, + 1677, + 260 + ] + }, + { + "word": "further", + "duration": 0.43, + "codes": [ + 1357, + 182, + 655, + 779, + 1785, + 1321, + 1256, + 901, + 1685, + 827, + 1422, + 1450, + 400, + 1649, + 359, + 628, + 429, + 1325, + 595, + 456, + 117, + 260, + 1456, + 1447, + 566, + 1746, + 547, + 1801, + 159, + 1627, + 1257, + 1074 + ] + }, + { + "word": "investigation", + "duration": 0.77, + "codes": [ + 1068, + 565, + 1669, + 1155, + 1225, + 744, + 970, + 1741, + 1176, + 1741, + 1477, + 573, + 267, + 499, + 352, + 1438, + 239, + 1611, + 263, + 330, + 863, + 592, + 1631, + 1669, + 943, + 1743, + 446, + 1278, + 183, + 684, + 930, + 420, + 1504, + 393, + 24, + 39, + 248, + 305, + 1447, + 65, + 940, + 1155, + 854, + 1803, + 1438, + 596, + 1310, + 570, + 499, + 886, + 499, + 1351, + 1443, + 841, + 1493, + 1731, + 1432, + 1454 + ] + }, + { + "word": "this", + "duration": 0.27, + "codes": [ + 1057, + 1754, + 1071, + 1279, + 1373, + 1486, + 901, + 525, + 1542, + 1509, + 1823, + 232, + 1139, + 69, + 656, + 1337, + 330, + 927, + 276, + 366 + 
] + }, + { + "word": "report", + "duration": 1.0, + "codes": [ + 1342, + 69, + 382, + 870, + 206, + 1088, + 357, + 318, + 1403, + 245, + 715, + 714, + 366, + 719, + 292, + 715, + 1741, + 499, + 1806, + 1735, + 1432, + 1553, + 1637, + 1495, + 1553, + 1638, + 1279, + 1807, + 1375, + 1456, + 1637, + 1099, + 1390, + 1306, + 1260, + 1141, + 25, + 639, + 894, + 768, + 1800, + 516, + 1028, + 1305, + 1704, + 1760, + 1428, + 1073, + 1261, + 1331, + 1415, + 951, + 1004, + 977, + 354, + 1560, + 1224, + 1049, + 182, + 149, + 775, + 1455, + 1044, + 1326, + 1477, + 1052, + 1477, + 1459, + 1381, + 1434, + 950, + 1251, + 1495, + 891, + 1553 + ] + }, + { + "word": "was", + "duration": 0.28, + "codes": [ + 1730, + 1741, + 330, + 599, + 218, + 299, + 1751, + 1282, + 1397, + 438, + 1556, + 1648, + 1294, + 1100, + 1237, + 1493, + 1607, + 366, + 780, + 366, + 1259 + ] + }, + { + "word": "classified", + "duration": 0.73, + "codes": [ + 1478, + 1793, + 939, + 627, + 847, + 172, + 498, + 315, + 1476, + 359, + 589, + 589, + 386, + 958, + 1435, + 1351, + 1325, + 1018, + 135, + 599, + 67, + 366, + 1450, + 1424, + 791, + 41, + 1742, + 1410, + 1802, + 715, + 1450, + 1496, + 239, + 708, + 1066, + 956, + 221, + 779, + 1177, + 524, + 545, + 1264, + 1040, + 816, + 926, + 1448, + 990, + 41, + 1015, + 788, + 1071, + 1745, + 1655, + 458, + 1247 + ] + }, + { + "word": "and", + "duration": 0.12, + "codes": [ + 1129, + 1307, + 1807, + 1157, + 1178, + 886, + 1233, + 1746, + 1639 + ] + }, + { + "word": "then", + "duration": 0.13, + "codes": [ + 1414, + 1638, + 1700, + 1126, + 665, + 1318, + 1677, + 1346, + 792, + 1241 + ] + }, + { + "word": "it", + "duration": 0.09, + "codes": [ + 1720, + 1483, + 1364, + 891, + 1770, + 957, + 727 + ] + }, + { + "word": "was", + "duration": 0.15, + "codes": [ + 1462, + 1117, + 1802, + 1197, + 1404, + 1518, + 366, + 604, + 1808, + 1314, + 805 + ] + }, + { + "word": "quietly", + "duration": 0.61, + "codes": [ + 1770, + 1621, + 1294, + 1667, + 957, + 1408, + 1801, + 1484, + 
1298, + 1818, + 1718, + 22, + 1637, + 1485, + 1005, + 1258, + 306, + 880, + 1155, + 595, + 1462, + 955, + 556, + 1827, + 1125, + 1351, + 1483, + 1278, + 1818, + 1658, + 1632, + 681, + 1705, + 1360, + 121, + 1775, + 375, + 947, + 1790, + 1436, + 799, + 1486, + 344, + 1736, + 1631, + 1250 + ] + }, + { + "word": "buried", + "duration": 0.68, + "codes": [ + 1505, + 802, + 1668, + 1784, + 1707, + 627, + 1806, + 1623, + 1092, + 1623, + 1380, + 1767, + 1648, + 32, + 1779, + 1474, + 984, + 1694, + 1725, + 1373, + 1465, + 591, + 1758, + 186, + 441, + 1647, + 1400, + 841, + 604, + 1073, + 1183, + 1153, + 1661, + 1172, + 1279, + 1787, + 1208, + 1370, + 1237, + 947, + 990, + 1265, + 833, + 1799, + 1677, + 1760, + 681, + 1779, + 121, + 1373, + 1738 + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/examples/outetts/speakers/zh_female_1.json b/examples/outetts/speakers/zh_female_1.json new file mode 100644 index 000000000..765f2a970 --- /dev/null +++ b/examples/outetts/speakers/zh_female_1.json @@ -0,0 +1,283 @@ +{ + "text": "\u98df\u9053\u764c\u7b49\u7b49\u3002\u4e8c\u5341\u4e8c\u79cd\u91cd\u5927\u75be\u75c5\u7684\u8bdd\u4e0d\u4ec5\u53ef\u4ee5\u62a5\u9500\uff0c\u8fd8\u53ef\u4ee5\u4eab\u53d7\u5230\u989d\u5916\u6700\u9ad8\u5341\u4e07\u5143\u7684\u5927\u75c5\u8865\u52a9\u3002\u7b2c\u4e8c\u4e2a\u5c31\u662f\u62a5\u9500\u6bd4\u4f8b\u4e4b\u524d\u4e00\u4e03\u5e74\u90a3\u7248\u7684\u65f6\u5019\u3002", + "words": [ + { + "word": "shidao", + "duration": 0.63, + "codes": [ + 962, 1375, 1459, 11, 1735, 1139, 1061, 1671, 796, 1375, 1500, 889, 1752, 1370, 527, 1642, 890, 1222, + 1308, 1678, 1051, 1022, 1762, 1100, 143, 1323, 158, 1281, 1106, 1781, 1727, 1269, 101, 855, 1201, 446, + 1066, 826, 115, 1271, 497, 1309, 221, 818, 1149, 1200, 524 + ] + }, + { + "word": "ai", + "duration": 0.21, + "codes": [1183, 721, 1399, 956, 1356, 880, 1222, 393, 1095, 370, 942, 1247, 552, 873, 1457, 1403] + }, + { + "word": "deng", + "duration": 0.27, + "codes": [ + 955, 753, 1471, 1499, 
1740, 776, 1080, 1512, 1642, 996, 871, 1063, 1345, 1731, 1501, 1409, 1506, 904, + 1301, 1807 + ] + }, + { + "word": "deng", + "duration": 0.48, + "codes": [ + 261, 1707, 633, 1142, 1614, 301, 1262, 933, 986, 1338, 121, 939, 1792, 566, 899, 1474, 799, 1157, 659, + 762, 377, 1609, 1423, 1407, 550, 130, 756, 1044, 794, 1017, 1698, 382, 1196, 840, 1108, 1358 + ] + }, + { + "word": "twenty", + "duration": 0.44, + "codes": [ + 1071, 1385, 1348, 509, 1771, 171, 334, 563, 984, 1245, 573, 934, 390, 1038, 461, 477, 1064, 1247, 477, + 1037, 519, 1772, 943, 788, 1735, 962, 1637, 701, 176, 977, 1038, 1235, 613 + ] + }, + { + "word": "two", + "duration": 0.08, + "codes": [1106, 1328, 565, 1111, 879, 441] + }, + { + "word": "zhongzhong", + "duration": 0.48, + "codes": [ + 841, 826, 422, 1159, 1099, 1437, 883, 1696, 1529, 1586, 1240, 1202, 1443, 1494, 894, 1433, 998, 1490, + 1489, 86, 1087, 1152, 1779, 59, 1246, 784, 906, 1119, 1402, 1757, 876, 130, 1762, 1353, 982, 905 + ] + }, + { + "word": "da", + "duration": 0.09, + "codes": [1800, 728, 663, 1791, 1492, 797, 963] + }, + { + "word": "jibing", + "duration": 0.43, + "codes": [ + 1183, 1060, 1110, 1198, 1078, 992, 1822, 1619, 1428, 1571, 784, 634, 1783, 1229, 771, 1544, 801, 32, + 1596, 1630, 414, 1550, 805, 892, 826, 1078, 1345, 1137, 965, 567, 1312, 987 + ] + }, + { + "word": "de", + "duration": 0.08, + "codes": [1545, 1706, 909, 1245, 818, 1494] + }, + { + "word": "huabu", + "duration": 0.59, + "codes": [ + 1483, 591, 1683, 1273, 795, 1601, 925, 1103, 883, 1001, 819, 1501, 1143, 1258, 998, 1200, 1640, 1504, + 929, 345, 1385, 939, 939, 1153, 1003, 1556, 1303, 1003, 1404, 957, 1118, 1809, 1262, 1114, 1273, 680, + 1273, 1262, 765, 1655, 1071, 825, 856, 737 + ] + }, + { + "word": "jinke", + "duration": 0.29, + "codes": [ + 856, 1456, 1825, 1563, 1350, 917, 489, 1464, 1217, 1124, 250, 1281, 80, 1792, 1659, 1299, 1160, 1400, + 1322, 679, 801, 594 + ] + }, + { + "word": "yi", + "duration": 0.15, + "codes": [1287, 759, 690, 322, 1563, 
1797, 1460, 427, 671, 428, 1136] + }, + { + "word": "baoxiao", + "duration": 0.45, + "codes": [ + 437, 1323, 659, 17, 1555, 420, 659, 1666, 260, 819, 1505, 847, 718, 655, 466, 982, 1525, 1518, 1737, + 554, 415, 1565, 884, 906, 1356, 42, 466, 978, 797, 879, 446, 826, 1180, 446 + ] + }, + { + "word": "haike", + "duration": 0.6, + "codes": [ + 1165, 1333, 1686, 633, 964, 1478, 957, 1282, 1385, 1714, 19, 1465, 1434, 1738, 1648, 1117, 1705, 1448, + 1611, 680, 1279, 1246, 606, 1380, 633, 1130, 1153, 1823, 1196, 1324, 549, 997, 819, 1280, 782, 1256, + 1612, 414, 1500, 1436, 1752, 1500, 1022, 302, 225 + ] + }, + { + "word": "yi", + "duration": 0.11, + "codes": [982, 1376, 1364, 1501, 197, 909, 834, 789] + }, + { + "word": "xiangshou", + "duration": 0.44, + "codes": [ + 447, 882, 1111, 495, 1518, 1549, 1376, 731, 736, 1758, 9, 1198, 711, 1264, 1630, 955, 1317, 1167, 1710, + 1339, 1821, 10, 176, 788, 1720, 973, 690, 40, 1521, 910, 732, 864, 963 + ] + }, + { + "word": "dao", + "duration": 0.2, + "codes": [1826, 1239, 450, 1439, 589, 1131, 764, 871, 1277, 660, 594, 907, 1129, 580, 1487] + }, + { + "word": "ewai", + "duration": 0.37, + "codes": [ + 1169, 1158, 1782, 1118, 1358, 1063, 1087, 1792, 1344, 908, 621, 86, 705, 832, 753, 675, 594, 1755, 58, + 1281, 1047, 757, 518, 819, 1171, 512, 1177, 110 + ] + }, + { + "word": "zuigao", + "duration": 0.36, + "codes": [ + 720, 353, 1685, 1685, 928, 34, 906, 1210, 1501, 1140, 1111, 428, 1638, 969, 770, 60, 1619, 1697, 1475, + 1127, 818, 1368, 291, 1044, 1272, 1162, 1306 + ] + }, + { + "word": "ten", + "duration": 0.27, + "codes": [ + 1326, 1339, 1649, 736, 1088, 308, 1774, 962, 1146, 1738, 1061, 495, 640, 713, 1774, 321, 130, 1697, + 1670, 1252 + ] + }, + { + "word": "wan", + "duration": 0.16, + "codes": [779, 936, 907, 1137, 412, 1119, 1106, 142, 1268, 699, 772, 1166] + }, + { + "word": "yuan", + "duration": 0.19, + "codes": [825, 644, 754, 1600, 1431, 594, 864, 1065, 806, 1118, 903, 908, 1373, 1773] + }, + { + "word": "de", + 
"duration": 0.09, + "codes": [728, 845, 973, 1063, 1087, 382, 1275] + }, + { + "word": "dabing", + "duration": 0.4, + "codes": [ + 1124, 1747, 343, 712, 686, 939, 1097, 642, 934, 580, 130, 1197, 1088, 874, 492, 531, 1790, 1555, 1246, + 933, 1505, 913, 489, 893, 1546, 1830, 894, 40, 1811, 1488 + ] + }, + { + "word": "buzhu", + "duration": 0.31, + "codes": [ + 663, 1676, 64, 505, 377, 1294, 1076, 828, 1417, 729, 1060, 1555, 776, 933, 1290, 654, 1078, 1065, 1317, + 29, 942, 149, 1596 + ] + }, + { + "word": "di", + "duration": 0.49, + "codes": [ + 992, 1463, 1802, 1144, 1803, 710, 287, 1428, 1157, 1716, 1130, 923, 1714, 1230, 516, 1059, 1799, 1199, + 1412, 513, 1223, 1327, 513, 1668, 377, 735, 758, 1080, 939, 710, 509, 1657, 1726, 1202, 1665, 441, 836 + ] + }, + { + "word": "two", + "duration": 0.12, + "codes": [1168, 260, 1208, 1220, 248, 1183, 1201, 1169, 1197] + }, + { + "word": "gejiu", + "duration": 0.39, + "codes": [ + 76, 880, 531, 656, 390, 1046, 1228, 1280, 988, 1545, 734, 813, 864, 1501, 774, 116, 472, 916, 328, 1379, + 1425, 1597, 15, 1077, 1100, 1218, 1202, 1296, 1736 + ] + }, + { + "word": "shi", + "duration": 0.13, + "codes": [1312, 1267, 1750, 11, 554, 1057, 1532, 1127, 621, 130] + }, + { + "word": "baoxiao", + "duration": 0.39, + "codes": [ + 1808, 1581, 5, 1614, 436, 696, 1086, 294, 754, 996, 632, 594, 892, 897, 984, 175, 1624, 1580, 4, 1624, + 1207, 1385, 446, 492, 735, 524, 879, 1078, 1060 + ] + }, + { + "word": "bili", + "duration": 0.32, + "codes": [ + 712, 1740, 1266, 1614, 1123, 1308, 940, 1052, 834, 1375, 1668, 1825, 1360, 1304, 1285, 1234, 1581, 1700, + 894, 1518, 836, 735, 877, 1287 + ] + }, + { + "word": "zhi", + "duration": 0.4, + "codes": [ + 982, 945, 1652, 1251, 1669, 504, 1003, 1404, 624, 1404, 1306, 751, 1410, 1465, 1079, 1496, 751, 1803, + 1474, 651, 1525, 986, 680, 1527, 1580, 756, 1333, 521, 405, 860 + ] + }, + { + "word": "qian", + "duration": 0.24, + "codes": [1078, 1137, 577, 1636, 1547, 1552, 368, 93, 674, 57, 477, 753, 1232, 
1064, 117, 798, 395, 1295] + }, + { + "word": "one", + "duration": 0.12, + "codes": [717, 1713, 705, 898, 325, 628, 717, 47, 1436] + }, + { + "word": "seven", + "duration": 0.31, + "codes": [ + 955, 639, 1086, 1106, 973, 1617, 1783, 1664, 1580, 1565, 0, 1513, 788, 233, 298, 1477, 1193, 1361, 1166, + 1457, 1115, 188, 1499 + ] + }, + { + "word": "nian", + "duration": 0.2, + "codes": [1599, 580, 1663, 1486, 1366, 466, 908, 759, 1106, 609, 879, 886, 825, 188, 1776] + }, + { + "word": "na", + "duration": 0.09, + "codes": [1759, 1365, 120, 982, 1403, 1139, 759] + }, + { + "word": "ban", + "duration": 0.2, + "codes": [675, 1311, 1018, 1109, 272, 1692, 959, 149, 573, 1187, 1263, 1089, 1541, 1424, 1766] + }, + { + "word": "de", + "duration": 0.12, + "codes": [1697, 1414, 1084, 1757, 832, 1001, 1801, 942, 0] + }, + { + "word": "shihou", + "duration": 0.39, + "codes": [ + 1014, 1779, 749, 1734, 1082, 750, 1738, 746, 1600, 713, 1820, 1253, 982, 1353, 1260, 1257, 1462, 1049, + 1794, 1103, 1775, 992, 1224, 1017, 1022, 942, 1081, 1484, 994 + ] + } + ], + "language": "zh" +} diff --git a/examples/outetts/speakers/zh_male_1.json b/examples/outetts/speakers/zh_male_1.json new file mode 100644 index 000000000..0de88f705 --- /dev/null +++ b/examples/outetts/speakers/zh_male_1.json @@ -0,0 +1,234 @@ +{ + "text": "\u8fd1\u65e5\uff0c\u963f\u91cc\u5168\u8d44\u6536\u8d2d\u997f\u4e86\u4e48\u7684\u6d88\u606f\u6380\u8d77\u4e86\u4e0d\u5c0f\u6d6a\u82b1\u3002\u636e\u79f0\u4e09\u4e2a\u6708\u5185\uff0c\u963f\u91cc\u5c06\u6309\u7167\u4e5d\u5341\u4e94\u4ebf\u7684\u4ef7\u683c\u6536\u8d2d\u997f\u4e86\u4e48\u5168\u90e8\u80a1\u4efd\u3002", + "words": [ + { + "word": "jinri", + "duration": 0.32, + "codes": [ + 1536, 1246, 939, 1703, 1704, 930, 253, 18, 372, 794, 616, 689, 656, 1440, 51, 1024, 406, 946, 889, 1572, + 1268, 999, 982, 1104 + ] + }, + { + "word": "ali", + "duration": 0.72, + "codes": [ + 1611, 727, 1390, 442, 1142, 795, 1352, 1130, 1279, 1007, 1059, 1348, 1495, 1495, 1419, 1170, 1667, 1432, + 
1040, 1751, 1395, 1434, 1751, 1098, 1777, 1817, 1177, 1118, 1397, 869, 1510, 1638, 538, 861, 1174, 997, + 321, 173, 83, 271, 159, 520, 106, 520, 236, 609, 1686, 863, 1589, 1726, 263, 700, 1472, 528 + ] + }, + { + "word": "quan", + "duration": 0.31, + "codes": [ + 1281, 645, 1762, 157, 459, 1534, 231, 633, 1667, 62, 1351, 232, 769, 239, 927, 1499, 535, 443, 330, + 1471, 746, 787, 1540 + ] + }, + { + "word": "zishou", + "duration": 0.4, + "codes": [ + 1518, 1497, 994, 1709, 1570, 1660, 543, 1376, 1300, 1217, 1382, 98, 688, 376, 780, 1823, 1800, 366, + 1340, 358, 499, 713, 609, 871, 576, 725, 934, 148, 455, 1286 + ] + }, + { + "word": "goue", + "duration": 0.29, + "codes": [ + 1675, 1508, 1672, 1548, 1790, 1001, 443, 352, 716, 1660, 210, 1540, 1308, 1131, 1740, 1749, 288, 1721, + 1812, 755, 853, 152 + ] + }, + { + "word": "le", + "duration": 0.16, + "codes": [1060, 143, 665, 779, 191, 57, 184, 1019, 1784, 1041, 374, 1720] + }, + { + "word": "mede", + "duration": 0.23, + "codes": [303, 1829, 1236, 92, 744, 1081, 868, 1204, 1032, 1056, 599, 859, 1242, 1399, 1144, 1113, 384] + }, + { + "word": "xiaoxi", + "duration": 0.4, + "codes": [ + 1804, 821, 1800, 62, 356, 1451, 10, 1325, 1731, 535, 475, 1053, 210, 1804, 306, 779, 1823, 93, 1472, + 788, 949, 26, 780, 0, 24, 556, 1519, 844, 902, 932 + ] + }, + { + "word": "xianqi", + "duration": 0.4, + "codes": [ + 253, 1741, 1401, 1139, 1668, 1833, 919, 1422, 340, 308, 1270, 556, 1823, 65, 1365, 305, 825, 701, 1136, + 1233, 1439, 1388, 652, 1283, 1364, 24, 1379, 1175, 1434, 1249 + ] + }, + { + "word": "le", + "duration": 0.12, + "codes": [346, 1810, 1393, 1148, 1048, 218, 797, 1803, 1047] + }, + { + "word": "bu", + "duration": 0.12, + "codes": [1718, 986, 220, 1322, 1812, 1651, 1252, 503, 1708] + }, + { + "word": "xiaolang", + "duration": 0.4, + "codes": [ + 1720, 36, 1605, 276, 863, 350, 335, 1718, 997, 1072, 1111, 969, 1105, 1801, 1743, 1484, 716, 1705, 716, + 860, 1304, 446, 1317, 107, 1201, 1323, 1661, 1294, 1152, 1465 + ] + }, 
+ { + "word": "hua", + "duration": 0.19, + "codes": [1801, 1810, 1375, 1532, 1710, 1637, 1599, 1726, 1109, 1804, 210, 491, 1174, 115] + }, + { + "word": "ju", + "duration": 0.73, + "codes": [ + 411, 1117, 565, 225, 1309, 1212, 240, 1322, 710, 487, 1033, 740, 744, 1121, 17, 1108, 1754, 261, 1037, + 899, 5, 1028, 624, 1305, 1130, 805, 1134, 1496, 1749, 1153, 1788, 1797, 1329, 1478, 1074, 1355, 1738, + 833, 1255, 591, 1338, 534, 1408, 820, 986, 1262, 1302, 805, 308, 1, 556, 357, 302, 570, 1476 + ] + }, + { + "word": "cheng", + "duration": 0.27, + "codes": [ + 1431, 135, 312, 888, 1007, 1535, 135, 592, 318, 1261, 1304, 1370, 422, 1008, 1450, 467, 917, 1748, 1462, + 1236 + ] + }, + { + "word": "three", + "duration": 0.56, + "codes": [ + 1747, 265, 861, 957, 606, 923, 964, 27, 1668, 1130, 476, 1211, 976, 572, 1367, 1255, 891, 1130, 799, + 1211, 1011, 1266, 1658, 1568, 1441, 400, 18, 368, 943, 848, 9, 1192, 140, 371, 155, 192, 738, 1591, + 1022, 1456, 713, 1710 + ] + }, + { + "word": "geyue", + "duration": 0.2, + "codes": [1251, 282, 138, 671, 315, 631, 804, 1777, 357, 134, 1814, 100, 804, 162, 1234] + }, + { + "word": "nei", + "duration": 0.24, + "codes": [157, 599, 946, 890, 1626, 807, 1832, 138, 900, 270, 1420, 635, 687, 383, 1200, 1400, 178, 102] + }, + { + "word": "ali", + "duration": 0.41, + "codes": [ + 1808, 715, 1301, 1073, 888, 391, 1793, 1142, 567, 1385, 1437, 339, 699, 557, 393, 73, 1792, 25, 871, + 1457, 81, 671, 1296, 57, 1157, 1452, 597, 1030, 1451, 318, 702 + ] + }, + { + "word": "jiang", + "duration": 0.37, + "codes": [ + 47, 980, 301, 1409, 14, 1787, 1009, 1742, 157, 1528, 1246, 1724, 335, 335, 7, 1308, 969, 1143, 1194, + 711, 791, 200, 158, 312, 1649, 1173, 1315, 1466 + ] + }, + { + "word": "an", + "duration": 0.17, + "codes": [900, 824, 1815, 1456, 1235, 339, 520, 408, 1156, 846, 901, 835, 634] + }, + { + "word": "zhao", + "duration": 0.2, + "codes": [1449, 1439, 1299, 1679, 461, 1079, 142, 755, 1129, 1207, 1068, 725, 1180, 1369, 1677] + }, + { + 
"word": "ninety", + "duration": 0.28, + "codes": [ + 1821, 975, 767, 493, 592, 262, 1422, 382, 116, 1713, 123, 1651, 229, 197, 1004, 1671, 437, 1335, 1447, + 1625, 299 + ] + }, + { + "word": "five", + "duration": 0.27, + "codes": [ + 206, 595, 366, 164, 1088, 371, 1401, 856, 143, 1409, 917, 355, 906, 1062, 1625, 1732, 1545, 1607, 832, + 1472 + ] + }, + { + "word": "yide", + "duration": 0.25, + "codes": [753, 1450, 758, 47, 760, 1794, 1350, 1615, 7, 1762, 57, 232, 1819, 1695, 1710, 1737, 53, 989, 152] + }, + { + "word": "jiage", + "duration": 0.36, + "codes": [ + 1379, 1409, 1527, 1518, 10, 69, 944, 1115, 966, 1143, 1424, 1197, 339, 607, 1125, 303, 944, 1622, 1756, + 1543, 1614, 1054, 1647, 1663, 969, 971, 952 + ] + }, + { + "word": "shou", + "duration": 0.31, + "codes": [ + 47, 1671, 1590, 467, 1668, 961, 1279, 1406, 979, 218, 1382, 366, 782, 769, 1206, 1726, 142, 703, 1633, + 359, 1756, 408, 1595 + ] + }, + { + "word": "goue", + "duration": 0.32, + "codes": [ + 1413, 1538, 1511, 825, 58, 321, 775, 942, 1243, 286, 1122, 1464, 1745, 939, 287, 1037, 401, 1831, 1499, + 1533, 892, 611, 217, 577 + ] + }, + { + "word": "le", + "duration": 0.17, + "codes": [1089, 1141, 1462, 461, 341, 1524, 95, 1755, 1581, 234, 1174, 1032, 903] + }, + { + "word": "me", + "duration": 0.12, + "codes": [1542, 1593, 1764, 729, 1463, 847, 1218, 1312, 1183] + }, + { + "word": "quanbu", + "duration": 0.39, + "codes": [ + 1726, 1125, 1045, 1708, 731, 686, 178, 340, 1325, 434, 788, 1680, 1429, 498, 269, 65, 516, 1345, 1058, + 1503, 1535, 1006, 1756, 1737, 1340, 1335, 932, 348, 1124 + ] + }, + { + "word": "gu", + "duration": 0.19, + "codes": [1445, 1622, 1377, 950, 27, 1536, 1794, 1816, 1251, 1755, 1291, 1610, 1815, 1049] + }, + { + "word": "fen", + "duration": 0.28, + "codes": [ + 888, 923, 1433, 1771, 1740, 1436, 231, 719, 999, 1642, 1637, 1156, 1204, 1275, 882, 1235, 1471, 1355, + 1675, 387, 438 + ] + } + ], + "language": "zh" +} diff --git a/examples/outetts/vc1.png b/examples/outetts/vc1.png 
import outetts

# --- Settings: edit these before running the script ---
MODEL_PATH = "OuteAI/OuteTTS-0.2-500M"
MODEL_VERSION = "0.2"
LANGUAGE = "en"  # Supported languages in v0.2: en, zh, ja, ko
AUDIO_PATH = "/path/to/audio.wav"  # wav sample of the voice to clone
TRANSCRIPT = None  # None means the sample is transcribed automatically with Whisper
WHISPER_MODEL = "turbo"  # Optional: Whisper model to use (default: "turbo")
WHISPER_DEVICE = None  # Optional: device for Whisper (default: None)
OUTPUT_JSON = "speaker_output.json"

print("Speaker JSON creation for Voice Cloning for OuteTTS...")

# Build the HF-backed OuteTTS interface for the chosen model and language.
interface = outetts.InterfaceHF(
    model_version=MODEL_VERSION,
    cfg=outetts.HFModelConfig_v1(model_path=MODEL_PATH, language=LANGUAGE),
)

# Create the speaker profile from the audio sample.
speaker = interface.create_speaker(
    audio_path=AUDIO_PATH,
    transcript=TRANSCRIPT,
    whisper_model=WHISPER_MODEL,
    whisper_device=WHISPER_DEVICE,
)

# Write the profile out as JSON.
interface.save_speaker(speaker, OUTPUT_JSON)
print("Speaker JSON saved!")
var pending_storyjson_autosave = null; var mainmenu_is_untab = false; var websearch_in_progress = false; + var kcpp_tts_json = ""; var localsettings = { my_api_key: "0000000000", //put here so it can be saved and loaded in persistent mode @@ -3141,6 +3142,8 @@ Current version indicated by LITEVER below. persist_session: true, speech_synth: 0, //0 is disabled, 1000 is xtts xtts_voice: "female_calm", + kcpp_tts_voice: "kobo", + kcpp_tts_json: "", beep_on: false, notify_on: false, narrate_both_sides: false, @@ -9656,6 +9659,7 @@ Current version indicated by LITEVER below. kai_json_load(tmpstory, false); } } + update_for_sidepanel(); if(koboldcpp_has_multiplayer || koboldcpp_admin_type>0) { //force refresh @@ -11094,6 +11098,8 @@ Current version indicated by LITEVER below. } document.getElementById("ttsselect").innerHTML = ttshtml; document.getElementById("ttsselect").value = localsettings.speech_synth; + document.getElementById("kcpp_tts_voice").value = localsettings.kcpp_tts_voice; + kcpp_tts_json = localsettings.kcpp_tts_json; toggle_tts_mode(); document.getElementById("beep_on").checked = localsettings.beep_on; document.getElementById("notify_on").checked = localsettings.notify_on; @@ -11473,6 +11479,8 @@ Current version indicated by LITEVER below. localsettings.speech_synth = document.getElementById("ttsselect").value; localsettings.xtts_voice = document.getElementById("xtts_voices").value; + localsettings.kcpp_tts_voice = document.getElementById("kcpp_tts_voice").value; + localsettings.kcpp_tts_json = kcpp_tts_json; localsettings.beep_on = (document.getElementById("beep_on").checked?true:false); localsettings.notify_on = (document.getElementById("notify_on").checked?true:false); no_escape_html = (document.getElementById("no_escape_html").checked?true:false); @@ -12279,6 +12287,27 @@ Current version indicated by LITEVER below. }); } + function set_voice_clone() + { + inputBoxOkCancel("Set the Voice Clone JSON to clone an existing voice.

You can download existing voice clone JSONs, or make your own.
","Apply Voice Clone JSON",kcpp_tts_json,"Paste JSON Here",()=>{ + let userinput = getInputBoxValue().trim(); + try + { + kcpp_tts_json = ""; + if(userinput!="") + { + kcpp_tts_json = JSON.stringify(JSON.parse(userinput)); + } + } catch (e) { + console.log("Voice clone not correctly formatted!"); + } + + }, + ()=>{ + //do nothing on cancel + },true,true); + } + function restore_retried_text() { if(retry_in_progress) @@ -13083,6 +13112,11 @@ Current version indicated by LITEVER below. } else { document.getElementById("kcpp_tts_voice_custom").classList.add("hidden"); } + if (document.getElementById("kcpp_tts_voice").value == "voiceclone") { + document.getElementById("kcpp_tts_voice_clone").classList.remove("hidden"); + } else { + document.getElementById("kcpp_tts_voice_clone").classList.add("hidden"); + } } @@ -13135,10 +13169,12 @@ Current version indicated by LITEVER below. } let ssval = localsettings.speech_synth; let ssrate = localsettings.tts_speed; + let vcjson = localsettings.kcpp_tts_json; if(speech_synth_override!=null) { ssval = speech_synth_override; ssrate = document.getElementById("tts_speed").value; + vcjson = kcpp_tts_json; } if(localsettings.narrate_only_dialog) { @@ -13190,11 +13226,17 @@ Current version indicated by LITEVER below. 
}; } else { sub_endpt = apply_proxy_url(custom_kobold_endpoint + koboldcpp_tts_endpoint); + let is_voiceclone = (document.getElementById("kcpp_tts_voice").value == "voiceclone"); + let is_custom = (document.getElementById("kcpp_tts_voice").value == "custom"); payload = { "input": text, - "voice": (document.getElementById("kcpp_tts_voice").value == "custom")?document.getElementById("kcpp_tts_voice_custom").value:document.getElementById("kcpp_tts_voice").value + "voice": (is_custom)?document.getElementById("kcpp_tts_voice_custom").value:document.getElementById("kcpp_tts_voice").value }; + if(is_voiceclone && vcjson) + { + payload.speaker_json = vcjson; + } ttsheaders = get_kobold_header(); } @@ -14574,7 +14616,8 @@ Current version indicated by LITEVER below. } let is_using_o1 = custom_oai_model.toLowerCase().startsWith("o1-") || custom_oai_model.toLowerCase()=="o1" || custom_oai_model.toLowerCase().startsWith("o3-") || custom_oai_model.toLowerCase()=="o3"; - if(is_using_o1) + let is_using_4o_search = custom_oai_model.toLowerCase().includes("-search-preview"); + if(is_using_o1 || is_using_4o_search) { //o1 does not support ANY customization oai_payload = @@ -21548,8 +21591,10 @@ Current version indicated by LITEVER below. 
def tts_prepare_voice_json(jsonstr):
    """Convert an OuteTTS speaker JSON into the prompt pieces used for voice cloning.

    Args:
        jsonstr: Speaker profile as a JSON string, or an already-decoded dict.
            It must contain a "text" transcript and a "words" list whose
            entries carry "word", "duration" and "codes".

    Returns:
        A dict ``{"phrase": ..., "voice": ...}`` where "phrase" is the
        transcript with a trailing "." and "voice" holds one
        ``word<|t_D.DD|><|code_start|><|c1|>...<|code_end|>`` line per word,
        or None when the input is empty, malformed, or lacks transcript/words.
    """
    if not jsonstr:
        return None
    try:
        # Clients may send the profile either as a string or as a nested object.
        parsed_json = jsonstr if isinstance(jsonstr, dict) else json.loads(jsonstr)
        if not isinstance(parsed_json, dict):
            return None
        txt = str(parsed_json.get("text", "") or "").strip()
        items = parsed_json.get("words", [])
        if not txt or not items:
            return None
        lines = []
        for item in items:
            word = item.get("word", "")
            # Coerce to float so a missing or string-typed duration does not
            # blow up the ":.2f" format and discard the whole voice.
            duration = float(item.get("duration", 0) or 0)
            codestr = "".join(f"<|{c}|>" for c in item.get("codes", []))
            lines.append(f"{word}<|t_{duration:.2f}|><|code_start|>{codestr}<|code_end|>")
        return {"phrase": txt + ".", "voice": "\n".join(lines).strip()}
    except (ValueError, TypeError, AttributeError):
        # Malformed profile: callers treat None as "no custom speaker".
        return None
+ inputs.speaker_seed = 100 + else: + inputs.custom_speaker_text = "".encode("UTF-8") + inputs.custom_speaker_data = "".encode("UTF-8") ret = handle.tts_generate(inputs) outstr = "" if ret.status==1: diff --git a/otherarch/tts_adapter.cpp b/otherarch/tts_adapter.cpp index 6414d6390..3d1e5b987 100644 --- a/otherarch/tts_adapter.cpp +++ b/otherarch/tts_adapter.cpp @@ -474,6 +474,7 @@ static int last_generation_settings_speaker_seed; static int last_generation_settings_audio_seed; static std::vector last_speaker_codes; //will store cached speaker static int last_speaker_seed = -999; +static std::string last_speaker_data = ""; static int cts_offset = 151672; static int space_id = 151670; static int code_terminate_id = 151670; @@ -613,7 +614,9 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs) const llama_vocab * ctsvocab = llama_model_get_vocab(model_cts); const int ttc_n_vocab = llama_vocab_n_tokens(ttcvocab); std::string prompt = inputs.prompt; - const std::string sampletext = process_text("but that is what it is",ttsver); + std::string custom_speaker_text = inputs.custom_speaker_text; + std::string custom_speaker_data = inputs.custom_speaker_data; + const std::string sampletext = (custom_speaker_text=="")?process_text("but that is what it is",ttsver):process_text(custom_speaker_text,ttsver); // process prompt and generate voice codes llama_kv_cache_clear(ttc_ctx); @@ -681,13 +684,21 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs) if(speaker_seed>0) //first pass { //if we have a cached speaker, reuse it - if(last_speaker_seed==speaker_seed && !last_speaker_codes.empty()) + if(last_speaker_seed==speaker_seed && !last_speaker_codes.empty() && custom_speaker_data==last_speaker_data) { //able to proceed, do nothing if(!tts_is_quiet && ttsdebugmode==1) { printf("\nReuse speaker ID=%d (%d tokens)...", last_speaker_seed, last_speaker_codes.size()); } + } else if (custom_speaker_data!="" && custom_speaker_text!="") { 
//custom speaker json + std::string speaker = format_audiotokens(custom_speaker_data,ttsver); + last_speaker_codes = common_tokenize(ttcvocab, speaker, false, true); + last_speaker_seed = speaker_seed; + if(!tts_is_quiet && ttsdebugmode==1) + { + printf("\nCustom Speaker JSON (%d tokens)...", last_speaker_seed, last_speaker_codes.size()); + } } else if (speaker_seed>=1 && speaker_seed<=5){ //special seeds std::string speaker = ""; switch(speaker_seed) @@ -699,7 +710,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs) speaker = format_audiotokens("but<|t_0.45|><|code_start|><|920|><|1824|><|1138|><|1387|><|1096|><|1712|><|1642|><|810|><|1685|><|620|><|954|><|584|><|23|><|1467|><|509|><|659|><|1598|><|465|><|567|><|1440|><|3|><|476|><|740|><|288|><|419|><|1440|><|1477|><|254|><|25|><|811|><|882|><|476|><|246|><|246|><|code_end|>\nthat<|t_0.17|><|code_start|><|419|><|1690|><|208|><|1044|><|300|><|1100|><|375|><|1222|><|371|><|1045|><|637|><|1719|><|314|><|code_end|>\nis<|t_0.12|><|code_start|><|319|><|1131|><|794|><|1103|><|1296|><|1615|><|1587|><|233|><|863|><|code_end|>\nwhat<|t_0.16|><|code_start|><|793|><|902|><|391|><|946|><|437|><|95|><|1133|><|110|><|58|><|853|><|1283|><|449|><|code_end|>\nit<|t_0.12|><|code_start|><|774|><|239|><|974|><|213|><|1095|><|1612|><|101|><|1569|><|882|><|code_end|>\nis<|t_0.32|><|code_start|><|1131|><|529|><|1144|><|774|><|1114|><|483|><|693|><|648|><|1112|><|1470|><|1112|><|319|><|1294|><|1417|><|1660|><|729|><|1789|><|1413|><|1728|><|554|><|273|><|736|><|640|><|1549|><|code_end|>",ttsver); break; case 3: - speaker = format_audiotokens("but<|t_0.21|><|code_start|><|348|><|1776|><|1620|><|1262|><|118|><|288|><|258|><|1407|><|1331|><|1102|><|664|><|1300|><|1647|><|1536|><|71|><|23|><|code_end|> 
\nthat<|t_0.19|><|code_start|><|3|><|1740|><|1253|><|1122|><|549|><|715|><|718|><|657|><|1136|><|1247|><|517|><|1333|><|815|><|634|><|code_end|>\nis<|t_0.12|><|code_start|><|1330|><|839|><|753|><|1826|><|1602|><|50|><|1441|><|889|><|948|><|code_end|>\nwhat<|t_0.16|><|code_start|><|899|><|869|><|250|><|894|><|876|><|1471|><|1308|><|1436|><|1328|><|1700|><|1425|><|1330|><|code_end|>\nit<|t_0.12|><|code_start|><|1027|><|1162|><|1344|><|1170|><|86|><|1562|><|1575|><|176|><|1186|><|code_end|>\nis<|t_0.25|><|code_start|><|361|><|1533|><|1697|><|903|><|333|><|1232|><|1337|><|1611|><|1196|><|0|><|1328|><|1245|><|1718|><|1635|><|1616|><|1599|><|1363|><|962|><|328|><|code_end|>",ttsver); + speaker = format_audiotokens("but<|t_0.21|><|code_start|><|348|><|1776|><|1620|><|1262|><|118|><|288|><|258|><|1407|><|1331|><|1102|><|664|><|1300|><|1647|><|1536|><|71|><|23|><|code_end|>\nthat<|t_0.19|><|code_start|><|3|><|1740|><|1253|><|1122|><|549|><|715|><|718|><|657|><|1136|><|1247|><|517|><|1333|><|815|><|634|><|code_end|>\nis<|t_0.12|><|code_start|><|1330|><|839|><|753|><|1826|><|1602|><|50|><|1441|><|889|><|948|><|code_end|>\nwhat<|t_0.16|><|code_start|><|899|><|869|><|250|><|894|><|876|><|1471|><|1308|><|1436|><|1328|><|1700|><|1425|><|1330|><|code_end|>\nit<|t_0.12|><|code_start|><|1027|><|1162|><|1344|><|1170|><|86|><|1562|><|1575|><|176|><|1186|><|code_end|>\nis<|t_0.25|><|code_start|><|361|><|1533|><|1697|><|903|><|333|><|1232|><|1337|><|1611|><|1196|><|0|><|1328|><|1245|><|1718|><|1635|><|1616|><|1599|><|1363|><|962|><|328|><|code_end|>",ttsver); break; case 4: speaker = 
format_audiotokens("but<|t_0.20|><|code_start|><|686|><|1288|><|1251|><|1428|><|481|><|702|><|1812|><|829|><|81|><|756|><|76|><|104|><|952|><|1723|><|1632|><|code_end|>\nthat<|t_0.20|><|code_start|><|1006|><|1067|><|1614|><|1810|><|887|><|43|><|1192|><|106|><|400|><|43|><|730|><|660|><|186|><|87|><|467|><|code_end|>\nis<|t_0.27|><|code_start|><|648|><|1625|><|9|><|685|><|243|><|106|><|996|><|990|><|228|><|809|><|1009|><|2|><|806|><|1325|><|1332|><|1766|><|202|><|725|><|416|><|822|><|code_end|>\nwhat<|t_0.36|><|code_start|><|1287|><|328|><|1241|><|1661|><|1651|><|1708|><|1740|><|1685|><|1715|><|1787|><|1381|><|197|><|1769|><|525|><|1000|><|234|><|364|><|115|><|212|><|632|><|1153|><|228|><|73|><|1002|><|1800|><|1277|><|1117|><|code_end|>\nit<|t_0.40|><|code_start|><|1830|><|1199|><|1282|><|1163|><|1195|><|1752|><|1092|><|1481|><|1003|><|513|><|1639|><|1805|><|1485|><|1645|><|195|><|1464|><|181|><|195|><|123|><|87|><|433|><|878|><|170|><|1265|><|375|><|1708|><|1739|><|1519|><|1185|><|1099|><|code_end|>\nis<|t_0.76|><|code_start|><|1748|><|1422|><|276|><|1337|><|1322|><|1519|><|1779|><|1067|><|1724|><|891|><|1205|><|1419|><|1144|><|1667|><|591|><|1003|><|1543|><|566|><|1390|><|426|><|1824|><|182|><|1138|><|52|><|129|><|1056|><|155|><|1056|><|1298|><|919|><|155|><|125|><|500|><|1022|><|571|><|315|><|400|><|100|><|617|><|295|><|757|><|324|><|592|><|1298|><|1310|><|57|><|876|><|1175|><|1353|><|1770|><|1649|><|1828|><|1637|><|362|><|1744|><|884|><|1027|><|code_end|>",ttsver); @@ -811,6 +822,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs) prompt_init(prompt_inp, ttcvocab); next_token_uses_guide_token = true; } + last_speaker_data = custom_speaker_data; //second pass: add the speaker before the actual prompt guide_tokens = prepare_guide_tokens(ttcvocab,prompt_clean,ttsver); @@ -987,7 +999,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs) last_generation_settings_audio_seed = inputs.audio_seed; 
last_generation_settings_speaker_seed = inputs.speaker_seed; - last_generation_settings_prompt = std::string(inputs.prompt); + last_generation_settings_prompt = std::string(prompt); total_tts_gens += 1; return output;