{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:28:51Z","timestamp":1775230131302,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,6]],"date-time":"2023-12-06T00:00:00Z","timestamp":1701820800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Grant-in-Aid for Scientific Research (B)","award":["JP23H03454"],"award-info":[{"award-number":["JP23H03454"]}]},{"name":"Grant-in-Aid for Scientific Research (C)","award":["JP23K11227"],"award-info":[{"award-number":["JP23K11227"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,6]]},"DOI":"10.1145\/3611380.3628562","type":"proceedings-article","created":{"date-parts":[[2023,12,30]],"date-time":"2023-12-30T12:03:31Z","timestamp":1703937811000},"page":"1-3","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["KyotoMOS: An Automatic MOS Scoring System for Speech Synthesis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0693-5316","authenticated-orcid":false,"given":"Wangjin","family":"Zhou","sequence":"first","affiliation":[{"name":"Kyoto University, JP"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1629-3266","authenticated-orcid":false,"given":"Zhengdong","family":"Yang","sequence":"additional","affiliation":[{"name":"Kyoto University, JP"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7636-3797","authenticated-orcid":false,"given":"Sheng","family":"Li","sequence":"additional","affiliation":[{"name":"National Institute of Information and Communications Technology (NICT), JP"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9848-6384","authenticated-orcid":false,"given":"Chenhui","family":"Chu","sequence":"additional","affiliation":[{"name":"Kyoto University, JP"}]}],"member":"320","published-online":{"date-parts":[[2023,12,30]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383533"},{"key":"e_1_3_2_1_2_1","volume-title":"Proc","author":"Cooper E.","year":"2023","unstructured":"E. Cooper and et al.2023. The VoiceMOS Challenge 2023: zero-shot subjective speech quality prediction for multiple domains. In Proc. IEEE-ASRU, Vol.\u00a02023."},{"key":"e_1_3_2_1_3_1","volume-title":"Generalization ability of MOS prediction networks. arXiv preprint arXiv:2110.02635","author":"Cooper Erica","year":"2021","unstructured":"Erica Cooper, Wen-Chin Huang, Tomoki Toda, and Junichi Yamagishi. 2021. Generalization ability of MOS prediction networks. arXiv preprint arXiv:2110.02635 (2021)."},{"key":"e_1_3_2_1_4_1","volume-title":"The VoiceMOS Challenge","author":"Huang Wen-Chin","year":"2022","unstructured":"Wen-Chin Huang, Erica Cooper, Yu Tsao, Hsin-Min Wang, Tomoki Toda, and Junichi Yamagishi. 2022. The VoiceMOS Challenge 2022. arXiv preprint arXiv:2203.11389 (2022)."},{"key":"e_1_3_2_1_5_1","volume-title":"LDNet: Unified Listener Dependent Modeling in MOS Prediction for Synthetic Speech. arXiv preprint arXiv:2110.09103","author":"Huang Wen-Chin","year":"2021","unstructured":"Wen-Chin Huang, Erica Cooper, Junichi Yamagishi, and Tomoki Toda. 2021. LDNet: Unified Listener Dependent Modeling in MOS Prediction for Synthetic Speech. arXiv preprint arXiv:2110.09103 (2021)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.21437\/Blizzard.2008-1"},{"key":"e_1_3_2_1_7_1","volume-title":"The Blizzard Challenge","author":"King Simon","year":"2010","unstructured":"Simon King and Vasilis Karaiskos. 2010. The Blizzard Challenge 2010."},{"key":"e_1_3_2_1_8_1","volume-title":"The Blizzard Challenge","author":"King Simon","year":"2011","unstructured":"Simon King and Vasilis Karaiskos. 2011. The Blizzard Challenge 2011."},{"key":"e_1_3_2_1_9_1","volume-title":"The Blizzard Challenge","author":"King Simon","year":"2012","unstructured":"Simon King and Vasilis Karaiskos. 2012. The Blizzard Challenge 2012."},{"key":"e_1_3_2_1_10_1","volume-title":"The Blizzard Challenge","author":"King Simon","year":"2013","unstructured":"Simon King and Vasilis Karaiskos. 2013. The Blizzard Challenge 2013."},{"key":"e_1_3_2_1_11_1","volume-title":"The Blizzard Challenge","author":"King Simon","year":"2016","unstructured":"Simon King and Vasilis Karaiskos. 2016. The Blizzard Challenge 2016."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.21437\/Blizzard.2009-1"},{"key":"e_1_3_2_1_13_1","volume-title":"Mosnet: Deep learning based objective assessment for voice conversion. arXiv preprint arXiv:1904.08352","author":"Lo Chen-Chou","year":"2019","unstructured":"Chen-Chou Lo, Szu-Wei Fu, Wen-Chin Huang, Xin Wang, Junichi Yamagishi, Yu Tsao, and Hsin-Min Wang. 2019. Mosnet: Deep learning based objective assessment for voice conversion. arXiv preprint arXiv:1904.08352 (2019)."},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. IEEE-ASRU, Vol.\u00a02023","author":"Qi Z.","unstructured":"Z. Qi, X. Hu, W. Zhou, S. Li, H. Wu, J. Lu, and X. Xu. 2023. LE-SSL-MOS: Self-Supervised Learning MOS Prediction with Listener Enhancement. In Proc. IEEE-ASRU, Vol.\u00a02023."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10262"},{"key":"e_1_3_2_1_16_1","volume-title":"Deep Learning-based Non-Intrusive Multi-Objective Speech Assessment Model with Cross-Domain Features. arXiv preprint arXiv:2111.02363","author":"Zezario E","year":"2021","unstructured":"Ryandhimas\u00a0E Zezario, Szu-Wei Fu, Fei Chen, Chiou-Shann Fuh, Hsin-Min Wang, and Yu Tsao. 2021. Deep Learning-based Non-Intrusive Multi-Objective Speech Assessment Model with Cross-Domain Features. arXiv preprint arXiv:2111.02363 (2021)."}],"event":{"name":"MMAsia '23: ACM Multimedia Asia","location":"Tainan Taiwan","acronym":"MMAsia '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["ACM Multimedia Asia Workshops"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611380.3628562","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3611380.3628562","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T19:09:47Z","timestamp":1756235387000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3611380.3628562"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,6]]},"references-count":16,"alternative-id":["10.1145\/3611380.3628562","10.1145\/3611380"],"URL":"https:\/\/doi.org\/10.1145\/3611380.3628562","relation":{},"subject":[],"published":{"date-parts":[[2023,12,6]]},"assertion":[{"value":"2023-12-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}