{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:24:20Z","timestamp":1775229860733,"version":"3.50.1"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434754","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-4","source":"Crossref","is-referenced-by-count":1,"title":["Speech Masking System Based on Spatially Separated Multiple TTS Maskers With A Compact Circular Loudspeaker Array"],"prefix":"10.1109","author":[{"given":"Takuma","family":"Okamoto","sequence":"first","affiliation":[{"name":"National Institute of Information and Communications Technology,Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1121\/1.1918525"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1121\/1.1456926"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1121\/1.3438479"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854500"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2360707"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2015.7336920"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1121\/1.4977995"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2700945"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2798804"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3013397"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1121\/10.0001065"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3052564"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA52581.2021.9632736"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1121\/10.0009275"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1121\/10.0010256"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3231715"},{"key":"ref17","volume-title":"Fourier Acoustics: Sound Radiation and Nearfield Acoustic Holography","author":"Williams","year":"1999"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.3397\/in_2023_0842"},{"key":"ref19","first-page":"3539","article-title":"Simultaneous speech translation integrated compact multiple sound spot synthesis system on a laptop carried out with a backpack","volume-title":"Proc. Interspeech","author":"Okamoto"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1121\/1.1918154"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1121\/1.404209"},{"issue":"2","key":"ref22","first-page":"32","article-title":"The acoustical design of conventional open plan offices","volume":"31","author":"Bradley","year":"2003","journal-title":"Can. Acoust"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1121\/1.1408946"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1250\/ast.31.188"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2019.107073"},{"key":"ref26","first-page":"5167","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"Proc. ICML","author":"Wang"},{"key":"ref27","first-page":"997","article-title":"Mobile PresenTra: NICT fast neural text-to-speech system on smartphones with incremental inference of MS-FC-HiFi-GAN for low-latency synthesis","volume-title":"Proc. Interspeech","author":"Okamoto"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414858"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446890"},{"key":"ref32","article-title":"End-to-end adversarial text-to-speech","volume-title":"Proc. ICLR","author":"Donahue"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10294"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2504"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2025.3585135"},{"key":"ref36","first-page":"8067","article-title":"Glow-TTS: A generative flow for text-to-speech via monotonic alignment search","volume-title":"Proc. NeurIPS","author":"Kim"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-68"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3366707"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1584"},{"key":"ref40","article-title":"ESPnet2-TTS: Extending the edge of TTS research","author":"Hayashi","year":"2021","journal-title":"arXiv:2110.07840"},{"key":"ref41","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. NeurIPS","author":"Paszke"},{"key":"ref42","doi-asserted-by":"crossref","DOI":"10.1109\/ASRU65441.2025.11434660","article-title":"The AudioMOS Challenge 2025","volume-title":"Proc. ASRU","author":"Huang"},{"key":"ref43","volume-title":"Hi-Fi-CAPTAIN: High-fidelity and high-capacity conversational speech synthesis corpus developed by NICT","author":"Okamoto","year":"2023"},{"key":"ref44","article-title":"Pure data","volume-title":"Proc. ICMC","author":"Puckette"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806390"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434754.pdf?arnumber=11434754","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:59:20Z","timestamp":1775192360000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434754\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434754","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}