{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:51:33Z","timestamp":1764399093968,"version":"3.46.0"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249208","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"1110-1115","source":"Crossref","is-referenced-by-count":0,"title":["Emotion-Rich Cross-Speaker TTS via Contrastive Prosody Enhancement"],"prefix":"10.1109","author":[{"given":"Jen-Tzung","family":"Chien","sequence":"first","affiliation":[{"name":"Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University,Hsinchu,Taiwan"}]},{"given":"Bryan Gautama","family":"Ngo","sequence":"additional","affiliation":[{"name":"Institute of Electrical and Computer Engineering, National Yang Ming Chiao Tung University,Hsinchu,Taiwan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016706"},{"key":"ref2","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. of International Conference on Learning Representations","author":"Ren","year":"2021"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/O-COCOSDA64382.2024.10800072"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2025.3561267"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10889012"},{"key":"ref6","first-page":"1003","article-title":"Reliable dialogue system for facilitating student-counselor communication","volume-title":"Proc. of Annual Conference of International Speech Communication Association","author":"Rohmatillah","year":"2024"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPAASC63619.2025.10849064"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10761"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3164181"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/O-COCOSDA64382.2024.10800372"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/O-COCOSDA64382.2024.10800282"},{"key":"ref12","first-page":"7836","article-title":"Unsupervised speech decomposition via triple information bottleneck","volume-title":"Proc. of International Conference on Machine Learning","author":"Qian","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3363444"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747098"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389638"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.69.066138"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3402077"},{"key":"ref18","first-page":"1180","article-title":"Unsupervised domain adaptation by backpropagation","volume-title":"Proc. of International Conference on Machine Learning","volume":"37","author":"Ganin","year":"2015"},{"key":"ref19","article-title":"CLUB: A contrastive log-ratio upper bound of mutual information","volume-title":"Proc. of International Conference on Machine Learning","volume":"119","author":"Cheng","year":"2020"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3044215"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446746"},{"key":"ref22","first-page":"18661","article-title":"Supervised contrastive learning","volume":"33","author":"Khosla","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref23","article-title":"Representation learning with contrastive predictive coding","author":"Van Den Oord","year":"2018","journal-title":"arXiv preprint"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1800"},{"key":"ref25","first-page":"21","article-title":"Vocal tract length perturbation (VTLP) improves speech recognition","volume":"117","author":"Jaitly","year":"2013","journal-title":"Proc. ICML Workshop on Deep Learning for Audio, Speech and Language"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747763"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1229"},{"key":"ref28","first-page":"719","article-title":"TADAM: task dependent adaptive metric for improved few-shot learning","author":"Oreshkin","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"The LJ speech dataset","year":"2017","author":"Ito","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-755"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413391"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-008-9076-6"},{"key":"ref33","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Kong","year":"2020"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-1212"},{"issue":"11","key":"ref35","first-page":"2579","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van Der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3420425"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249208.pdf?arnumber=11249208","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:50:44Z","timestamp":1764399044000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249208\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249208","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}