{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T05:07:37Z","timestamp":1737176857661,"version":"3.33.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100010428","name":"Innovation and Technology Fund","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100010428","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,2]]},"DOI":"10.1109\/slt61566.2024.10832287","type":"proceedings-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:31:27Z","timestamp":1737052287000},"page":"735-741","source":"Crossref","is-referenced-by-count":0,"title":["Emotion-Coherent Speech Data Augmentation And Self-Supervised Contrastive Style Training For Enhancing Kids\u2019s Story Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Raymond","family":"Chung","sequence":"first","affiliation":[{"name":"Logistics and Supply Chain MultiTech R&#x0026;D Centre,Pok Fu Lam,Hong Kong"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref2","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","author":"Ren","year":"2020","journal-title":"arXiv preprint arXiv:2006.04558"},{"article-title":"The LJ Speech dataset","year":"2017","author":"Ito","key":"ref3"},{"key":"ref4","article-title":"LibriTTS: A corpus derived from librispeech for text-tospeech","author":"Zen","year":"2019","journal-title":"arXiv preprint arXiv:1904.02882"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1972"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054106"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268954"},{"key":"ref8","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"International Conference on Machine Learning","author":"Wang"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639682"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054556"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2019.2931673"},{"key":"ref12","article-title":"Multi-reference tacotron by intercross training for style disentangling, transfer and control in speech synthesis","author":"Bian","year":"2019","journal-title":"arXiv preprint arXiv:1904.02373"},{"key":"ref13","article-title":"Improving the quality of neural tts using long-form content and multi-speaker multi-style modeling","author":"Raitio","year":"2022","journal-title":"arXiv preprint arXiv:2212.10075"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2023-22"},{"key":"ref15","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint arXiv:1810.04805"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095866"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-122"},{"key":"ref18","article-title":"Zeroshot text-to-speech synthesis conditioned using selfsupervised speech representation model","author":"Fujita","year":"2023","journal-title":"arXiv preprint arXiv:2304.11976"},{"author":"Shi","key":"ref19","article-title":"Wfsctacotron2: Chinese dialect speech synthesis based on word frame speech similarity coding and tacotron2"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICTC52510.2021.9621175"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10797"},{"key":"ref22","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020"},{"key":"ref23","first-page":"15971607","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"International conference on machine learning","author":"Chen"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-391"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747355"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639682"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-379"},{"key":"ref29","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","author":"Raffel","year":"2019","journal-title":"ArXiv, vol. abs\/1910.10683"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1404"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2021.11.006"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1386"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2019-18"}],"event":{"name":"2024 IEEE Spoken Language Technology Workshop (SLT)","start":{"date-parts":[[2024,12,2]]},"location":"Macao","end":{"date-parts":[[2024,12,5]]}},"container-title":["2024 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10830790\/10830793\/10832287.pdf?arnumber=10832287","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T07:50:22Z","timestamp":1737100222000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10832287\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/slt61566.2024.10832287","relation":{},"subject":[],"published":{"date-parts":[[2024,12,2]]}}}