{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T15:10:27Z","timestamp":1731683427011,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T00:00:00Z","timestamp":1701648000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,4]]},"DOI":"10.1109\/o-cocosda60357.2023.10482965","type":"proceedings-article","created":{"date-parts":[[2024,4,2]],"date-time":"2024-04-02T18:38:10Z","timestamp":1712083090000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Generating Speech with Prosodic Prominence based on SSL-Visually Grounded Models"],"prefix":"10.1109","author":[{"given":"Bella Septina","family":"Ika Hartanti","sequence":"first","affiliation":[{"name":"University of Indonesia,Depok,Indonesia"}]},{"given":"Dipta","family":"Tanaya","sequence":"additional","affiliation":[{"name":"University of Indonesia,Depok,Indonesia"}]},{"given":"Kurniawati","family":"Azizah","sequence":"additional","affiliation":[{"name":"University of Indonesia,Depok,Indonesia"}]},{"given":"Dessi Puji","family":"Lestari","sequence":"additional","affiliation":[{"name":"Bandung Institute of Technology,Bandung,Indonesia"}]},{"given":"Ayu","family":"Purwarianti","sequence":"additional","affiliation":[{"name":"Bandung Institute of Technology,Bandung,Indonesia"}]},{"given":"Sakriani","family":"Sakti","sequence":"additional","affiliation":[{"name":"Japan Advanced Institute of Science and Technology,Ishikawa,Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3139"},{"article-title":"Glow-TTS: A generative flow for text-to-speech via monotonic alignment search","volume-title":"Proc. Advances in Neural Information Processing Systems","author":"Kim","key":"ref2"},{"key":"ref3","first-page":"8599","article-title":"GradTTS: A diffusion probabilistic model for text-to-speech","volume-title":"Proc. International Conference on Machine Learning","author":"Popov"},{"article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. In International Conference on Learning Representations","author":"Ren","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2024.3356232\/mm1"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/eurospeech.1999-513"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1093\/ietisy\/e88-d.3.502"},{"article-title":"Emotional end-to-end neural speech synthesizer","year":"2017","author":"Lee","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1426"},{"article-title":"Towards end-to-end prosody transfer for expressive speech synthesis with tacotron","year":"2018","author":"Skerry-Ryan","key":"ref10"},{"article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","year":"2018","author":"Wang","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-465"},{"article-title":"InstructTTS: Modelling expressive tts in discrete latent space with natural language style prompts","year":"2023","author":"Yang","key":"ref13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2012-628"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.conll-1.42"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383591"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2904"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2000.869666"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2022.3214100"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3077886"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747837"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU51503.2021.9687906"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49357.2023.10096057"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10652"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K19-1006"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016706"},{"article-title":"Bigv-gan: A universal neural vocoder with large-scale training","year":"2022","author":"Lee","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404800"},{"article-title":"Collecting image annotations using amazon\u2019s mechanical turk","volume-title":"Proc. NAACL HLT","author":"Rashtchian","key":"ref31"},{"key":"ref32","doi-asserted-by":"crossref","DOI":"10.21437\/Blizzard.2021-14","article-title":"Delightfultts: The microsoft speech synthesis system for blizzard challenge 2021","author":"Liu","year":"2021"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-802"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-299"}],"event":{"name":"2023 26th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","start":{"date-parts":[[2023,12,4]]},"location":"Delhi, India","end":{"date-parts":[[2023,12,6]]}},"container-title":["2023 26th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10482896\/10482911\/10482965.pdf?arnumber=10482965","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T14:37:25Z","timestamp":1731681445000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10482965\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,4]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/o-cocosda60357.2023.10482965","relation":{},"subject":[],"published":{"date-parts":[[2023,12,4]]}}}