{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T03:00:10Z","timestamp":1730343610181,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T00:00:00Z","timestamp":1693785600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T00:00:00Z","timestamp":1693785600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001695","name":"JST","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001695","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100016995","name":"CREST","doi-asserted-by":"publisher","award":["JPMJCR19A3"],"award-info":[{"award-number":["JPMJCR19A3"]}],"id":[{"id":"10.13039\/100016995","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,4]]},"DOI":"10.23919\/eusipco58844.2023.10289823","type":"proceedings-article","created":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T13:55:44Z","timestamp":1698846944000},"page":"296-300","source":"Crossref","is-referenced-by-count":0,"title":["W2N-AVSC: Audiovisual Extension For Whisper-To-Normal Speech Conversion"],"prefix":"10.23919","author":[{"given":"Shogo","family":"Seki","sequence":"first","affiliation":[{"name":"NTT Corporation,NTT Communication Science Laboratories,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"family":"Kanami","sequence":"additional","affiliation":[{"name":"Graduate school of information science and technology, the university of tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"family":"Imamura","sequence":"additional","affiliation":[{"name":"Graduate school of information science and technology, the university of tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hirokazu","family":"Kameoka","sequence":"additional","affiliation":[{"name":"NTT Corporation,NTT Communication Science Laboratories,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Takuhiro","family":"Kaneko","sequence":"additional","affiliation":[{"name":"NTT Corporation,NTT Communication Science Laboratories,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kou","family":"Tanaka","sequence":"additional","affiliation":[{"name":"NTT Corporation,NTT Communication Science Laboratories,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Noboru","family":"Harada","sequence":"additional","affiliation":[{"name":"NTT Corporation,NTT Communication Science Laboratories,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","first-page":"101027","article-title":"VoxCeleb: Large-Scale Speaker Verification in the Wild","author":"nagrani","year":"2019","journal-title":"Computer Science and Language"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2003"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1899","DOI":"10.1109\/TSP.2021.3066038","article-title":"Mixture of Inference Networks for VAE-Based Audio-Visual Speech Enhancement","volume":"69","author":"mostafa","year":"2021","journal-title":"IEEE Transactions on Signal Processing"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1400"},{"key":"ref31","first-page":"1","article-title":"Adam: A Method for Stochastic Gradient Optimization","author":"kingma","year":"0","journal-title":"International Conference on Leanrning Representations"},{"key":"ref30","first-page":"1243","article-title":"Convolutional Sequence to Sequence Learning","author":"gehring","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-950"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1121\/1.4799597"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639535"},{"key":"ref32","first-page":"17022","article-title":"HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis","author":"kong","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2940700"},{"key":"ref1","first-page":"1","article-title":"Whisper to Normal Speech Based on Deep Neural Networks with MCC and FO Features","author":"lian","year":"0","journal-title":"International Conference on Digital Signal Processing"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11232"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00879"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01381"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548397"},{"journal-title":"Computational Differences between Whispered and Non-whispered Speech","year":"2011","author":"lim","key":"ref24"},{"key":"ref23","first-page":"3581","article-title":"Semi-Supervised Learning with Deep Generative Models","author":"kingma","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"ref25","first-page":"431","article-title":"The CHAINS Corpus: Characterizing individual speakers","author":"cummins","year":"0","journal-title":"International Conference on Speech and Computer"},{"key":"ref20","first-page":"7048","article-title":"Speech Prediction in Silent Videos Using Variational Autoen-coders","author":"yadav","year":"0","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref22","first-page":"278","article-title":"A neural attention model for disfluency detection","author":"wang","year":"0","journal-title":"International Conference on Computational Linguistics (COLING)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639243"},{"key":"ref28","first-page":"1","article-title":"Cross-Dataset Learning and Person-Specific Normalisation for Automatic Action Unit Detection","author":"baltru\u0161aitis","year":"0","journal-title":"IEEE Int Conf Automatic Face and Gesture Recognition"},{"key":"ref27","first-page":"59","article-title":"OpenFace 2.0: Facial Behavior Analysis Toolkit","author":"baltru\u0161aitis","year":"0","journal-title":"IEEE International Conference on Automatic Face and Gesture Recognition"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.296"},{"key":"ref8","first-page":"5210","article-title":"AutoVC: Zero-Shot Voice Style Transfer with Only Autoencoder Loss","author":"qian","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref7","first-page":"1","article-title":"Generative Adversarial Nets","volume":"27","author":"goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ISCSLP.2018.8706604"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3270699"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2019.8902961"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2917232"},{"key":"ref5","first-page":"1","article-title":"Auto-Encoding Variational Bayes","author":"kingma","year":"0","journal-title":"International Conference on Learning Representations"}],"event":{"name":"2023 31st European Signal Processing Conference (EUSIPCO)","start":{"date-parts":[[2023,9,4]]},"location":"Helsinki, Finland","end":{"date-parts":[[2023,9,8]]}},"container-title":["2023 31st European Signal Processing Conference (EUSIPCO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10289698\/10289713\/10289823.pdf?arnumber=10289823","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T18:39:03Z","timestamp":1698863943000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10289823\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,4]]},"references-count":34,"URL":"https:\/\/doi.org\/10.23919\/eusipco58844.2023.10289823","relation":{},"subject":[],"published":{"date-parts":[[2023,9,4]]}}}