{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T10:06:17Z","timestamp":1758189977325,"version":"3.44.0"},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T00:00:00Z","timestamp":1754179200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T00:00:00Z","timestamp":1754179200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1109\/ialp68296.2024.11156272","type":"proceedings-article","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T17:32:18Z","timestamp":1758043938000},"page":"141-146","source":"Crossref","is-referenced-by-count":0,"title":["Swar (Hindi): A Speaker-Aware Vocoder for Low-Resource Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Ravindrakumar M.","family":"Purohit","sequence":"first","affiliation":[{"name":"Dhirubhai Ambani University (DAU),Speech Research Lab,Gandhinagar,GJ,India"}]},{"given":"Hemant A.","family":"Patil","sequence":"additional","affiliation":[{"name":"Dhirubhai Ambani University (DAU),Speech Research Lab,Gandhinagar,GJ,India"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1452"},{"key":"ref2","first-page":"3171","article-title":"Fastspeech: Fast, robust and controllable text to speech","volume-title":"Advances in Neural Information Processing Systems (NIPS)","volume":"32","author":"Ren","year":"2019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref4","article-title":"Melgan: Generative adversarial networks for conditional waveform synthesis","volume-title":"Advances in Neural Information Processing Systems (NeurIPS)","volume":"32","author":"Kumar","year":"2019"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.2015EDP7457"},{"key":"ref7","article-title":"Wavenet: A generative model for raw audio","author":"Oord","year":"2016","journal-title":"arXiv preprint arXiv"},{"key":"ref8","first-page":"3918","article-title":"Parallel wavenet: Fast high-fidelity speech synthesis","volume-title":"International Conference on Machine Learning (ICML)","author":"Oord","year":"2018"},{"key":"ref9","article-title":"Clarinet: Parallel wave generation in end-to-end text-to-speech","author":"Ping","year":"2018","journal-title":"arXiv preprint arXiv"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"ref11","first-page":"17022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"Advances in Neural Information Processing Systems (NIPS)","volume":"33","author":"Kong","year":"2020"},{"key":"ref12","article-title":"Universal MelGAN: A robust neural vocoder for high-fidelity waveform generation in multiple domains","author":"Jang","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-730"},{"key":"ref14","article-title":"Wavegrad: Estimating gradients for waveform generation","author":"Chen","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref15","first-page":"2672","article-title":"Generative adversarial networks","volume-title":"Neural Information Processing Systems (NIPS)","volume":"27","author":"Goodfellow","year":"2014"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413605"},{"key":"ref17","article-title":"Bigvgan: A universal neural vocoder with large-scale training","author":"Lee","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2006.06.003"},{"volume-title":"What are the top 200 most spoken languages?","year":"2021","author":"Ethnologue","key":"ref19"},{"volume-title":"Intonation in indian english and hindi late and simultaneous bilinguals","year":"2013","author":"Puri","key":"ref20"},{"key":"ref21","article-title":"Implementing a speech recognition system interface for indian languages","volume-title":"Proceedings of the IJCNLP-08 Workshop on NLP for Less Privileged languages","author":"Aggarwal","year":"2008"},{"key":"ref22","article-title":"Rule-based grapheme to phoneme mapping for hindi speech synthesis","volume-title":"90th Indian Science Congress of the International Speech Communication Association (ISCA)","author":"Choudhury","year":"2003"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/NCC.2011.5734737"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IC3.2015.7346682"},{"key":"ref25","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","author":"Ren","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref26","article-title":"Adversarial audio synthesis","author":"Donahue","year":"2018","journal-title":"arXiv preprint arXiv"},{"key":"ref27","article-title":"Diffwave: A versatile diffusion model for audio synthesis","author":"Kong","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1976.10154"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1976.1169987"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1121\/1.383940"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1977.1170350"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/5.18626"},{"key":"ref33","doi-asserted-by":"crossref","first-page":"2347","DOI":"10.21437\/Eurospeech.1999-596","article-title":"Simultaneous modeling of spectrum, pitch and duration in hmm-based speech synthesis","volume-title":"Proc. EUROSPEECH","author":"Yoshimura\u00dd","year":"1999"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1121\/1.1912679"},{"volume-title":"Indic database","year":"2021","key":"ref35"},{"volume-title":"Syspin s1.0 corpus - a tts corpus of 900+ hours in nine indian languages","year":"2025","author":"A","key":"ref36"},{"volume-title":"1111 hours hindi asr challenge","year":"2022","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1339"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-014-0446-1"},{"volume-title":"ITU-T Recommendation P.800: Methods for Subjective determination of transmission quality","year":"1996","key":"ref40"},{"key":"ref41","first-page":"13","article-title":"Testvox: web-based framework for subjective evaluation of speech synthesis","author":"Parlikar","year":"2012","journal-title":"Opensource Software"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1238"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00296-0_5"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/89.326616"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/PACRIM.1993.407206"}],"event":{"name":"2025 International Conference on Asian Language Processing (IALP)","start":{"date-parts":[[2025,8,3]]},"location":"Sarawak, Malaysia","end":{"date-parts":[[2025,8,6]]}},"container-title":["2025 International Conference on Asian Language Processing (IALP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11156192\/11156242\/11156272.pdf?arnumber=11156272","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T05:11:07Z","timestamp":1758085867000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11156272\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/ialp68296.2024.11156272","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]}}}