{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T14:10:08Z","timestamp":1755871808150,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,1,4]],"date-time":"2024-01-04T00:00:00Z","timestamp":1704326400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,1,4]]},"DOI":"10.1145\/3632410.3632413","type":"proceedings-article","created":{"date-parts":[[2024,1,3]],"date-time":"2024-01-03T18:15:16Z","timestamp":1704305716000},"page":"325-331","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["An Approach for Speech Enhancement in Low SNR Environments using Granular Speaker Embedding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0839-1849","authenticated-orcid":false,"given":"Jayasree","family":"Saha","sequence":"first","affiliation":[{"name":"IIIT-H, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6628-7065","authenticated-orcid":false,"given":"Rudrabha","family":"Mukhopadhyay","sequence":"additional","affiliation":[{"name":"IIIT-H, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7862-2428","authenticated-orcid":false,"given":"Aparna","family":"Agrawal","sequence":"additional","affiliation":[{"name":"IIIT-H, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2499-2751","authenticated-orcid":false,"given":"Surabhi","family":"Jain","sequence":"additional","affiliation":[{"name":"IIIT-H, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6767-7057","authenticated-orcid":false,"given":"C. V.","family":"Jawahar","sequence":"additional","affiliation":[{"name":"IIIT-H, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,1,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2291"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"X.\u00a0Lu C.-F.\u00a0Liao Y.\u00a0Tsao and H. Kawai. 2019. Incorporating Symbolic Sequential Modeling for Speech Enhancement. In Interspeech.","DOI":"10.21437\/Interspeech.2020-1400"},{"volume-title":"Proceedings of Speech Synthesis Work- shop (SSW).","author":"Valentini-Botinhao Takaki","key":"e_1_3_2_1_3_1","unstructured":"S.\u00a0Takaki C.\u00a0Valentini-Botinhao, X.\u00a0Wang and J. Yamagishi. 2016. Investigating rnn-based speech enhancement methods for noise robust text-to-speech. In Proceedings of Speech Synthesis Work- shop (SSW)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10781"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Alexandre Defossez Gabriel Synnaeve and Yossi Adi. 2020. Real Time Speech Enhancement in the Waveform Domain. In Interspeech.","DOI":"10.21437\/Interspeech.2020-2409"},{"key":"e_1_3_2_1_6_1","unstructured":"J. Garofolo Lori Lamel W. Fisher Jonathan Fiscus D. Pallett N. Dahlgren and V. Zue. 1992. TIMIT Acoustic-phonetic Continuous Speech Corpus. Linguistic Data Consortium (11 1992)."},{"key":"e_1_3_2_1_7_1","volume-title":"BigVGAN: A Universal Neural Vocoder with Large-Scale Training. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=iTtGCMDEzS_","author":"Lee Sang","year":"2023","unstructured":"Sang gil Lee, Wei Ping, Boris Ginsburg, Bryan Catanzaro, and Sungroh Yoon. 2023. BigVGAN: A Universal Neural Vocoder with Large-Scale Training. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=iTtGCMDEzS_"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21315"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"D. Griffin and J.S. Lim. 1984. Signal Estimation from Modified Short-Time Fourier Transform. IEEE Transactions on Acoustics Speech and Signal Processing (1984).","DOI":"10.1109\/TASSP.1984.1164317"},{"volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 721\u2013725","author":"Chen Vedaldi","key":"e_1_3_2_1_10_1","unstructured":"A.\u00a0Vedaldi H.\u00a0Chen, W.\u00a0Xie and A. Zisserman. 2020. Vggsound: A large-scale audio-visual dataset. In IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 721\u2013725."},{"volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV). 1926\u20131935","author":"Hegde B.","key":"e_1_3_2_1_11_1","unstructured":"Sindhu\u00a0B. Hegde, K.R. Prajwal, Rudrabha Mukhopadhyay, Vinay\u00a0P. Namboodiri, and C.V. Jawahar. 2021. Visual Speech Enhancement Without a Real Visual Stream. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV). 1926\u20131935."},{"volume-title":"Proc. Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge. 140\u2013144","author":"Ho V.","key":"e_1_3_2_1_12_1","unstructured":"T.\u00a0V. Ho and M. Akagi. 2020. Non-parallel Voice Conversion based on Hierarchical Latent Embedding Vector Quantized Variational Autoencoder. In Proc. Joint Workshop for the Blizzard Challenge and Voice Conversion Challenge. 140\u2013144."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-443"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"e_1_3_2_1_15_1","volume-title":"Speech Enhancement Using Self-Adaptation and Multi-Head Self-Attention. In ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 181\u2013185","author":"Koizumi Yuma","year":"2020","unstructured":"Yuma Koizumi, Kohei Yatabe, Marc Delcroix, Yoshiki Masuyama, and Daiki Takeuchi. 2020. Speech Enhancement Using Self-Adaptation and Multi-Head Self-Attention. In ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 181\u2013185."},{"volume-title":"Proceedings of Interspeech. 2462\u20132466","author":"Li X.","key":"e_1_3_2_1_16_1","unstructured":"X. Li and R. Horaud. 2020. Online monaural speech enhancement using delayed subband LSTM. In Proceedings of Interspeech. 2462\u20132466."},{"key":"e_1_3_2_1_17_1","volume-title":"Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing.","author":"Li\u00a0Wan Alan\u00a0Papir","year":"2018","unstructured":"Alan\u00a0Papir Li\u00a0Wan, Quan\u00a0Wang and Ignacio\u00a0Lopez Moreno. 2018. Generalized end-to-end loss for speaker verification. In Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"e_1_3_2_1_19_1","volume-title":"SEGAN: Speech Enhancement Generative Adversarial Network. arXiv preprint arXiv:1703.09452","author":"Pascual Santiago","year":"2017","unstructured":"Santiago Pascual, Antonio Bonafonte, and Joan Serr\u00e0. 2017. SEGAN: Speech Enhancement Generative Adversarial Network. arXiv preprint arXiv:1703.09452 (2017)."},{"volume-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (Cat. No.01CH37221)","author":"Rix A.W.","key":"e_1_3_2_1_20_1","unstructured":"A.W. Rix, J.G. Beerends, M.P. Hollier, and A.P. Hekstra. 2001. Perceptual evaluation of speech quality (PESQ)-a new method for speech quality assessment of telephone networks and codecs. In IEEE International Conference on Acoustics, Speech, and Signal Processing. Proceedings (Cat. No.01CH37221), Vol.\u00a02. 749\u2013752."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1080\/0269920031000105336"},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Machine Learning. PMLR. 2031\u20132041","author":"Fu Tsao","year":"2019","unstructured":"Y.\u00a0Tsao S.-W.\u00a0Fu, C.-F.\u00a0Liao and S.-D. Lin. 2019. Metricgan: Generative adversarial networks based black-box metric scores optimization for speech enhancement. In International Conference on Machine Learning. PMLR. 2031\u20132041."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462068"},{"key":"e_1_3_2_1_24_1","unstructured":"J.\u00a0S.\u00a0Chung T.\u00a0Afouras and A. Zisserman. 2018. Lrs3-ted: a large- scale dataset for visual speech recognition. arXiv preprint arXiv:1809.00496 2018 (2018)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2631"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-020-09817-2"}],"event":{"name":"CODS-COMAD 2024: 7th Joint International Conference on Data Science & Management of Data (11th ACM IKDD CODS and 29th COMAD)","acronym":"CODS-COMAD 2024","location":"Bangalore India"},"container-title":["Proceedings of the 7th Joint International Conference on Data Science &amp; Management of Data (11th ACM IKDD CODS and 29th COMAD)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3632410.3632413","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3632410.3632413","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T13:34:07Z","timestamp":1755869647000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3632410.3632413"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,4]]},"references-count":27,"alternative-id":["10.1145\/3632410.3632413","10.1145\/3632410"],"URL":"https:\/\/doi.org\/10.1145\/3632410.3632413","relation":{},"subject":[],"published":{"date-parts":[[2024,1,4]]},"assertion":[{"value":"2024-01-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}