{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:23:41Z","timestamp":1774419821959,"version":"3.50.1"},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10888018","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:02Z","timestamp":1741799702000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["Towards Bringing Parity in Pretraining Datasets for Low-resource Indian Languages"],"prefix":"10.1109","author":[{"given":"Kaushal Santosh","family":"Bhogale","sequence":"first","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Deovrat","family":"Mehendale","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tahir","family":"Javed","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Devbrat","family":"Anuragi","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sakshi","family":"Joshi","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sai","family":"Sundaresan","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aparna","family":"Ananthanarayanan","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sharmistha","family":"Dey","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sathish Kumar Reddy","family":"G","sequence":"additional","affiliation":[{"name":"IIT Madras"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anusha","family":"Srinivasan","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abhigyan","family":"Raman","sequence":"additional","affiliation":[{"name":"Sarvam AI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pratyush","family":"Kumar","sequence":"additional","affiliation":[{"name":"Sarvam AI"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mitesh M.","family":"Khapra","sequence":"additional","affiliation":[{"name":"AI4Bharat"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA","volume":"202","author":"Radford"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-3015"},{"key":"ref3","article-title":"Google USM: scaling automatic speech recognition beyond 100 languages","volume-title":"CoRR","author":"Zhang","year":"2023"},{"key":"ref4","article-title":"Scaling speech technology to 1, 000+ languages","volume-title":"CoRR","author":"Pratap","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.639"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.80"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/slt48900.2021.9383459"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/asru57964.2023.10389689"},{"key":"ref9","first-page":"10813","article-title":"Towards buildingASR systems for the next billion users","volume-title":"Thirty-Sixth AAAI Conference on Artificial Intelligence, AAAI 2022, Thirty-Fourth Conference on Innovative Applications of Artificial Intelligence, IAAI 2022, The Twelveth Symposium on Educational Advances in Artificial Intelligence, EAAI 2022 Virtual Event, February 22 - March 1, 2022","author":"Javed"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/asru51503.2021.9688253"},{"key":"ref11","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Minneapolis, MN, USA, June 2-7, 2019, Volume 1 (Long and Short Papers)","author":"Devlin"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-143"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.2018.8461972"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447520"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2008-644"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1298"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/icassp48485.2024.10446861"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p16-1162"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref20","article-title":"Sequence transduction with recurrent neural networks","volume-title":"CoRR","author":"Graves","year":"2012"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10888018.pdf?arnumber=10888018","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:24:53Z","timestamp":1774416293000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10888018\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10888018","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}