{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:55:56Z","timestamp":1776113756941,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":285,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3593013.3594049","type":"proceedings-article","created":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T14:40:46Z","timestamp":1686580846000},"page":"881-904","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":21,"title":["Augmented Datasheets for Speech Datasets and Ethical Decision-Making"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4680-0022","authenticated-orcid":false,"given":"Orestis","family":"Papakyriakopoulos","sequence":"first","affiliation":[{"name":"Sony AI, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2482-2713","authenticated-orcid":false,"given":"Anna Seo Gyeong","family":"Choi","sequence":"additional","affiliation":[{"name":"Cornell University, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3515-0385","authenticated-orcid":false,"given":"William","family":"Thong","sequence":"additional","affiliation":[{"name":"Sony AI, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8893-143X","authenticated-orcid":false,"given":"Dora","family":"Zhao","sequence":"additional","affiliation":[{"name":"Sony AI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8552-1213","authenticated-orcid":false,"given":"Jerone","family":"Andrews","sequence":"additional","affiliation":[{"name":"Sony AI, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3633-5619","authenticated-orcid":false,"given":"Rebecca","family":"Bourke","sequence":"additional","affiliation":[{"name":"Sony AI, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7907-9353","authenticated-orcid":false,"given":"Alice","family":"Xiang","sequence":"additional","affiliation":[{"name":"Sony AI, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6233-8256","authenticated-orcid":false,"given":"Allison","family":"Koenecke","sequence":"additional","affiliation":[{"name":"Cornell University, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Enable the profanity filter; cloud speech-to-text documentation google cloud. https:\/\/cloud.google.com\/speech-to-text\/docs\/profanity-filter"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 12th Language Resources and Evaluation Conference. 2819\u20132826","author":"Abraham Basil","year":"2020","unstructured":"Basil Abraham, Danish Goel, Divya Siddarth, Kalika Bali, Manu Chopra, Monojit Choudhury, Pratik Joshi, Preethi Jyoti, Sunayana Sitaram, and Vivek Seshadri. 2020. Crowdsourcing speech data for low-resource languages from low-income workers. In Proceedings of the 12th Language Resources and Evaluation Conference. 2819\u20132826."},{"key":"e_1_3_2_1_3_1","volume-title":"The use of lexica in automatic speech recognition. Lexicon Development for Speech and Language Processing","author":"Adda-Decker Martine","year":"2000","unstructured":"Martine Adda-Decker and Lori Lamel. 2000. The use of lexica in automatic speech recognition. Lexicon Development for Speech and Language Processing (2000), 235\u2013266."},{"key":"e_1_3_2_1_4_1","volume-title":"Automatic speech recognition in Sanskrit: A new speech corpus and modelling insights. arXiv preprint arXiv:2106.05852","author":"Adiga Devaraja","year":"2021","unstructured":"Devaraja Adiga, Rishabh Kumar, Amrith Krishna, Preethi Jyothi, Ganesh Ramakrishnan, and Pawan Goyal. 2021. Automatic speech recognition in Sanskrit: A new speech corpus and modelling insights. arXiv preprint arXiv:2106.05852 (2021)."},{"key":"e_1_3_2_1_5_1","volume-title":"MusicLM: Generating Music From Text. arXiv preprint arXiv:2301.11325","author":"Agostinelli Andrea","year":"2023","unstructured":"Andrea Agostinelli, Timo I Denk, Zal\u00e1n Borsos, Jesse Engel, Mauro Verzetti, Antoine Caillon, Qingqing Huang, Aren Jansen, Adam Roberts, Marco Tagliasacchi, 2023. MusicLM: Generating Music From Text. arXiv preprint arXiv:2301.11325 (2023)."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the Ninth International Conference on Language Resources. 1568\u20131574","author":"Aguiar Ana","year":"2014","unstructured":"Ana Aguiar, Mariana Kaiseler, Mariana Cunha, Hugo Meinedo, J Silva, T Abrudan, and PR Almeida. 2014. VOCE Corpus: Ecologically Collected Speech Annotated with Physiological and Psychological Stress Assessments.. In Proceedings of the Ninth International Conference on Language Resources. 1568\u20131574."},{"key":"e_1_3_2_1_7_1","volume-title":"Accentdb: A database of non-native english accents to assist neural speech recognition. arXiv preprint arXiv:2005.07973","author":"Ahamad Afroz","year":"2020","unstructured":"Afroz Ahamad, Ankit Anand, and Pranesh Bhargava. 2020. Accentdb: A database of non-native english accents to assist neural speech recognition. arXiv preprint arXiv:2005.07973 (2020)."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of The 12th language resources and evaluation conference. 6586\u20136592","author":"Ahmed Shafayat","year":"2020","unstructured":"Shafayat Ahmed, Nafis Sadeq, Sudipta Saha Shubha, Md Nahidul Islam, Muhammad Abdullah Adnan, and Mohammad Zuberul Islam. 2020. Preparation of bangla speech corpus from publicly available audio & text. In Proceedings of The 12th language resources and evaluation conference. 6586\u20136592."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268952"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCSPA.2013.6487288"},{"key":"e_1_3_2_1_11_1","volume-title":"Ethical considerations for collecting human-centric image datasets. arXiv preprint arXiv:2302.03629","author":"Andrews Jerone TA","year":"2023","unstructured":"Jerone TA Andrews, Dora Zhao, William Thong, Apostolos Modas, Orestis Papakyriakopoulos, Shruti Nagpal, and Alice Xiang. 2023. Ethical considerations for collecting human-centric image datasets. arXiv preprint arXiv:2302.03629 (2023)."},{"key":"e_1_3_2_1_12_1","volume-title":"Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670","author":"Ardila Rosana","year":"2019","unstructured":"Rosana Ardila, Megan Branson, Kelly Davis, Michael Henretty, Michael Kohler, Josh Meyer, Reuben Morais, Lindsay Saunders, Francis M Tyers, and Gregor Weber. 2019. Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670 (2019)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10196936"},{"key":"e_1_3_2_1_14_1","volume-title":"Indian EmoSpeech Command Dataset: A dataset for emotion based speech recognition in the wild. arXiv preprint arXiv:1910.13801","author":"Banga Subham","year":"2019","unstructured":"Subham Banga, Ujjwal Upadhyay, Piyush Agarwal, Aniket Sharma, and Prerana Mukherjee. 2019. Indian EmoSpeech Command Dataset: A dataset for emotion based speech recognition in the wild. arXiv preprint arXiv:1910.13801 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"The fifth\u2019CHiME\u2019speech separation and recognition challenge: dataset, task and baselines. arXiv preprint arXiv:1803.10609","author":"Barker Jon","year":"2018","unstructured":"Jon Barker, Shinji Watanabe, Emmanuel Vincent, and Jan Trmal. 2018. The fifth\u2019CHiME\u2019speech separation and recognition challenge: dataset, task and baselines. arXiv preprint arXiv:1803.10609 (2018)."},{"key":"e_1_3_2_1_16_1","volume-title":"From speech to insights: The value of the human voice","author":"Belkacemi Yasmine","year":"2022","unstructured":"Yasmine Belkacemi, Eric Buesing, Arpit Goenka, Vinay Gupta, Damian Lewandowski, and Maurice Obeid. 2022. From speech to insights: The value of the human voice. McKinsey & Company (January 2022)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00041"},{"key":"e_1_3_2_1_18_1","volume-title":"Effectiveness of Mining Audio and Text Pairs from Public Data for Improving ASR Systems for Low-Resource Languages. arXiv preprint arXiv:2208.12666","author":"Bhogale Kaushal Santosh","year":"2022","unstructured":"Kaushal Santosh Bhogale, Abhigyan Raman, Tahir Javed, Sumanth Doddapaneni, Anoop Kunchukuttan, Pratyush Kumar, and Mitesh M Khapra. 2022. Effectiveness of Mining Audio and Text Pairs from Public Data for Improving ASR Systems for Low-Resource Languages. arXiv preprint arXiv:2208.12666 (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Jayadev Billa. 2021. Leveraging Non-Target Language Resources to Improve ASR Performance in a Target Language.. In Interspeech. 2581\u20132585.","DOI":"10.21437\/Interspeech.2021-1657"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316782.3322780"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.313"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1998384.1998389"},{"key":"e_1_3_2_1_23_1","volume-title":"Doroteo T Toledano, F Javier Caminero, and Eduardo L\u00f3pez Gonzalo.","author":"Blanco Jos\u00e9 Luis","year":"2011","unstructured":"Jos\u00e9 Luis Blanco, Rub\u00e9n Fern\u00e1ndez Pozo, Doroteo T Toledano, F Javier Caminero, and Eduardo L\u00f3pez Gonzalo. 2011. Analyzing training dependencies and posterior fusion in discriminant classification of apnea patients based on sustained and connected speech. In Interspeech. International Speech Communication Association."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_25_1","unstructured":"David Boersma Paul & Weenink. 2023. Praat (Version 6.3.06). http:\/\/www.praat.org\/"},{"key":"e_1_3_2_1_26_1","volume-title":"A Study of Gender Impact in Self-supervised Models for Speech-to-Text Systems. arXiv preprint arXiv:2204.01397","author":"Boito Marcely Zanon","year":"2022","unstructured":"Marcely Zanon Boito, Laurent Besacier, Natalia Tomashenko, and Yannick Est\u00e8ve. 2022. A Study of Gender Impact in Self-supervised Models for Speech-to-Text Systems. arXiv preprint arXiv:2204.01397 (2022)."},{"key":"e_1_3_2_1_27_1","volume-title":"ON-TRAC Consortium Systems for the IWSLT 2022 Dialect and Low-resource Speech Translation Tasks. arXiv preprint arXiv:2205","author":"Boito Marcely Zanon","year":"2022","unstructured":"Marcely Zanon Boito, John Ortega, Hugo Riguidel, Antoine Laurent, Lo\u00efc Barrault, Fethi Bougares, Firas Chaabani, Ha Nguyen, Florentin Barbier, Souhir Gahbiche, 2022. ON-TRAC Consortium Systems for the IWSLT 2022 Dialect and Low-resource Speech Translation Tasks. arXiv preprint arXiv:2205.01987 (2022)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Hynek Boril Abhijeet Sangwan and John HL Hansen. 2012. Arabic Dialect Identification-\u2019Is the Secret in the Silence?\u2019and Other Observations.. In INTERSPEECH. 30\u201333.","DOI":"10.21437\/Interspeech.2012-18"},{"key":"e_1_3_2_1_29_1","volume-title":"Audiolm: a language modeling approach to audio generation. arXiv preprint arXiv:2209.03143","author":"Borsos Zal\u00e1n","year":"2022","unstructured":"Zal\u00e1n Borsos, Rapha\u00ebl Marinier, Damien Vincent, Eugene Kharitonov, Olivier Pietquin, Matt Sharifi, Olivier Teboul, David Grangier, Marco Tagliasacchi, and Neil Zeghidour. 2022. Audiolm: a language modeling approach to audio generation. arXiv preprint arXiv:2209.03143 (2022)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-1317"},{"key":"e_1_3_2_1_31_1","volume-title":"Reproduction in education, society and culture","author":"Bourdieu Pierre","unstructured":"Pierre Bourdieu and Jean-Claude Passeron. 1990. Reproduction in education, society and culture. Vol. 4. Sage."},{"key":"e_1_3_2_1_32_1","unstructured":"Thorsten Brants. 2000. Inter-annotator Agreement for a German Newspaper Corpus.. In LREC. Citeseer."},{"key":"e_1_3_2_1_33_1","volume-title":"The Communicative Value of Intonation in English Book","author":"Brazil David","unstructured":"David Brazil. 1997. The Communicative Value of Intonation in English Book. Cambridge University Press."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2017.8384449"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research","volume":"91","author":"Buolamwini Joy","year":"2018","unstructured":"Joy Buolamwini and Timnit Gebru. 2018. Gender Shades: Intersectional Accuracy Disparities in Commercial Gender Classification. In Proceedings of the 1st Conference on Fairness, Accountability and Transparency(Proceedings of Machine Learning Research, Vol. 81), Sorelle A. Friedler and Christo Wilson (Eds.). PMLR, 77\u201391. https:\/\/proceedings.mlr.press\/v81\/buolamwini18a.html"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2021.612551"},{"key":"e_1_3_2_1_37_1","volume-title":"Isin Demirsahin, Alexander Gutkin, Linne Ha, Fei He, Martin Jansche, Cibu Johny","author":"Butryna Alena","year":"2020","unstructured":"Alena Butryna, Shan-Hui Cathy Chu, Isin Demirsahin, Alexander Gutkin, Linne Ha, Fei He, Martin Jansche, Cibu Johny, Anna Katanova, Oddur Kjartansson, 2020. Google crowdsourced speech corpora and related open-source resources for low-resource languages and dialects: an overview. arXiv preprint arXiv:2010.06778 (2020)."},{"key":"e_1_3_2_1_38_1","volume-title":"Road traffic noise in Montreal and environmental equity: What is the situation for the most vulnerable population groups?Journal of Transport Geography 51","author":"Carrier Mathieu","year":"2016","unstructured":"Mathieu Carrier, Philippe Apparicio, and Anne-Marie S\u00e9guin. 2016. Road traffic noise in Montreal and environmental equity: What is the situation for the most vulnerable population groups?Journal of Transport Geography 51 (2016), 1\u20138."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-404"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"J. A. Casey R. Morello-Frosch K. Mennitt D. J.and Fristrup E. L. Ogburn and P. James. 2017. Race\/Ethnicity Socioeconomic Status Residential Segregation and Spatial Variation in Noise Exposure in the Contiguous United States. Environmental health perspectives 125 7 (2017) 077017.","DOI":"10.1289\/EHP898"},{"key":"e_1_3_2_1_41_1","unstructured":"R.T. Cauldwell. 2002. Streaming speech: Listening and pronunciation for advanced learners of English. Speechninaction."},{"key":"e_1_3_2_1_42_1","volume-title":"Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC\u201916)","author":"\u0106avar Malgorzata","year":"2016","unstructured":"Malgorzata \u0106avar, Damir \u0106avar, Dov-Ber Kerler, and Anya Quilitzsch. 2016. Generating a Yiddish speech corpus, forced aligner and basic ASR system for the AHEYM project. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC\u201916). 4688\u20134693."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Chen-Yu Chen Wei-Zhong Zheng Syu-Siang Wang Yu Tsao Pei-Chun Li and Ying-Hui Lai. 2020. Enhancing Intelligibility of Dysarthric Speech Using Gated Convolutional-Based Voice Conversion System.. In INTERSPEECH. 4686\u20134690.","DOI":"10.21437\/Interspeech.2020-1367"},{"key":"e_1_3_2_1_45_1","volume-title":"Gigaspeech: An evolving, multi-domain asr corpus with 10,000 hours of transcribed audio. arXiv preprint arXiv:2106.06909","author":"Chen Guoguo","year":"2021","unstructured":"Guoguo Chen, Shuzhou Chai, Guanbo Wang, Jiayu Du, Wei-Qiang Zhang, Chao Weng, Dan Su, Daniel Povey, Jan Trmal, Junbo Zhang, 2021. Gigaspeech: An evolving, multi-domain asr corpus with 10,000 hours of transcribed audio. arXiv preprint arXiv:2106.06909 (2021)."},{"key":"e_1_3_2_1_46_1","first-page":"47","article-title":"The creation of a prosodically transcribed intercultural corpus: The Hong Kong Corpus of Spoken English (prosodic)","volume":"29","author":"Cheng Winnie","year":"2005","unstructured":"Winnie Cheng, Christopher Greaves, Martin Warren, 2005. The creation of a prosodically transcribed intercultural corpus: The Hong Kong Corpus of Spoken English (prosodic). ICAME journal 29 (2005), 47\u201368.","journal-title":"ICAME journal"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410530.3414372"},{"key":"e_1_3_2_1_48_1","volume-title":"Improving gender translation accuracy with filtered self-training. arXiv preprint arXiv:2104.07695","author":"Choubey Prafulla Kumar","year":"2021","unstructured":"Prafulla Kumar Choubey, Anna Currey, Prashant Mathur, and Georgiana Dinu. 2021. Improving gender translation accuracy with filtered self-training. arXiv preprint arXiv:2104.07695 (2021)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACIIAsia.2018.8470362"},{"key":"e_1_3_2_1_50_1","unstructured":"Renee Peje Clapham Lisette van der Molen RJJH van Son M van den Brekel and Frans JM Hilgers. 2012. NKI-CCRT corpus-speech intelligibility before and after advanced head and neck cancer treated with concomitant chemoradiotherapy. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC\u201912). 3350\u20133355."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1093\/iwc\/iwz016"},{"key":"e_1_3_2_1_52_1","volume-title":"Audio de-identification: A new entity recognition task. arXiv preprint arXiv:1903.07037","author":"Cohn Ido","year":"2019","unstructured":"Ido Cohn, Itay Laish, Genady Beryozkin, Gang Li, Izhak Shafran, Idan Szpektor, Tzvika Hartman, Avinatan Hassidim, and Yossi Matias. 2019. Audio de-identification: A new entity recognition task. arXiv preprint arXiv:1903.07037 (2019)."},{"key":"e_1_3_2_1_53_1","volume-title":"Librimix: An open-source dataset for generalizable speech separation. arXiv preprint arXiv:2005.11262","author":"Cosentino Joris","year":"2020","unstructured":"Joris Cosentino, Manuel Pariente, Samuele Cornell, Antoine Deleforge, and Emmanuel Vincent. 2020. Librimix: An open-source dataset for generalizable speech separation. arXiv preprint arXiv:2005.11262 (2020)."},{"key":"e_1_3_2_1_54_1","volume-title":"Mt-adapted datasheets for datasets: template and repository. arXiv preprint arXiv:2005.13156","author":"Marta R","year":"2020","unstructured":"Marta R Costa-juss\u00e0, Roger Creus, Oriol Domingo, Albert Dom\u00ednguez, Miquel Escobar, Cayetana L\u00f3pez, Marina Garcia, and Margarita Geleta. 2020. Mt-adapted datasheets for datasets: template and repository. arXiv preprint arXiv:2005.13156 (2020)."},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC\u201906)","author":"Couss\u00e9 Evie","year":"2006","unstructured":"Evie Couss\u00e9 and Steven Gillis. 2006. Regional bias in the broad phonetic transcriptions of the Spoken Dutch Corpus. In Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC\u201906)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/1322391.1322394"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Amit Das Preethi Jyothi and Mark Hasegawa-Johnson. 2016. Automatic Speech Recognition Using Probabilistic Transcriptions in Swahili Amharic and Dinka.. In INTERSPEECH. 3524\u20133528.","DOI":"10.21437\/Interspeech.2016-657"},{"key":"e_1_3_2_1_58_1","unstructured":"Datatang. [n. d.]. 500 hours - Italian conversational speech data by mobile phone. https:\/\/www.datatang.ai\/datasets\/1178?utm_source=PaperwithCode&utm_medium=PaperwithCode&utm_campaign=PaperwithCode&utm_id=PaperwithCode&utm_term=PaperwithCode&utm_content=PaperwithCode"},{"key":"e_1_3_2_1_59_1","volume-title":"proceedings of ISLE Workshop","volume":"20","author":"Devillers Laurence","year":"2002","unstructured":"Laurence Devillers, Ioana Vasilescu, and Lori Lamel. 2002. Annotation and detection of emotion in a task-oriented human-human dialog corpus. In proceedings of ISLE Workshop, Vol. 20. 43."},{"key":"e_1_3_2_1_60_1","volume-title":"Performance Disparities Between Accents in Automatic Speech Recognition. arXiv preprint arXiv:2208.01157","author":"DiChristofano Alex","year":"2022","unstructured":"Alex DiChristofano, Henry Shuster, Shefali Chandra, and Neal Patwari. 2022. Performance Disparities Between Accents in Automatic Speech Recognition. arXiv preprint arXiv:2208.01157 (2022)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.26615\/issn.2603-2821.2019_003"},{"key":"e_1_3_2_1_62_1","volume-title":"Aishell-2: Transforming mandarin asr research into industrial scale. arXiv preprint arXiv:1808.10583","author":"Du Jiayu","year":"2018","unstructured":"Jiayu Du, Xingyu Na, Xuechen Liu, and Hui Bu. 2018. Aishell-2: Transforming mandarin asr research into industrial scale. arXiv preprint arXiv:1808.10583 (2018)."},{"key":"e_1_3_2_1_63_1","volume-title":"Deep Speech Based End-to-End Automated Speech Recognition (ASR) for Indian-English Accents. arXiv preprint arXiv:2204.00977","author":"Dubey Priyank","year":"2022","unstructured":"Priyank Dubey and Bilal Shah. 2022. Deep Speech Based End-to-End Automated Speech Recognition (ASR) for Indian-English Accents. arXiv preprint arXiv:2204.00977 (2022)."},{"key":"e_1_3_2_1_64_1","volume-title":"Applied Phonetics: The sounds of American English","author":"Edwards H. T.","year":"1997","unstructured":"H. T. Edwards. 1997. Applied Phonetics: The sounds of American English. Singular, San Diego, CA."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2022.119734"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533080"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1017\/S0140525X0999094X"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"crossref","unstructured":"Alessandro Fabris Stefano Messina Gianmaria Silvello and Gian Antonio Susto. 2022. Tackling documentation debt: a survey on algorithmic fairness datasets. In Equity and Access in Algorithms Mechanisms and Optimization. 1\u201313.","DOI":"10.1145\/3551624.3555286"},{"key":"e_1_3_2_1_69_1","volume-title":"ASR-GLUE: A new multi-task benchmark for asr-robust natural language understanding. arXiv preprint arXiv:2108.13048","author":"Feng Lingyun","year":"2021","unstructured":"Lingyun Feng, Jianwei Yu, Deng Cai, Songxiang Liu, Haitao Zheng, and Yan Wang. 2021. ASR-GLUE: A new multi-task benchmark for asr-robust natural language understanding. arXiv preprint arXiv:2108.13048 (2021)."},{"key":"e_1_3_2_1_70_1","volume-title":"Bence Mark Halpern, and Odette Scharenborg","author":"Feng Siyuan","year":"2021","unstructured":"Siyuan Feng, Olya Kudina, Bence Mark Halpern, and Odette Scharenborg. 2021. Quantifying bias in automatic speech recognition. arXiv preprint arXiv:2103.15122 (2021)."},{"key":"e_1_3_2_1_71_1","volume-title":"A Review of Speech-centric Trustworthy Machine Learning: Privacy, Safety, and Fairness. arXiv preprint arXiv:2212.09006","author":"Feng Tiantian","year":"2022","unstructured":"Tiantian Feng, Rajat Hebbar, Nicholas Mehlman, Xuan Shi, Aditya Kommineni, 2022. A Review of Speech-centric Trustworthy Machine Learning: Privacy, Safety, and Fairness. arXiv preprint arXiv:2212.09006 (2022)."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58811-3_6"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1037\/0033-2909.97.3.412"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"crossref","unstructured":"Penelope Gardner-Chloros. 2009. Code-switching. Cambridge university press.","DOI":"10.1017\/CBO9780511609787"},{"key":"e_1_3_2_1_75_1","volume-title":"De-identification of Personal Information:.US Department of Commerce","author":"Simson Garfinkel","unstructured":"Simson Garfinkel 2015. De-identification of Personal Information:.US Department of Commerce, National Institute of Standards and Technology."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/3347449.3357480"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.4324\/9781315841366"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458723"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_2_1_80_1","volume-title":"Speaker Adaptation Using Spectro-Temporal Deep Features for Dysarthric and Elderly Speech Recognition. arXiv preprint arXiv:2202.10290","author":"Geng Mengzhe","year":"2022","unstructured":"Mengzhe Geng, Xurong Xie, Zi Ye, Tianzi Wang, Guinan Li, Shujie Hu, Xunying Liu, and Helen Meng. 2022. Speaker Adaptation Using Spectro-Temporal Deep Features for Dysarthric and Elderly Speech Recognition. arXiv preprint arXiv:2202.10290 (2022)."},{"key":"e_1_3_2_1_81_1","volume-title":"Proceedings of the 12th language resources and evaluation conference. 6469\u20136476","author":"Georgila Kallirroi","year":"2020","unstructured":"Kallirroi Georgila, Anton Leuski, Volodymyr Yanov, and David Traum. 2020. Evaluation of off-the-shelf speech recognizers across diverse dialogue domains. In Proceedings of the 12th language resources and evaluation conference. 6469\u20136476."},{"key":"e_1_3_2_1_82_1","unstructured":"James Sneed German Maria Candea LeAnn Brown Timothy Mahrt and Oriana Reid-Collins. 2022. Gender Spectrum Speech Corpus. https:\/\/hdl.handle.net\/11403\/gender_spectrum_speech\/v2.1 ORTOLANG (Open Resources and TOols for LANGuage) \u2013www.ortolang.fr."},{"key":"e_1_3_2_1_83_1","unstructured":"K. Gerson and S. Damaske. 2020. The Open. Oxford University Press Oxford NY."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"crossref","unstructured":"K. Gerson and S. Damaske. 2020. The Science and Art of Interviewing. Oxford University Press Oxford NY.","DOI":"10.1093\/oso\/9780199324286.001.0001"},{"key":"e_1_3_2_1_85_1","volume-title":"Multilingual and cross-lingual intent detection from spoken data. arXiv preprint arXiv:2104.08524","author":"Gerz Daniela","year":"2021","unstructured":"Daniela Gerz, Pei-Hao Su, Razvan Kusztos, Avishek Mondal, Micha\u0142 Lis, Eshan Singhal, Nikola Mrk\u0161i\u0107, Tsung-Hsien Wen, and Ivan Vuli\u0107. 2021. Multilingual and cross-lingual intent detection from spoken data. arXiv preprint arXiv:2104.08524 (2021)."},{"key":"e_1_3_2_1_86_1","volume-title":"Comparing the performance of forced aligners used in sociophonetic research. Linguistics Vanguard 6, 1","author":"Gonzalez Simon","year":"2020","unstructured":"Simon Gonzalez, James Grama, and Catherine E Travis. 2020. Comparing the performance of forced aligners used in sociophonetic research. Linguistics Vanguard 6, 1 (2020)."},{"key":"e_1_3_2_1_87_1","volume-title":"Proceedings of the 12th International Conference on Language Resources and Evaluation (LREC)","author":"Gorisch Jan","year":"2020","unstructured":"Jan Gorisch, Michael Gref, and Thomas Schmidt. 2020. Using Automatic Speech Recognition in Spoken Corpus Curation. In Proceedings of the 12th International Conference on Language Resources and Evaluation (LREC), May 11-16, 2020, Palais du Pharo, Marseille, France. European Language Resources Association, 6423\u20136428."},{"key":"e_1_3_2_1_88_1","first-page":"192","article-title":"Prosodylab-aligner: A tool for forced alignment of laboratory speech","volume":"39","author":"Gorman Kyle","year":"2011","unstructured":"Kyle Gorman, Jonathan Howell, and Michael Wagner. 2011. Prosodylab-aligner: A tool for forced alignment of laboratory speech. Canadian Acoustics 39, 3 (2011), 192\u2013193.","journal-title":"Canadian Acoustics"},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1384"},{"key":"e_1_3_2_1_90_1","doi-asserted-by":"crossref","unstructured":"Roberto Gretter. 2014. Euronews: a multilingual speech corpus for ASR.. In LREC. 2635\u20132638.","DOI":"10.21437\/Interspeech.2014-381"},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2008.11.002"},{"key":"e_1_3_2_1_92_1","volume-title":"Hanna Wallach, and Meredith Ringel Morris.","author":"Guo Anhong","year":"2020","unstructured":"Anhong Guo, Ece Kamar, Jennifer Wortman Vaughan, Hanna Wallach, and Meredith Ringel Morris. 2020. Toward fairness in AI for people with disabilities SBG@ a research roadmap. ACM SIGACCESS Accessibility and Computing125 (2020), 1\u20131."},{"key":"e_1_3_2_1_93_1","volume-title":"ADIMA: Abuse Detection In Multilingual Audio. In ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 6172\u20136176","author":"Gupta Vikram","year":"2022","unstructured":"Vikram Gupta, Rini Sharon, Ramit Sawhney, and Debdoot Mukherjee. 2022. ADIMA: Abuse Detection In Multilingual Audio. In ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 6172\u20136176."},{"key":"e_1_3_2_1_94_1","volume-title":"Modern standard Arabic phonetics for speech synthesis. Ph. D. Dissertation","author":"Halabi Nawar","unstructured":"Nawar Halabi. 2016. Modern standard Arabic phonetics for speech synthesis. Ph. D. Dissertation. University of Southampton."},{"key":"e_1_3_2_1_95_1","volume-title":"An ethical highlighter for people-centric dataset creation. arXiv preprint arXiv:2011.13583","author":"Hanley Margot","year":"2020","unstructured":"Margot Hanley, Apoorv Khandelwal, Hadar Averbuch-Elor, Noah Snavely, and Helen Nissenbaum. 2020. An ethical highlighter for people-centric dataset creation. arXiv preprint arXiv:2011.13583 (2020)."},{"key":"e_1_3_2_1_96_1","unstructured":"Harveenchadha. [n. d.]. Indic-Voice: Largest Open Source speech corpora for Indic languages. https:\/\/github.com\/harveenchadha\/indic-voice"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-99579-3_21"},{"key":"e_1_3_2_1_98_1","volume-title":"Beyond instructional videos: Probing for more diverse visual-textual grounding on youtube. arXiv preprint arXiv:2004.14338","author":"Hessel Jack","year":"2020","unstructured":"Jack Hessel, Zhenhai Zhu, Bo Pang, and Radu Soricut. 2020. Beyond instructional videos: Probing for more diverse visual-textual grounding on youtube. arXiv preprint arXiv:2004.14338 (2020)."},{"key":"e_1_3_2_1_99_1","volume-title":"The dataset nutrition label: A framework to drive higher data quality standards. arXiv preprint arXiv:1805.03677","author":"Holland Sarah","year":"2018","unstructured":"Sarah Holland, Ahmed Hosny, Sarah Newman, Joshua Joseph, and Kasia Chmielinski. 2018. The dataset nutrition label: A framework to drive higher data quality standards. arXiv preprint arXiv:1805.03677 (2018)."},{"key":"e_1_3_2_1_100_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300830"},{"key":"e_1_3_2_1_101_1","volume-title":"Subjective comparison and evaluation of speech enhancement algorithms. Speech communication 49, 7-8","author":"Hu Yi","year":"2007","unstructured":"Yi Hu and Philipos C Loizou. 2007. Subjective comparison and evaluation of speech enhancement algorithms. Speech communication 49, 7-8 (2007), 588\u2013601."},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101272"},{"key":"e_1_3_2_1_103_1","volume-title":"Design Guidelines for Inclusive Speaker Verification Evaluation Datasets. arXiv preprint arXiv:2204.02281","author":"Hutiri Wiebke Toussaint","year":"2022","unstructured":"Wiebke Toussaint Hutiri, Lauriane Gorce, and Aaron Yi Ding. 2022. Design Guidelines for Inclusive Speaker Verification Evaluation Datasets. arXiv preprint arXiv:2204.02281 (2022)."},{"key":"e_1_3_2_1_104_1","unstructured":"Deeply Inc. 202. Korean Read Speech Corpus. https:\/\/github.com\/deeplyinc\/Korean-Read-Speech-Corpus"},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1145\/3371382.3378278"},{"key":"e_1_3_2_1_106_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461628"},{"key":"e_1_3_2_1_107_1","unstructured":"Joseph Darius Jaafari and Nicole Lewis. 2019. In Court Where Are Siri and Alexa?The Marshall Project (February 2019)."},{"key":"e_1_3_2_1_108_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445901"},{"key":"e_1_3_2_1_109_1","volume-title":"The ICSI meeting corpus. In 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings.(ICASSP\u201903)","volume":"1","author":"Janin Adam","year":"2003","unstructured":"Adam Janin, Don Baron, Jane Edwards, Dan Ellis, David Gelbart, Nelson Morgan, Barbara Peskin, Thilo Pfau, Elizabeth Shriberg, Andreas Stolcke, 2003. The ICSI meeting corpus. In 2003 IEEE International Conference on Acoustics, Speech, and Signal Processing, 2003. Proceedings.(ICASSP\u201903)., Vol. 1. IEEE, I\u2013I."},{"key":"e_1_3_2_1_111_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533767.3534391"},{"key":"e_1_3_2_1_112_1","volume-title":"Proceedings of the 16th Nordic Conference of Computational Linguistics (NODALIDA","author":"Johannessen Janne Bondi","year":"2007","unstructured":"Janne Bondi Johannessen, Kristin Hagen, Joel Priestley, and Lars Nygaard. 2007. An advanced speech corpus for Norwegian. In Proceedings of the 16th Nordic Conference of Computational Linguistics (NODALIDA 2007). 29\u201336."},{"key":"e_1_3_2_1_113_1","volume-title":"Proceedings of the 12th Language Resources and Evaluation Conference. 4089\u20134095","author":"Johnson Khia A","year":"2020","unstructured":"Khia A Johnson, Molly Babel, Ivan Fong, and Nancy Yiu. 2020. SpiCE: A new open-access corpus of conversational bilingual speech in Cantonese and English. In Proceedings of the 12th Language Resources and Evaluation Conference. 4089\u20134095."},{"key":"e_1_3_2_1_114_1","doi-asserted-by":"publisher","DOI":"10.1353\/lan.2019.0042"},{"key":"e_1_3_2_1_115_1","first-page":"209","article-title":"Designing and building the Korean English Learners\u2019 Spoken Corpus (KELSC)","volume":"35","author":"Jung Chae Kwan","year":"2021","unstructured":"Chae Kwan Jung. 2021. Designing and building the Korean English Learners\u2019 Spoken Corpus (KELSC). Studies in Foreign Language Education 35, 3 (2021), 209\u2013223.","journal-title":"Studies in Foreign Language Education"},{"key":"e_1_3_2_1_116_1","volume-title":"Prosody features based low resource Punjabi children ASR and T-NT classifier using data augmentation. Multimedia Tools and Applications","author":"Kadyan Virender","year":"2022","unstructured":"Virender Kadyan, Taniya Hasija, and Amitoj Singh. 2022. Prosody features based low resource Punjabi children ASR and T-NT classifier using data augmentation. Multimedia Tools and Applications (2022), 1\u201322."},{"key":"e_1_3_2_1_117_1","doi-asserted-by":"publisher","unstructured":"Sayash Kapoor and Arvind Narayanan. 2022. Leakage and the Reproducibility Crisis in ML-based Science. https:\/\/doi.org\/10.48550\/ARXIV.2207.07048","DOI":"10.48550\/ARXIV.2207.07048"},{"key":"e_1_3_2_1_118_1","doi-asserted-by":"publisher","DOI":"10.1145\/3555574"},{"key":"e_1_3_2_1_119_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2670141"},{"key":"e_1_3_2_1_120_1","volume-title":"Golos: Russian dataset for speech research. arXiv preprint arXiv:2106.10161","author":"Karpov Nikolay","year":"2021","unstructured":"Nikolay Karpov, Alexander Denisenko, and Fedor Minkin. 2021. Golos: Russian dataset for speech research. arXiv preprint arXiv:2106.10161 (2021)."},{"key":"e_1_3_2_1_121_1","volume-title":"Learning robust and multilingual speech representations. arXiv preprint arXiv:2001.11128","author":"Kawakami Kazuya","year":"2020","unstructured":"Kazuya Kawakami, Luyu Wang, Chris Dyer, Phil Blunsom, and Aaron van den Oord. 2020. Learning robust and multilingual speech representations. arXiv preprint arXiv:2001.11128 (2020)."},{"key":"e_1_3_2_1_122_1","doi-asserted-by":"publisher","DOI":"10.1108\/RR-08-2013-0197"},{"key":"e_1_3_2_1_123_1","first-page":"1","article-title":"The corpus of regional african american language","volume":"6","author":"Kendall Tyler","year":"2018","unstructured":"Tyler Kendall and Charlie Farrington. 2018. The corpus of regional african american language. Version 6 (2018), 1.","journal-title":"Version"},{"key":"e_1_3_2_1_124_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00922"},{"key":"e_1_3_2_1_125_1","doi-asserted-by":"publisher","DOI":"10.13064\/KSSS.2020.12.4.081"},{"key":"e_1_3_2_1_126_1","volume-title":"FT speech: Danish parliament speech corpus. arXiv preprint arXiv:2005.12368","author":"Kirkedal Andreas","year":"2020","unstructured":"Andreas Kirkedal, Marija Stepanovi\u0107, and Barbara Plank. 2020. FT speech: Danish parliament speech corpus. arXiv preprint arXiv:2005.12368 (2020)."},{"key":"e_1_3_2_1_127_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422600"},{"key":"e_1_3_2_1_128_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1915768117"},{"key":"e_1_3_2_1_129_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research","volume":"5664","author":"Koh Pang Wei","year":"2021","unstructured":"Pang Wei Koh, Shiori Sagawa, Henrik Marklund, Sang Michael Xie, Marvin Zhang, Akshay Balsubramani, Weihua Hu, Michihiro Yasunaga, Richard Lanas Phillips, Irena Gao, Tony Lee, Etienne David, Ian Stavness, Wei Guo, Berton Earnshaw, Imran Haque, Sara M Beery, Jure Leskovec, Anshul Kundaje, Emma Pierson, Sergey Levine, Chelsea Finn, and Percy Liang. 2021. WILDS: A Benchmark of in-the-Wild Distribution Shifts. In Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 5637\u20135664."},{"key":"e_1_3_2_1_130_1","volume-title":"Mediaspeech: Multilanguage asr benchmark and dataset. arXiv preprint arXiv:2103.16193","author":"Kolobov Rostislav","year":"2021","unstructured":"Rostislav Kolobov, Olga Okhapkina, Olga Omelchishina, Andrey Platunov, Roman Bedyakin, Vyacheslav Moshkin, Dmitry Menshikov, and Nikolay Mikhaylovskiy. 2021. Mediaspeech: Multilanguage asr benchmark and dataset. arXiv preprint arXiv:2103.16193 (2021)."},{"key":"e_1_3_2_1_131_1","doi-asserted-by":"publisher","DOI":"10.1515\/cllt-2013-0054"},{"key":"e_1_3_2_1_132_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.516"},{"key":"e_1_3_2_1_133_1","volume-title":"Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference. 125\u2013130","author":"Kulebi Baybars","year":"2022","unstructured":"Baybars Kulebi, Carme Armentano-Oller, Carlos Rodr\u00edguez-Penagos, and Marta Villegas. 2022. ParlamentParla: A speech corpus of catalan parliamentary sessions. In Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference. 125\u2013130."},{"key":"e_1_3_2_1_134_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00034-021-01885-5"},{"key":"e_1_3_2_1_135_1","doi-asserted-by":"publisher","DOI":"10.1145\/2384916.2384930"},{"key":"e_1_3_2_1_136_1","volume-title":"KT-speech-crawler: Automatic dataset construction for speech recognition from YouTube videos. arXiv preprint arXiv:1903.00216","author":"Lakomkin Egor","year":"2019","unstructured":"Egor Lakomkin, Sven Magg, Cornelius Weber, and Stefan Wermter. 2019. KT-speech-crawler: Automatic dataset construction for speech recognition from YouTube videos. arXiv preprint arXiv:1903.00216 (2019)."},{"key":"e_1_3_2_1_137_1","volume-title":"Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC\u201910)","author":"Lata Swaran","year":"2010","unstructured":"Swaran Lata and Somnath Chandra Vijay Kumar. 2010. Development of Linguistic Resources and Tools for Providing Multilingual Solutions in Indian Languages\u2014A Report on National Initiative. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC\u201910)."},{"key":"e_1_3_2_1_138_1","volume-title":"Recommendations for datasets for source code summarization. arXiv preprint arXiv:1904.02660","author":"LeClair Alexander","year":"2019","unstructured":"Alexander LeClair and Collin McMillan. 2019. Recommendations for datasets for source code summarization. arXiv preprint arXiv:1904.02660 (2019)."},{"key":"e_1_3_2_1_139_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2004-424"},{"key":"e_1_3_2_1_140_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision. 763\u2013772","author":"Lee Gilwoo","year":"2019","unstructured":"Gilwoo Lee, Zhiwei Deng, Shugao Ma, Takaaki Shiratori, Siddhartha S Srinivasa, and Yaser Sheikh. 2019. Talking with hands 16.2 m: A large-scale dataset of synchronized body-finger motion and audio for conversational motion analysis and synthesis. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. 763\u2013772."},{"key":"e_1_3_2_1_141_1","doi-asserted-by":"crossref","unstructured":"Seonwoo Lee Sunhee Kim and Minhwa Chung. 2022. Building A Speech Corpus Of Children With Cochlear Implants Via An Enhanced Metadata Structure. In 2022 25th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA). IEEE 1\u20136.","DOI":"10.1109\/O-COCOSDA202257103.2022.9997935"},{"key":"e_1_3_2_1_142_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472924"},{"key":"e_1_3_2_1_143_1","volume-title":"Review of speech synthesis technology","author":"Lemmety S","unstructured":"S Lemmety. 2000. Review of speech synthesis technology, Helsinki University of Technology. Ph. D. Dissertation. Th\u00e8se."},{"key":"e_1_3_2_1_144_1","volume-title":"TALCS: An Open-Source Mandarin-English Code-Switching Corpus and a Speech Recognition Baseline. ArXiv abs\/2206.13135","author":"Li Chengfei","year":"2022","unstructured":"Chengfei Li, Shuhao Deng, Yaoping Wang, Guangjing Wang, Yaguang Gong, Changbin Chen, and Jinfeng Bai. 2022. TALCS: An Open-Source Mandarin-English Code-Switching Corpus and a Speech Recognition Baseline. ArXiv abs\/2206.13135 (2022)."},{"key":"e_1_3_2_1_145_1","volume-title":"ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Li Chak-Fai","unstructured":"Chak-Fai Li, Francis Keith, William Hartmann, and Matthew Snover. 2022. Combining Unsupervised and Text Augmented Semi-Supervised Learning For Low Resourced Autoregressive Speech Recognition. In ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 6892\u20136896."},{"key":"e_1_3_2_1_146_1","unstructured":"Jason Li Ravi Gadde Boris Ginsburg and Vitaly Lavrukhin. 2018. Training neural speech recognition systems with synthetic speech augmentation. (2018)."},{"key":"e_1_3_2_1_147_1","volume-title":"Oriental language recognition (OLR) 2020: Summary and analysis. arXiv preprint arXiv:2107.05365","author":"Li Jing","year":"2021","unstructured":"Jing Li, Binling Wang, Yiming Zhi, Zheng Li, Lin Li, Qingyang Hong, and Dong Wang. 2021. Oriental language recognition (OLR) 2020: Summary and analysis. arXiv preprint arXiv:2107.05365 (2021)."},{"key":"e_1_3_2_1_148_1","unstructured":"Yuanchao Li Catherine Lai Divesh Lala Koji Inoue and Tatsuya Kawahara. 2022. Alzheimer\u2019s Dementia Detection through Spontaneous Dialogue with Proactive Robotic Listeners.. In HRI. 875\u2013879."},{"key":"e_1_3_2_1_149_1","volume-title":"Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC\u201912)","author":"Li Ying","year":"2012","unstructured":"Ying Li, Yue Yu, and Pascale Fung. 2012. A Mandarin-English Code-Switching Corpus. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC\u201912). European Language Resources Association (ELRA), Istanbul, Turkey, 2515\u20132519."},{"key":"e_1_3_2_1_150_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11265-019-01483-4"},{"key":"e_1_3_2_1_151_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053176"},{"key":"e_1_3_2_1_152_1","volume-title":"English with an accent: Language ideology and discrimination in the United States","author":"Lippi-Green R.","unstructured":"R. Lippi-Green. 1997. English with an accent: Language ideology and discrimination in the United States. Routledge, London."},{"key":"e_1_3_2_1_153_1","volume-title":"ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Liu Chunxi","unstructured":"Chunxi Liu, Michael Picheny, Leda Sar\u0131, Pooja Chitkara, Alex Xiao, Xiaohui Zhang, Mark Chou, Andres Alvarado, Caner Hazirbas, and Yatharth Saraf. 2022. Towards Measuring Fairness in Speech Recognition: Casual Conversations Dataset Transcriptions. In ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 6162\u20136166."},{"key":"e_1_3_2_1_154_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-98"},{"key":"e_1_3_2_1_155_1","volume-title":"Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC\u201910)","author":"Liu Yi","year":"2010","unstructured":"Yi Liu, Pascale Fung, Yongsheng Yang, Denise DiPersio, Meghan Glenn, Stephanie Strassel, and Christopher Cieri. 2010. A Very Large Scale Mandarin Chinese Broadcast Corpus for GALE Project. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC\u201910)."},{"key":"e_1_3_2_1_156_1","doi-asserted-by":"publisher","DOI":"10.1002\/ohn.170"},{"key":"e_1_3_2_1_157_1","volume-title":"Alberto Abad, and Carmen Garcia-Mateo.","author":"Lopez-Otero Paula","year":"2017","unstructured":"Paula Lopez-Otero, Laura Doc\u00edo Fern\u00e1ndez, Alberto Abad, and Carmen Garcia-Mateo. 2017. Depression Detection Using Automatic Transcriptions of De-Identified Speech.. In INTERSPEECH. 3157\u20133161."},{"key":"e_1_3_2_1_158_1","unstructured":"Hieu-Thi Luong and Hai-Quan Vu. 2016. A non-expert Kaldi recipe for Vietnamese speech recognition system. In Proceedings of the Third International Workshop on Worldwide Language Service Infrastructure and Second Workshop on Open Infrastructures and Analysis Frameworks for Human Language Technologies (WLSI\/OIAF4HLT2016). 51\u201355."},{"key":"e_1_3_2_1_159_1","volume-title":"Proceedings of INTERSPEECH 2020","author":"Luz Saturnino","year":"2004","unstructured":"Saturnino Luz, Fasih Haider, Sofia de la Fuente, Davida Fromm, and Brian MacWhinney. 2020. Alzheimer\u2019s Dementia Recognition through Spontaneous Speech: The ADReSS Challenge. In Proceedings of INTERSPEECH 2020. Shanghai, China. https:\/\/arxiv.org\/abs\/2004.06833"},{"key":"e_1_3_2_1_160_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-563"},{"key":"e_1_3_2_1_161_1","doi-asserted-by":"crossref","unstructured":"Andrew Maas Quoc V Le Tyler M O\u2019neil Oriol Vinyals Patrick Nguyen and Andrew Y Ng. 2012. Recurrent neural networks for noise reduction in robust ASR. (2012).","DOI":"10.21437\/Interspeech.2012-6"},{"key":"e_1_3_2_1_162_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412762"},{"key":"e_1_3_2_1_163_1","volume-title":"The CHILDES Project: Tools for analyzing talk","author":"MacWhinney B.","unstructured":"B. MacWhinney. 2000. The CHILDES Project: Tools for analyzing talk. Lawrence Erlbaum Associates, Mahwah, NJ."},{"key":"e_1_3_2_1_164_1","doi-asserted-by":"crossref","unstructured":"B. MacWhinney D. Fromm M. Forbes and A. Holland. 2011. AphasiaBank: Methods for studying discourse. Aphasiology 25 (2011).","DOI":"10.1080\/02687038.2011.589893"},{"key":"e_1_3_2_1_165_1","volume-title":"Low-Resource Languages: A Review of Past Work and Future Challenges. arXiv preprint arXiv:2006.07264v1","author":"Magueresse Alexandre","year":"2020","unstructured":"Alexandre Magueresse, Vincent Carles, and Evan Heetderks. 2020. Low-Resource Languages: A Review of Past Work and Future Challenges. arXiv preprint arXiv:2006.07264v1 (2020)."},{"key":"e_1_3_2_1_166_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.sigdial-1.36"},{"key":"e_1_3_2_1_167_1","doi-asserted-by":"publisher","DOI":"10.1145\/3484508"},{"key":"e_1_3_2_1_168_1","doi-asserted-by":"publisher","DOI":"10.1044\/2020_JSLHR-20-00268"},{"key":"e_1_3_2_1_169_1","doi-asserted-by":"crossref","unstructured":"Adria Mallol-Ragolta Nicholas Cummins and Bj\u00f6rn W Schuller. 2020. An Investigation of Cross-Cultural Semi-Supervised Learning for Continuous Affect Recognition.. In INTERSPEECH. 511\u2013515.","DOI":"10.21437\/Interspeech.2020-2641"},{"key":"e_1_3_2_1_170_1","volume-title":"Proceedings of the First Workshop on Bridging Human\u2013Computer Interaction and Natural Language Processing. 34\u201340","author":"Markl Nina","year":"2021","unstructured":"Nina Markl and Catherine Lai. 2021. Context-sensitive evaluation of automatic speech recognition: considering user experience & language variation. In Proceedings of the First Workshop on Bridging Human\u2013Computer Interaction and Natural Language Processing. 34\u201340."},{"key":"e_1_3_2_1_171_1","volume-title":"Transcribing Takes the Next Step.","author":"Markoff John","year":"2019","unstructured":"John Markoff. 2019. From Your Mouth to Your Screen, Transcribing Takes the Next Step. New York Times (October 2019)."},{"key":"e_1_3_2_1_172_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445893"},{"key":"e_1_3_2_1_173_1","unstructured":"The Language Archive Max Planck Institute for Psycholinguistics. 2022. ELAN (Version 6.4). https:\/\/archive.mpi.nl\/tla\/elan"},{"key":"e_1_3_2_1_174_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1386"},{"key":"e_1_3_2_1_175_1","doi-asserted-by":"publisher","DOI":"10.1177\/002383098803100203"},{"key":"e_1_3_2_1_176_1","unstructured":"Paul Meier. 2022 [Online]. AI Hub. https:\/\/aihub.or.kr\/aihubdata\/data\/view.do?currMenu=116&topMenu=100&aihubDataSe=ty&dataSetSn=118"},{"key":"e_1_3_2_1_177_1","unstructured":"Carlos Mena Michal Borsky David Erik Mollberg Sm\u00e1ri Freyr Gu\u00f0mundsson Staffan Hedstr\u00f6m Ragnar P\u00e1lsson \u00d3lafur Helgi J\u00f3nsson Sunneva \u00deorsteinsd\u00f3ttir J\u00f3hanna Vigd\u00eds Gu\u00f0mundsd\u00f3ttir Eyd\u00eds Huld Magn\u00fasd\u00f3ttir Ragnhei\u00f0ur \u00de\u00f3rhallsd\u00f3ttir and Jon Gudnason. 2021. Samr\u00f3mur Children Icelandic Speech 21.09. Reykjavik University: Language and Voice Lab."},{"key":"e_1_3_2_1_178_1","doi-asserted-by":"publisher","DOI":"10.1145\/1017494.1017497"},{"key":"e_1_3_2_1_179_1","volume-title":"Elizabeth Salesky, Iroro Orife, Colin Leong, Perez Ogayo, Chris Chinenye Emezue, Jonathan Mukiibi, Salomey Osei, Apelete Agbolo, Victor Akinode","author":"Meyer Josh","year":"2022","unstructured":"Josh Meyer, David Adelani, Edresson Casanova, Alp \u00d6ktem, Daniel Whitenack, Julian Weber, Salomon Kabongo Kabenamualu, Elizabeth Salesky, Iroro Orife, Colin Leong, Perez Ogayo, Chris Chinenye Emezue, Jonathan Mukiibi, Salomey Osei, Apelete Agbolo, Victor Akinode, Bernard Opoku, Olanrewaju Samuel, Jesujoba Alabi, and Shamsuddeen Hassan Muhammad. 2022. BibleTTS: a large, high-fidelity, multilingual, and uniquely African speech corpus. In Interspeech. ISCA. https:\/\/arxiv.org\/pdf\/2207.03546.pdf"},{"key":"e_1_3_2_1_180_1","volume-title":"Proceedings of the 12th language resources and evaluation conference. 6462\u20136468","author":"Meyer Josh","year":"2020","unstructured":"Josh Meyer, Lindy Rauchenstein, Joshua D Eisenberg, and Nicholas Howell. 2020. Artie bias corpus: An open dataset for detecting demographic bias in speech applications. In Proceedings of the 12th language resources and evaluation conference. 6462\u20136468."},{"key":"e_1_3_2_1_181_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445880"},{"key":"e_1_3_2_1_182_1","unstructured":"Boyd Michailovsky Martine Mazaudon Alexis Michaud S\u00e9verine Guillaume Alexandre Fran\u00e7ois and Evangelia Adamou. 2014. Documenting and researching endangered languages: the Pangloss Collection. (2014)."},{"key":"e_1_3_2_1_183_1","unstructured":"Microsoft. [n. d.]. Training and testing datasets - speech service - azure cognitive services. https:\/\/learn.microsoft.com\/en-us\/azure\/cognitive-services\/speech-service\/how-to-custom-speech-test-and-train"},{"key":"e_1_3_2_1_184_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682324"},{"key":"e_1_3_2_1_185_1","volume-title":"Authority in language: investigating standard english","author":"Milroy James","unstructured":"James Milroy and Lesley Milroy. 2012. Authority in language: investigating standard english. Routledge, London, England."},{"key":"e_1_3_2_1_186_1","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-statistics-042720-125902"},{"key":"e_1_3_2_1_187_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-018-9427-x"},{"key":"e_1_3_2_1_188_1","volume-title":"Lahjoita puhetta\u2013a large-scale corpus of spoken Finnish with some benchmarks. arXiv preprint arXiv:2203.12906","author":"Moisio Anssi","year":"2022","unstructured":"Anssi Moisio, Dejan Porjazovski, Aku Rouhe, Yaroslav Getman, Anja Virkkunen, Tam\u00e1s Gr\u00f3sz, Krister Lind\u00e9n, and Mikko Kurimo. 2022. Lahjoita puhetta\u2013a large-scale corpus of spoken Finnish with some benchmarks. arXiv preprint arXiv:2203.12906 (2022)."},{"key":"e_1_3_2_1_189_1","volume-title":"Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC\u201908)","author":"Morales Nicol\u00e1s","year":"2008","unstructured":"Nicol\u00e1s Morales, Javier Tejedor, Javier Garrido, Jos\u00e9 Col\u00e1s, and Doroteo T Toledano. 2008. STC-TIMIT: Generation of a single-channel telephone corpus. In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC\u201908)."},{"key":"e_1_3_2_1_190_1","volume-title":"Shammur Absar Chowdhury, and Ahmed Ali","author":"Mubarak Hamdy","year":"2021","unstructured":"Hamdy Mubarak, Amir Hussein, Shammur Absar Chowdhury, and Ahmed Ali. 2021. QASR: QCRI Aljazeera Speech Resource\u2013A Large Scale Annotated Arabic Speech Corpus. arXiv preprint arXiv:2106.13000 (2021)."},{"key":"e_1_3_2_1_191_1","volume-title":"Myers and Morton Ann Gernsbacher","author":"David","year":"2021","unstructured":"David G. Myers and Morton Ann Gernsbacher. 2021. Captioning for All. Inside Higher Ed (September 2021)."},{"key":"e_1_3_2_1_192_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308561.3353812"},{"key":"e_1_3_2_1_193_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946856"},{"key":"e_1_3_2_1_194_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2118"},{"key":"e_1_3_2_1_195_1","volume-title":"Voice Privacy with Smart Digital Assistants in Educational Settings. In International Conference on Intelligent Tutoring Systems. Springer, 286\u2013290","author":"Niknazar Mohammad","year":"2021","unstructured":"Mohammad Niknazar, Aditya Vempaty, and Ravi Kokku. 2021. Voice Privacy with Smart Digital Assistants in Educational Settings. In International Conference on Intelligent Tutoring Systems. Springer, 286\u2013290."},{"key":"e_1_3_2_1_196_1","doi-asserted-by":"publisher","DOI":"10.1145\/2631488.2634058"},{"key":"e_1_3_2_1_197_1","volume-title":"5,000 hours of transcribed financial audio for fully formatted end-to-end speech recognition. arXiv preprint arXiv:2104.02014","author":"O\u2019Neill Patrick K","year":"2021","unstructured":"Patrick K O\u2019Neill, Vitaly Lavrukhin, Somshubra Majumdar, Vahid Noroozi, Yuekai Zhang, Oleksii Kuchaiev, Jagadeesh Balam, Yuliya Dovzhenko, Keenan Freyberg, Michael D Shulman, 2021. Spgispeech: 5,000 hours of transcribed financial audio for fully formatted end-to-end speech recognition. arXiv preprint arXiv:2104.02014 (2021)."},{"key":"e_1_3_2_1_198_1","unstructured":"openslr.org. 2022. Openslr.org. http:\/\/openslr.org\/"},{"key":"e_1_3_2_1_199_1","volume-title":"Milton Samirakshma Bepari, and Joyanta Basu","author":"Pal Madhab","year":"2018","unstructured":"Madhab Pal, Rajib Roy, Soma Khan, Milton Samirakshma Bepari, and Joyanta Basu. 2018. PannoMulloKathan: Voice Enabled Mobile App for Agricultural Commodity Price Dissemination in Bengali Language.. In INTERSPEECH. 1491\u20131492."},{"key":"e_1_3_2_1_200_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_1_201_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3575793"},{"key":"e_1_3_2_1_202_1","volume-title":"Mixtures of deep neural experts for automated speech scoring. arXiv preprint arXiv:2106.12475","author":"Papi Sara","year":"2021","unstructured":"Sara Papi, Edmondo Trentin, Roberto Gretter, Marco Matassoni, and Daniele Falavigna. 2021. Mixtures of deep neural experts for automated speech scoring. arXiv preprint arXiv:2106.12475 (2021)."},{"key":"e_1_3_2_1_203_1","volume-title":"Yo Joong Choe, and Jiyeon Ham","author":"Park Kyubyong","year":"2019","unstructured":"Kyubyong Park, Yo Joong Choe, and Jiyeon Ham. 2019. Jejueo Datasets for Machine Translation and Speech Synthesis. arXiv preprint arXiv:1911.12071 (2019)."},{"key":"e_1_3_2_1_204_1","volume-title":"Css10: A collection of single speaker speech datasets for 10 languages. arXiv preprint arXiv:1903.11269","author":"Park Kyubyong","year":"2019","unstructured":"Kyubyong Park and Thomas Mulc. 2019. Css10: A collection of single speaker speech datasets for 10 languages. arXiv preprint arXiv:1903.11269 (2019)."},{"key":"e_1_3_2_1_205_1","unstructured":"R. Paul. 1995. Language disorders from infancy through adolescence: Assessment and intervention. Mosby St.Louis MO."},{"key":"e_1_3_2_1_206_1","volume-title":"Research on the Construction of Multimodal Corpus of Tibetan Teaching. In 1st International Conference on Education: Current Issues and Digital Technologies (ICECIDT","author":"Pengcuo Dawa","year":"2021","unstructured":"Dawa Pengcuo and Daojie Ben. 2021. Research on the Construction of Multimodal Corpus of Tibetan Teaching. In 1st International Conference on Education: Current Issues and Digital Technologies (ICECIDT 2021). Atlantis Press, 408\u2013412."},{"key":"e_1_3_2_1_207_1","volume-title":"Subword Dictionary Learning and Segmentation Techniques for Automatic Speech Recognition in Tamil and Kannada. arXiv preprint arXiv:2207.13331","author":"Bharathi Pilar","year":"2022","unstructured":"Bharathi Pilar 2022. Subword Dictionary Learning and Segmentation Techniques for Automatic Speech Recognition in Tamil and Kannada. arXiv preprint arXiv:2207.13331 (2022)."},{"key":"e_1_3_2_1_208_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2004.09.001"},{"key":"e_1_3_2_1_209_1","volume-title":"Mls: A large-scale multilingual dataset for speech research. arXiv preprint arXiv:2012.03411","author":"Pratap Vineel","year":"2020","unstructured":"Vineel Pratap, Qiantong Xu, Anuroop Sriram, Gabriel Synnaeve, and Ronan Collobert. 2020. Mls: A large-scale multilingual dataset for speech research. arXiv preprint arXiv:2012.03411 (2020)."},{"key":"e_1_3_2_1_210_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533231"},{"key":"e_1_3_2_1_211_1","volume-title":"Presenting an Experimental Dataset. arXiv preprint arXiv:1911.13087","author":"Qader Akam","year":"2019","unstructured":"Akam Qader and Hossein Hassani. 2019. Kurdish (Sorani) Speech to Text: Presenting an Experimental Dataset. arXiv preprint arXiv:1911.13087 (2019)."},{"key":"e_1_3_2_1_212_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24033-6_54"},{"key":"e_1_3_2_1_213_1","volume-title":"Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever.","author":"Radford Alec","year":"2022","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2022. Robust Speech Recognition via Large-Scale Weak Supervision. arXiv:arXiv:2212.04356"},{"key":"e_1_3_2_1_214_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jfludis.2018.03.002"},{"key":"e_1_3_2_1_215_1","volume-title":"The interspeech 2020 deep noise suppression challenge: Datasets, subjective speech quality and testing framework. arXiv preprint arXiv:2001.08662","author":"Reddy Chandan KA","year":"2020","unstructured":"Chandan KA Reddy, Ebrahim Beyrami, Harishchandra Dubey, Vishak Gopal, Roger Cheng, Ross Cutler, Sergiy Matusevych, Robert Aichner, Ashkan Aazami, Sebastian Braun, 2020. The interspeech 2020 deep noise suppression challenge: Datasets, subjective speech quality and testing framework. arXiv preprint arXiv:2001.08662 (2020)."},{"key":"e_1_3_2_1_216_1","unstructured":"Microsoft Research. 2022. Neural networks-based speech enhancement: AI to improve audio quality. https:\/\/www.microsoft.com\/en-us\/research\/project\/nn-speech-enhancement\/"},{"key":"e_1_3_2_1_217_1","volume-title":"Allen Stauffer, Julien van Hout","author":"Richey Colleen","year":"2018","unstructured":"Colleen Richey, Maria A Barrios, Zeb Armstrong, Chris Bartels, Horacio Franco, Martin Graciarena, Aaron Lawson, Mahesh Kumar Nandwana, Allen Stauffer, Julien van Hout, 2018. Voices obscured in complex environmental settings (voices) corpus. arXiv preprint arXiv:1804.05053 (2018)."},{"key":"e_1_3_2_1_218_1","volume-title":"AI and Accessibility: A Discussion of Ethical Considerations. arXiv e-prints","author":"Morris Meredith Ringel","year":"2019","unstructured":"Meredith Ringel Morris. 2019. AI and Accessibility: A Discussion of Ethical Considerations. arXiv e-prints (2019), arXiv\u20131908."},{"key":"e_1_3_2_1_219_1","volume-title":"Assessing local noise level estimation methods: Application to noise robust ASR. Speech communication 34, 1-2","author":"Ris Christophe","year":"2001","unstructured":"Christophe Ris and Stephane Dupont. 2001. Assessing local noise level estimation methods: Application to noise robust ASR. Speech communication 34, 1-2 (2001), 141\u2013158."},{"key":"e_1_3_2_1_220_1","volume-title":"Speech communications and signal detection in noise. The noise manual 5","author":"Robinson GS","year":"2000","unstructured":"GS Robinson and JG Casali. 2000. Speech communications and signal detection in noise. The noise manual 5 (2000), 567\u2013600."},{"key":"e_1_3_2_1_221_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1995.479278"},{"key":"e_1_3_2_1_222_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533239"},{"key":"e_1_3_2_1_223_1","volume-title":"The Sociolinguistic Speech Corpus of Chilean Spanish (COSCACH). A socially stratified text, audio and video corpus with multiple speech registers","author":"Sadowsky Scott","year":"2022","unstructured":"Scott Sadowsky. 2022. The Sociolinguistic Speech Corpus of Chilean Spanish (COSCACH). A socially stratified text, audio and video corpus with multiple speech registers (2022)."},{"key":"e_1_3_2_1_224_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2013.2245674"},{"key":"e_1_3_2_1_225_1","volume-title":"A corpus for large-scale phonetic typology. arXiv preprint arXiv:2005.13962","author":"Salesky Elizabeth","year":"2020","unstructured":"Elizabeth Salesky, Eleanor Chodroff, Tiago Pimentel, Matthew Wiesner, Ryan Cotterell, Alan W Black, and Jason Eisner. 2020. A corpus for large-scale phonetic typology. arXiv preprint arXiv:2005.13962 (2020)."},{"key":"e_1_3_2_1_226_1","volume-title":"Proceedings of the 9th International Conference on Language Resources and Evaluation (LREC","author":"Santos Ana L\u00facia","year":"2014","unstructured":"Ana L\u00facia Santos, Michel G\u00e9n\u00e9reux, Aida Cardoso, Celina Agostinho, and Silvana Abalada. 2014. A corpus of European Portuguese child and child-directed speech. In Proceedings of the 9th International Conference on Language Resources and Evaluation (LREC 2014). European Language Resources Association, 1488\u20131491."},{"key":"e_1_3_2_1_227_1","doi-asserted-by":"publisher","DOI":"10.1145\/3531146.3533192"},{"key":"e_1_3_2_1_228_1","volume-title":"Advances in Data Science: Methodologies and Applications","author":"Sharma Garima","unstructured":"Garima Sharma and Abhinav Dhall. 2021. A survey on automatic multimodal emotion recognition in the wild. In Advances in Data Science: Methodologies and Applications. Springer, 35\u201364."},{"key":"e_1_3_2_1_229_1","doi-asserted-by":"publisher","unstructured":"Shakeel Ahmad Sheikh Md Sahidullah Fabrice Hirsch and Slim Ouni. 2021. Machine Learning for Stuttering Identification: Review Challenges and Future Directions. https:\/\/doi.org\/10.48550\/ARXIV.2107.04057","DOI":"10.48550\/ARXIV.2107.04057"},{"key":"e_1_3_2_1_230_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747384"},{"key":"e_1_3_2_1_231_1","volume-title":"prisons mull AI to analyze inmate phone calls","author":"Sherfinski David","year":"2021","unstructured":"David Sherfinski and Avi Asher-Schapiro. 2021. U.S. prisons mull AI to analyze inmate phone calls. Thomson Reuters Foundation News (August 2021)."},{"key":"e_1_3_2_1_232_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413386"},{"key":"e_1_3_2_1_233_1","volume-title":"Aishell-3: A multi-speaker mandarin tts corpus and the baselines. arXiv preprint arXiv:2010.11567","author":"Shi Yao","year":"2020","unstructured":"Yao Shi, Hui Bu, Xin Xu, Shaoji Zhang, and Ming Li. 2020. Aishell-3: A multi-speaker mandarin tts corpus and the baselines. arXiv preprint arXiv:2010.11567 (2020)."},{"key":"e_1_3_2_1_234_1","unstructured":"Koichi Shinoda Sadaoki Furui [n. d.]. Tokyo Institute of Technology Multilingual Speech Corpus-Indonesian (TITML-IDN). ([n. d.])."},{"key":"e_1_3_2_1_235_1","volume-title":"Personalizing ASR for dysarthric and accented speech with limited data. arXiv preprint arXiv:1907.13511","author":"Shor Joel","year":"2019","unstructured":"Joel Shor, Dotan Emanuel, Oran Lang, Omry Tuval, Michael Brenner, Julie Cattiau, Fernando Vieira, Maeve McNally, Taylor Charbonneau, Melissa Nollstadt, 2019. Personalizing ASR for dysarthric and accented speech with limited data. arXiv preprint arXiv:1907.13511 (2019)."},{"key":"e_1_3_2_1_236_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.computel-1.3"},{"key":"e_1_3_2_1_237_1","volume-title":"Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC","author":"Sini Aghilas","year":"2018","unstructured":"Aghilas Sini, Damien Lolive, Ga\u00eblle Vidal, Marie Tahon, and \u00c9lisabeth Delais-Roussarie. 2018. Synpaflex-corpus: An expressive french audiobooks corpus dedicated to expressive speech synthesis. In Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)."},{"key":"e_1_3_2_1_238_1","volume-title":"Musan: A music, speech, and noise corpus. arXiv preprint arXiv:1510.08484","author":"Snyder David","year":"2015","unstructured":"David Snyder, Guoguo Chen, and Daniel Povey. 2015. Musan: A music, speech, and noise corpus. arXiv preprint arXiv:1510.08484 (2015)."},{"key":"e_1_3_2_1_239_1","volume-title":"Artsheets for Art Datasets. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2).","author":"Srinivasan Ramya","year":"2021","unstructured":"Ramya Srinivasan, Emily Denton, Jordan Famularo, Negar Rostamzadeh, Fernando Diaz, and Beth Coleman. 2021. Artsheets for Art Datasets. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)."},{"key":"e_1_3_2_1_240_1","volume-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2802\u20132806","author":"Lal Brij Mohan","year":"2020","unstructured":"Brij Mohan Lal Srivastava, Nathalie Vauquier, Md Sahidullah, Aur\u00e9lien Bellet, Marc Tommasi, and Emmanuel Vincent. 2020. Evaluating voice conversion-based privacy protection against informed attackers. In ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2802\u20132806."},{"key":"e_1_3_2_1_241_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2010.12.002"},{"key":"e_1_3_2_1_242_1","first-page":"922","article-title":"Physiognomic Artificial Intelligence","volume":"32","author":"Stark Luke","year":"2022","unstructured":"Luke Stark and Jevan Hutson. 2022. Physiognomic Artificial Intelligence. Fordham Intellectual Property, Media and Entertainment Law Journal 32, 4 (2022), 922.","journal-title":"Fordham Intellectual Property, Media and Entertainment Law Journal"},{"key":"e_1_3_2_1_243_1","volume-title":"Papers with Code-The latest in Machine Learning. URL: https:\/\/paperswithcode. com","author":"Stojnic Robert","year":"2022","unstructured":"Robert Stojnic, Ross Taylor, Marcin Kardas, Viktor Kerkez, and Ludovic Viaud. 2022. Papers with Code-The latest in Machine Learning. URL: https:\/\/paperswithcode. com (2022)."},{"key":"e_1_3_2_1_244_1","volume-title":"Building an ASR Error Robust Spoken Virtual Patient System in a Highly Class-Imbalanced Scenario Without Speech Data. arXiv preprint arXiv:2204.05183","author":"Sunder Vishal","year":"2022","unstructured":"Vishal Sunder, Prashant Serai, and Eric Fosler-Lussier. 2022. Building an ASR Error Robust Spoken Virtual Patient System in a Highly Class-Imbalanced Scenario Without Speech Data. arXiv preprint arXiv:2204.05183 (2022)."},{"key":"e_1_3_2_1_245_1","unstructured":"Surfingtech. [n. d.]. Free ST American English Corpus. https:\/\/openslr.magicdatatech.com\/45\/"},{"key":"e_1_3_2_1_246_1","volume-title":"JTubeSpeech: corpus of Japanese speech collected from YouTube for speech recognition and speaker verification. arXiv preprint arXiv:2112.09323","author":"Takamichi Shinnosuke","year":"2021","unstructured":"Shinnosuke Takamichi, Ludwig K\u00fcrzinger, Takaaki Saeki, Sayaka Shiota, and Shinji Watanabe. 2021. JTubeSpeech: corpus of Japanese speech collected from YouTube for speech recognition and speaker verification. arXiv preprint arXiv:2112.09323 (2021)."},{"key":"e_1_3_2_1_247_1","volume-title":"A survey on neural speech synthesis. arXiv preprint arXiv:2106.15561","author":"Tan Xu","year":"2021","unstructured":"Xu Tan, Tao Qin, Frank Soong, and Tie-Yan Liu. 2021. A survey on neural speech synthesis. arXiv preprint arXiv:2106.15561 (2021)."},{"key":"e_1_3_2_1_248_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_249_1","unstructured":"TEI. [n. d.]. TEI P5: Guidelines for Electronic Text Encoding and Interchange. ([n. d.])."},{"key":"e_1_3_2_1_250_1","doi-asserted-by":"crossref","unstructured":"Louis ten Bosch. 2000. ASR dialects and acoustic\/phonological distances.. In INTERSPEECH. 1009\u20131012.","DOI":"10.21437\/ICSLP.2000-705"},{"key":"e_1_3_2_1_251_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.440"},{"key":"e_1_3_2_1_252_1","doi-asserted-by":"publisher","DOI":"10.4324\/9780203856949.ch8"},{"key":"e_1_3_2_1_253_1","volume-title":"On-device personalization of automatic speech recognition models for disordered speech. arXiv preprint arXiv:2106.10259","author":"Tomanek Katrin","year":"2021","unstructured":"Katrin Tomanek, Fran\u00e7oise Beaufays, Julie Cattiau, Angad Chandorkar, and Khe Chai Sim. 2021. On-device personalization of automatic speech recognition models for disordered speech. arXiv preprint arXiv:2106.10259 (2021)."},{"key":"e_1_3_2_1_254_1","volume-title":"A glossary of sociolinguistics","unstructured":"Peter. Trudgill. 2003. A glossary of sociolinguistics. Oxford University Press, Oxford."},{"key":"e_1_3_2_1_255_1","volume-title":"EasyCall corpus: a dysarthric speech dataset. arXiv preprint arXiv:2104.02542","author":"Turrisi Rosanna","year":"2021","unstructured":"Rosanna Turrisi, Arianna Braccia, Marco Emanuele, Simone Giulietti, Maura Pugliatti, Mariachiara Sensi, Luciano Fadiga, and Leonardo Badino. 2021. EasyCall corpus: a dysarthric speech dataset. arXiv preprint arXiv:2104.02542 (2021)."},{"key":"e_1_3_2_1_256_1","volume-title":"William Labov.","author":"Marvin","year":"1968","unstructured":"Marvin I. Herzog Uriel Weinreich, William Labov. 1968. Empirical Foundations for a Theory of Language Change. In Directions for Historical Linguistics, Winfred P. Lehmann and Yakov Malkiel (Eds.). Univer\u2019sity of Texas Press, Austin, 95\u2013195."},{"key":"e_1_3_2_1_257_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10816-2_47"},{"key":"e_1_3_2_1_258_1","volume-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit","author":"Veaux Christophe","unstructured":"Christophe Veaux, Junichi Yamagishi, Kirsten MacDonald, 2017. CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit. University of Edinburgh. The Centre for Speech Technology Research (CSTR) (2017)."},{"key":"e_1_3_2_1_259_1","volume-title":"Methodological guidelines for publishing government linked data. Linking government data","author":"Villaz\u00f3n-Terrazas Boris","year":"2011","unstructured":"Boris Villaz\u00f3n-Terrazas, Luis M Vilches-Bl\u00e1zquez, Oscar Corcho, and Asunci\u00f3n G\u00f3mez-P\u00e9rez. 2011. Methodological guidelines for publishing government linked data. Linking government data (2011), 27\u201349."},{"key":"e_1_3_2_1_260_1","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2019-19"},{"key":"e_1_3_2_1_261_1","doi-asserted-by":"publisher","DOI":"10.1145\/3507657.3528558"},{"key":"e_1_3_2_1_262_1","volume-title":"Voxpopuli: A large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation. arXiv preprint arXiv:2101.00390","author":"Wang Changhan","year":"2021","unstructured":"Changhan Wang, Morgane Riviere, Ann Lee, Anne Wu, Chaitanya Talnikar, Daniel Haziza, Mary Williamson, Juan Pino, and Emmanuel Dupoux. 2021. Voxpopuli: A large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation. arXiv preprint arXiv:2101.00390 (2021)."},{"key":"e_1_3_2_1_263_1","volume-title":"The methodology of the social sciences","author":"Weber Max","year":"1949","unstructured":"Max Weber. 1949. \" Objectivity\" in social science and social policy. The methodology of the social sciences (1949), 49\u2013112."},{"key":"e_1_3_2_1_264_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00240"},{"key":"e_1_3_2_1_265_1","volume-title":"Learning from Strangers: The Art and Method of Qualitative Interview Studies","author":"Weiss R. S.","unstructured":"R. S. Weiss. 1995. Learning from Strangers: The Art and Method of Qualitative Interview Studies. Simon & Schuster, New York, NY."},{"key":"e_1_3_2_1_266_1","volume-title":"Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC\u201906)","author":"Westerhout Eline","year":"2006","unstructured":"Eline Westerhout and Paola Monachesi. 2006. A pilot study for a Corpus of Dutch Aphasic Speech (CoDAS). In Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC\u201906). European Language Resources Association (ELRA), Genoa, Italy."},{"key":"e_1_3_2_1_267_1","volume-title":"Emmett McQuinn, Dwight Crow, Ethan Manilow, and Jonathan Le Roux.","author":"Wichern Gordon","year":"2019","unstructured":"Gordon Wichern, Joe Antognini, Michael Flynn, Licheng Richard Zhu, Emmett McQuinn, Dwight Crow, Ethan Manilow, and Jonathan Le Roux. 2019. Wham!: Extending speech separation to noisy environments. arXiv preprint arXiv:1907.01160 (2019)."},{"key":"e_1_3_2_1_268_1","doi-asserted-by":"publisher","DOI":"10.1086\/267606"},{"key":"e_1_3_2_1_269_1","volume-title":"ASR in German: A Detailed Error Analysis. arXiv preprint arXiv:2204.05617","author":"Wirth Johannes","year":"2022","unstructured":"Johannes Wirth and Rene Peinl. 2022. ASR in German: A Detailed Error Analysis. arXiv preprint arXiv:2204.05617 (2022)."},{"key":"e_1_3_2_1_270_1","volume-title":"Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771","author":"Wolf Thomas","year":"2019","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, 2019. Huggingface\u2019s transformers: State-of-the-art natural language processing. arXiv preprint arXiv:1910.03771 (2019)."},{"key":"e_1_3_2_1_271_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1995.479276"},{"key":"e_1_3_2_1_272_1","volume-title":"Jiatong Shi, Ruslan Salakhutdinov, Shinji Watanabe, and Louis-Philippe Morency.","author":"Wu Peter","year":"2021","unstructured":"Peter Wu, Paul Pu Liang, Jiatong Shi, Ruslan Salakhutdinov, Shinji Watanabe, and Louis-Philippe Morency. 2021. Understanding the Tradeoffs in Client-side Privacy for Downstream Speech Tasks. In 2021 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). IEEE, 841\u2013848."},{"key":"e_1_3_2_1_273_1","volume-title":"ATCSpeech: A multilingual pilot-controller speech corpus from real air traffic control environment. arXiv preprint arXiv:1911.11365","author":"Yang Bo","year":"2019","unstructured":"Bo Yang, Xianlong Tan, Zhengmao Chen, Bing Wang, Dan Li, Zhongping Yang, Xiping Wu, and Yi Lin. 2019. ATCSpeech: A multilingual pilot-controller speech corpus from real air traffic control environment. arXiv preprint arXiv:1911.11365 (2019)."},{"key":"e_1_3_2_1_274_1","volume-title":"Open Source MagicData-RAMC: A Rich Annotated Mandarin Conversational (RAMC) Speech Dataset. arXiv preprint arXiv:2203.16844","author":"Yang Zehui","year":"2022","unstructured":"Zehui Yang, Yifan Chen, Lei Luo, Runyan Yang, Lingxuan Ye, Gaofeng Cheng, Ji Xu, Yaohui Jin, Qingqing Zhang, Pengyuan Zhang, 2022. Open Source MagicData-RAMC: A Rich Annotated Mandarin Conversational (RAMC) Speech Dataset. arXiv preprint arXiv:2203.16844 (2022)."},{"key":"e_1_3_2_1_275_1","volume-title":"On the difficulties of automatic speech recognition for kindergarten-aged children. Interspeech 2018","author":"Yeung Gary","year":"2018","unstructured":"Gary Yeung and Abeer Alwan. 2018. On the difficulties of automatic speech recognition for kindergarten-aged children. Interspeech 2018 (2018)."},{"key":"e_1_3_2_1_276_1","volume-title":"A Frequency Normalization Technique for Kindergarten Speech Recognition Inspired by the Role of f0 in Vowel Perception. Interspeech 2019","author":"Yeung Gary","year":"2019","unstructured":"Gary Yeung and Abeer Alwan. 2019. A Frequency Normalization Technique for Kindergarten Speech Recognition Inspired by the Role of f0 in Vowel Perception. Interspeech 2019 (2019)."},{"key":"e_1_3_2_1_277_1","volume-title":"Klaus Zechner, and Keelan Evanini.","author":"Yoon Su-Youn","year":"2019","unstructured":"Su-Youn Yoon, Chong Min Lee, Klaus Zechner, and Keelan Evanini. 2019. Development of Robust Automated Scoring Models Using Adversarial Input for Oral Proficiency Assessment.. In INTERSPEECH. 1871\u20131875."},{"key":"e_1_3_2_1_278_1","volume-title":"The SLT 2021 children speech recognition challenge: Open datasets, rules and baselines. In 2021 IEEE Spoken Language Technology Workshop (SLT). IEEE, 1117\u20131123","author":"Yu Fan","year":"2021","unstructured":"Fan Yu, Zhuoyuan Yao, Xiong Wang, Keyu An, Lei Xie, Zhijian Ou, Bo Liu, Xiulin Li, and Guanqiong Miao. 2021. The SLT 2021 children speech recognition challenge: Open datasets, rules and baselines. In 2021 IEEE Spoken Language Technology Workshop (SLT). IEEE, 1117\u20131123."},{"key":"e_1_3_2_1_279_1","volume-title":"Disruptive Technologies in Information Sciences","author":"Yun Kyongsik","unstructured":"Kyongsik Yun, Joseph Osborne, Madison Lee, Thomas Lu, and Edward Chow. 2018. Automatic speech recognition for launch control center communication using recurrent neural networks with data augmentation and custom language model. In Disruptive Technologies in Information Sciences, Vol. 10652. SPIE, 1065202."},{"key":"e_1_3_2_1_280_1","volume-title":"That sounds familiar: an analysis of phonetic representations transfer across languages. arXiv preprint arXiv:2005.08118","author":"\u017belasko Piotr","year":"2020","unstructured":"Piotr \u017belasko, Laureano Moro-Vel\u00e1zquez, Mark Hasegawa-Johnson, Odette Scharenborg, and Najim Dehak. 2020. That sounds familiar: an analysis of phonetic representations transfer across languages. arXiv preprint arXiv:2005.08118 (2020)."},{"key":"e_1_3_2_1_281_1","volume-title":"LibriTTS: A corpus derived from LibriSpeech for text-to-speech. arXiv preprint arXiv:1904.02882","author":"Zen Heiga","year":"2019","unstructured":"Heiga Zen, Viet Dang, Rob Clark, Yu Zhang, Ron J Weiss, Ye Jia, Zhifeng Chen, and Yonghui Wu. 2019. LibriTTS: A corpus derived from LibriSpeech for text-to-speech. arXiv preprint arXiv:1904.02882 (2019)."},{"key":"e_1_3_2_1_282_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1259"},{"key":"e_1_3_2_1_283_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053207"},{"key":"e_1_3_2_1_284_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.543236"},{"key":"e_1_3_2_1_285_1","volume-title":"The latest leap in HireVue\u2019s assessment technology. HireVue (September","author":"Zuloaga Lindsey","year":"2021","unstructured":"Lindsey Zuloaga. 2021. The latest leap in HireVue\u2019s assessment technology. HireVue (September 2021)."},{"key":"e_1_3_2_1_286_1","volume-title":"Iuliia Nigmatulina","author":"Zuluaga-Gomez Juan","year":"2022","unstructured":"Juan Zuluaga-Gomez, Karel Vesel\u1ef3, Igor Sz\u00f6ke, Petr Motlicek, Martin Kocour, Mickael Rigault, Khalid Choukri, Amrutha Prasad, Seyyed Saeed Sarfjoo, Iuliia Nigmatulina, 2022. ATCO2 corpus: A Large-Scale Dataset for Research on Automatic Speech Recognition and Natural Language Understanding of Air Traffic Control Communications. arXiv preprint arXiv:2211.04054 (2022)."}],"event":{"name":"FAccT '23: the 2023 ACM Conference on Fairness, Accountability, and Transparency","location":"Chicago IL USA","acronym":"FAccT '23"},"container-title":["2023 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3593013.3594049","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3593013.3594049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:18Z","timestamp":1750178238000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3593013.3594049"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":285,"alternative-id":["10.1145\/3593013.3594049","10.1145\/3593013"],"URL":"https:\/\/doi.org\/10.1145\/3593013.3594049","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}