{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T10:35:55Z","timestamp":1763202955400,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3663548.3688536","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T18:37:25Z","timestamp":1729449445000},"page":"1-5","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-supervised learning using unlabeled speech with multiple types of speech disorder for disordered speech recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9808-0250","authenticated-orcid":false,"given":"Ryoichi","family":"Takashima","sequence":"first","affiliation":[{"name":"Kobe University, Graduate School of System Informatics, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1278-7088","authenticated-orcid":false,"given":"Takeru","family":"Otani","sequence":"additional","affiliation":[{"name":"Kobe University, Faculty of Engineering, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6492-3754","authenticated-orcid":false,"given":"Ryo","family":"Aihara","sequence":"additional","affiliation":[{"name":"Mitsubishi Electric Corporation, Information Technology R&amp;D Center, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5005-7679","authenticated-orcid":false,"given":"Tetsuya","family":"Takiguchi","sequence":"additional","affiliation":[{"name":"Kobe University, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7791-6617","authenticated-orcid":false,"given":"Shinya","family":"Taguchi","sequence":"additional","affiliation":[{"name":"Mitsubishi Electric Corporation, Information Technology R&amp;D Center, Japan"}]}],"member":"320","published-online":{"date-parts":[[2024,10,27]]},"reference":[{"volume-title":"wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations","author":"Baevski Alexei","key":"e_1_3_2_1_1_1","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. In NeurIPS. Curran Associates Inc., 12449\u201312460."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Murali\u00a0Karthick Baskar Tim Herzig Diana Nguyen Mireia Diez Tim Polzehl Lukas Burget and Jan \u010cernock\u00fd. 2022. Speaker adaptation for Wav2vec2 based dysarthric ASR. In Interspeech. ISCA 3403\u20133407.","DOI":"10.21437\/Interspeech.2022-10896"},{"volume-title":"Cleft Lip and Palate: Diagnosis and Management","author":"Berkowitz Samuel","key":"e_1_3_2_1_3_1","unstructured":"Samuel Berkowitz. 2006. Cleft Lip and Palate: Diagnosis and Management. Springer."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Chitralekha Bhat Ashish Panda and Helmer Strik. 2022. Improved ASR Performance for Dysarthric Speech Using Two-stage Data Augmentation. In Interspeech. ISCA 46\u201350.","DOI":"10.21437\/Interspeech.2022-10335"},{"key":"e_1_3_2_1_5_1","volume-title":"NeurIPS, Vol.\u00a033","author":"Brown Tom","year":"1877","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, 2020. Language Models are Few-Shot Learners. In NeurIPS, Vol.\u00a033. Curran Associates Inc., 1877\u20131901."},{"volume-title":"Unsupervised learning of visual features by contrasting cluster assignments","author":"Caron Mathilde","key":"e_1_3_2_1_6_1","unstructured":"Mathilde Caron, Ishan Misra, Julien Mairal, Priya Goyal, Piotr Bojanowski, and Armand Joulin. 2020. Unsupervised learning of visual features by contrasting cluster assignments. In NeurIPS. Curran Associates Inc., 1\u201313."},{"volume-title":"Emerging Properties in Self-Supervised Vision Transformers","author":"Caron Mathilde","key":"e_1_3_2_1_7_1","unstructured":"Mathilde Caron, Hugo Touvron, Ishan Misra, Herv\u00e9 J\u00e9gou, Julien Mairal, Piotr Bojanowski, and Armand Joulin. 2021. Emerging Properties in Self-Supervised Vision Transformers. In ICCV. IEEE, 9630\u20139640."},{"key":"e_1_3_2_1_8_1","unstructured":"Ting Chen Simon Kornblith Mohammad Norouzi and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In ICML. 1597\u20131607."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Xinlei Chen and Kaiming He. 2021. Exploring Simple Siamese Representation Learning. In CVPR. 15745\u201315753.","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Yu-An Chung Wei-Ning Hsu Hao Tang and James Glass. 2019. An unsupervised autoregressive model for speech representation learning. In Interspeech. ISCA 146\u2013150.","DOI":"10.21437\/Interspeech.2019-1473"},{"key":"e_1_3_2_1_11_1","volume-title":"Unsupervised Cross-lingual Representation Learning for Speech Recognition. arXiv abs\/2006.13979","author":"Conneau Alexis","year":"2020","unstructured":"Alexis Conneau, Alexei Baevski, Ronan Collobert, Abdelrahman Mohamed, and Michael Auli. 2020. Unsupervised Cross-lingual Representation Learning for Speech Recognition. arXiv abs\/2006.13979 (2020), 1\u201312."},{"key":"e_1_3_2_1_12_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT. Association for Computational Linguistics, 4171\u20134186."},{"key":"e_1_3_2_1_13_1","unstructured":"Kento Fujiwara Ryoichi Takashima Chihiro Sugiyama Nobukazu Tanaka Kanji Nohara Kazunori Nozaki and Tetsuya Takiguchi. 2021. Data Augmentation Based on Frequency Warping for Recognition of Cleft Palate Speech. In APSIPA ASC. APSIPA 471\u2013476."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Abner Hernandez Paula\u00a0Andrea P\u00e9rez-Toro Elmar Noeth Juan\u00a0Rafael Orozco-Arroyave Andreas Maier and Seung\u00a0Hee Yang. 2022. Cross-lingual Self-Supervised Speech Representations for Improved Dysarthric Speech Recognition. In Interspeech. ISCA 51\u201355.","DOI":"10.21437\/Interspeech.2022-10674"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"e_1_3_2_1_16_1","volume-title":"Improving Transformer-based Speech Recognition Using Unsupervised Pre-training. ArXiv abs\/1910.09932","author":"Jiang Dongwei","year":"2019","unstructured":"Dongwei Jiang, Xiaoning Lei, Wubo Li, Ne Luo, Yuxuan Hu, Wei Zou, and Xiangang Li. 2019. Improving Transformer-based Speech Recognition Using Unsupervised Pre-training. ArXiv abs\/1910.09932 (2019), 1\u20135."},{"volume-title":"Simulating Dysarthric Speech for Training Data Augmentation in Clinical Speech Applications","author":"Jiao Yishan","key":"e_1_3_2_1_17_1","unstructured":"Yishan Jiao, Ming Tu, Visar Berisha, and Julie Liss. 2018. Simulating Dysarthric Speech for Training Data Augmentation in Clinical Speech Applications. In ICASSP. IEEE, 6009\u20136013."},{"key":"e_1_3_2_1_18_1","unstructured":"Heejin Kim Mark Hasegawa-Johnson Adrienne Perlman Jon Gunderson Thomas\u00a0S. Huang Kenneth Watkin and Simone Frame. 2008. Dysarthric speech database for universal access research. In Interspeech. ISCA 1741\u20131744."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/0167-6393(90)90011-W"},{"key":"e_1_3_2_1_20_1","volume-title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In ICLR. 1\u201317.","author":"Lan Zhenzhong","year":"2020","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2020. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations. In ICLR. 1\u201317."},{"key":"e_1_3_2_1_21_1","volume-title":"Corpus of Spontaneous Japanese: Its Design and Evaluation. In proceedings of the ISCA and IEEE Workshop on Spontaneous Speech Processing and Recognition (SSPR","author":"Maekawa Kikuo","year":"2003","unstructured":"Kikuo Maekawa. 2003. Corpus of Spontaneous Japanese: Its Design and Evaluation. In proceedings of the ISCA and IEEE Workshop on Spontaneous Speech Processing and Recognition (SSPR 2003). 7\u201312."},{"key":"e_1_3_2_1_22_1","volume-title":"Cerebral Palsy: Hope Through Research","author":"National\u00a0Institute of Neurological\u00a0Disorders, Stroke (U.S.).\u00a0Office of Communications, and Public Liaison.","year":"2009","unstructured":"National\u00a0Institute of Neurological\u00a0Disorders, Stroke (U.S.).\u00a0Office of Communications, and Public Liaison. 2009. Cerebral Palsy: Hope Through Research. National Institute of Neurological Disorders and Stroke, National Institutes of Health."},{"volume-title":"Unsupervised pretraining transfers well across languages","author":"Rivi\u00e8re Morgane","key":"e_1_3_2_1_23_1","unstructured":"Morgane Rivi\u00e8re, Armand Joulin, Pierre-Emmanuel Mazar\u00e9, and Emmanuel Dupoux. 2020. Unsupervised pretraining transfers well across languages. In ICASSP. IEEE, 7414\u20137418."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-011-9145-0"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"George Saon Gakuto Kurata Tom Sercu Kartik Audhkhasi Samuel Thomas Dimitrios Dimitriadis Xiaodong Cui Bhuvana Ramabhadran Michael Picheny Lynn-Li Lim Bergul Roomi and Phil Hall. 2017. English Conversational Telephone Speech Recognition by Humans and Machines. In Interspeech. ISCA 132\u2013136.","DOI":"10.21437\/Interspeech.2017-405"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Joel Shor Dotan Emanuel Oran Lang Omry Tuval Michael Brenner Julie Cattiau Fernando Vieira Maeve McNally Taylor Charbonneau Melissa Nollstadt Avinatan Hassidim and Yossi Matias. 2019. Personalizing ASR for Dysarthric and Accented Speech with Limited Data. In Interspeech. ISCA 784\u2013788.","DOI":"10.21437\/Interspeech.2019-1427"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3374874"},{"volume-title":"Two-Step Acoustic Model Adaptation for Dysarthric Speech Recognition","author":"Takashima Ryoichi","key":"e_1_3_2_1_28_1","unstructured":"Ryoichi Takashima, Tetsuya Takiguchi, and Yasuo Ariki. 2020. Two-Step Acoustic Model Adaptation for Dysarthric Speech Recognition. In ICASSP. IEEE, 6104\u20136108."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2951856"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Bhavik Vachhani Chitralekha Bhat and Sunil\u00a0Kumar Kopparapu. 2018. Data Augmentation Using Healthy Speech for Dysarthric Speech Recognition. In Interspeech. ISCA 471\u2013475.","DOI":"10.21437\/Interspeech.2018-1751"},{"key":"e_1_3_2_1_31_1","volume-title":"Representation Learning with Contrastive Predictive Coding. arXiv abs\/1807.03748","author":"van\u00a0den Oord A\u00e4ron","year":"2018","unstructured":"A\u00e4ron van\u00a0den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation Learning with Contrastive Predictive Coding. arXiv abs\/1807.03748 (2018), 1\u201313."},{"volume-title":"NeurIPS, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates","author":"Vaswani Ashish","key":"e_1_3_2_1_32_1","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In NeurIPS, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Lester\u00a0Phillip Violeta Wen\u00a0Chin Huang and Tomoki Toda. 2022. Investigating Self-supervised Pretraining Frameworks for Pathological Speech Recognition. In Interspeech. ISCA 41\u201345.","DOI":"10.21437\/Interspeech.2022-10043"},{"volume-title":"Unsupervised Pre-Training of Bidirectional Speech Encoders via Masked Reconstruction","author":"Wang Weiran","key":"e_1_3_2_1_34_1","unstructured":"Weiran Wang, Qingming Tang, and Karen Livescu. 2020. Unsupervised Pre-Training of Bidirectional Speech Encoders via Masked Reconstruction. In ICASSP. IEEE, 6889\u20136893."},{"volume-title":"Phonetic Analysis of Dysarthric Speech Tempo and Applications to Robust Personalised Dysarthric Speech Recognition","author":"Xiong Feifei","key":"e_1_3_2_1_35_1","unstructured":"Feifei Xiong, Jon Barker, and Heidi Christensen. 2019. Phonetic Analysis of Dysarthric Speech Tempo and Applications to Robust Personalised Dysarthric Speech Recognition. In ICASSP. IEEE, 5836\u20135840."},{"volume-title":"Source Domain Data Selection for Improved Transfer Learning Targeting Dysarthric Speech Recognition","author":"Xiong Feifei","key":"e_1_3_2_1_36_1","unstructured":"Feifei Xiong, Jon Barker, Zhengjun Yue, and Heidi Christensen. 2020. Source Domain Data Selection for Improved Transfer Learning Targeting Dysarthric Speech Recognition. In ICASSP. IEEE, 7424\u20137428."},{"volume-title":"The Microsoft 2017 Conversational Speech Recognition System","author":"Xiong Wayne","key":"e_1_3_2_1_37_1","unstructured":"Wayne Xiong, Lijun Wu, Fil\u00a0A Alleva, Jasha Droppo, Xuedong Huang, and Andreas Stolcke. 2018. The Microsoft 2017 Conversational Speech Recognition System. In ICASSP. IEEE, 5934\u20135938."}],"event":{"name":"ASSETS '24: The 26th International ACM SIGACCESS Conference on Computers and Accessibility","sponsor":["SIGACCESS ACM Special Interest Group on Accessible Computing"],"location":"St. John's NL Canada","acronym":"ASSETS '24"},"container-title":["The 26th International ACM SIGACCESS Conference on Computers and Accessibility"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663548.3688536","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:59Z","timestamp":1750294679000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663548.3688536"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":37,"alternative-id":["10.1145\/3663548.3688536","10.1145\/3663548"],"URL":"https:\/\/doi.org\/10.1145\/3663548.3688536","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2024-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}