{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T16:57:57Z","timestamp":1773507477401,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":68,"publisher":"ACM","license":[{"start":{"date-parts":[[2027,3,22]],"date-time":"2027-03-22T00:00:00Z","timestamp":1805673600000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2326406"],"award-info":[{"award-number":["2326406"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,23]]},"DOI":"10.1145\/3742413.3789167","type":"proceedings-article","created":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T11:32:24Z","timestamp":1772537544000},"page":"1704-1721","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["VoiceAlign: A Shimming Layer for Enhancing the Usability of Legacy Voice User Interface Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8803-2100","authenticated-orcid":false,"given":"Md","family":"Ehtesham-Ul-Haque","sequence":"first","affiliation":[{"name":"College of Information Sciences and Technology, Pennsylvania State University, University Park, Pennsylvania, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5063-3808","authenticated-orcid":false,"given":"Syed Masum","family":"Billah","sequence":"additional","affiliation":[{"name":"College of Information Sciences and Technology, Pennsylvania State University, University Park, Pennsylvania, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,3,22]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445122"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300233"},{"key":"e_1_3_3_2_4_2","unstructured":"Apple. 2023. Voice Control. https:\/\/support.apple.com\/en-us\/HT210417. [Accessed 13-11-2023]."},{"key":"e_1_3_3_2_5_2","unstructured":"Noah Apthorpe Danny\u00a0Yuxing Huang Dillon Reisman Arvind Narayanan and Nick Feamster. 2018. Keeping the smart home private with smart (er) iot traffic shaping. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1812.00955 (2018)."},{"key":"e_1_3_3_2_6_2","volume-title":"Analyzing Qualitative Data","author":"Bryman A.","year":"1994","unstructured":"A. Bryman and R.G. Burgess. 1994. Analyzing Qualitative Data. Routledge. https:\/\/books.google.com\/books?id=KQkotSd9YWkC"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2012.6289036"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511620539"},{"key":"e_1_3_3_2_9_2","first-page":"479","volume-title":"Ninth Australian International Conference on Speech Science and Technology","author":"Collings Penny","year":"2002","unstructured":"Penny Collings, David Walker, and Michael Wagner. 2002. Usability Evaluation of a Commercial Dictation System. In Ninth Australian International Conference on Speech Science and Technology. Australian Speech Science and Technology Associatc, 479\u2013484."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Liwei Dai Rich Goldman Andrew Sears and Jeremy Lozier. 2003. Speech-based cursor control: a study of grid-based solutions. ACM SIGACCESS Accessibility and Computing77-78 (2003) 94\u2013101.","DOI":"10.1145\/1029014.1028648"},{"key":"e_1_3_3_2_11_2","volume-title":"Unsloth","author":"Daniel\u00a0Han Michael\u00a0Han","year":"2023","unstructured":"Michael\u00a0Han Daniel\u00a0Han and Unsloth team. 2023. Unsloth. http:\/\/github.com\/unslothai\/unsloth"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-017-2367-1_5"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445188"},{"key":"e_1_3_3_2_14_2","volume-title":"Design patterns: elements of reusable object-oriented software","author":"Gamma Erich","year":"1995","unstructured":"Erich Gamma, Richard Helm, Ralph Johnson, and John Vlissides. 1995. Design patterns: elements of reusable object-oriented software. Pearson Deutschland GmbH."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173977"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376173"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Debjyoti Ghosh Can Liu Shengdong Zhao and Kotaro Hara. 2020. Commanding and re-dictation: Developing eyes-free voice-based interaction for editing dictated text. ACM Transactions on Computer-Human Interaction (TOCHI) 27 4 (2020) 1\u201331.","DOI":"10.1145\/3390889"},{"key":"e_1_3_3_2_18_2","unstructured":"Google. 2023. Voice Access. https:\/\/support.google.com\/accessibility\/android\/answer\/6151848?hl=en. [Accessed 13-11-2023]."},{"key":"e_1_3_3_2_19_2","first-page":"1","volume-title":"INTERACT","author":"Halverson Christine\u00a0A","year":"1999","unstructured":"Christine\u00a0A Halverson, Daniel\u00a0B Horn, Clare-Marie Karat, and John Karat. 1999. The beauty of errors: Patterns of error correction in desktop speech systems.. In INTERACT , Vol.\u00a099. 1\u20138."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139644082"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300461"},{"key":"e_1_3_3_2_22_2","unstructured":"Edward\u00a0J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang Weizhu Chen et\u00a0al. 2022. Lora: Low-rank adaptation of large language models.ICLR 1 2 (2022) 3."},{"key":"e_1_3_3_2_23_2","first-page":"6763","volume-title":"Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)","author":"Jamara Rashad\u00a0Albo","year":"2021","unstructured":"Rashad\u00a0Albo Jamara, Nico Herbig, Antonio Kr\u00fcger, and Josef van Genabith. 2021. Mid-air hand gestures for post-editing of machine translation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers). 6763\u20136773."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303160"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICMI.2002.1167020"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-236"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"crossref","unstructured":"Josephine Lau Benjamin Zimmerman and Florian Schaub. 2018. Alexa are you listening? Privacy perceptions concerns and privacy-seeking behaviors with smart speakers. Proceedings of the ACM on Human-Computer Interaction 2 CSCW (2018) 1\u201331.","DOI":"10.1145\/3274371"},{"key":"e_1_3_3_2_28_2","unstructured":"Moxin Li Wenjie Wang Fuli Feng Fengbin Zhu Qifan Wang and Tat-Seng Chua. 2024. Think twice before assure: Confidence estimation for large language models through reflection on multiple answers. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.09972 (2024)."},{"key":"e_1_3_3_2_29_2","first-page":"74","volume-title":"Text summarization branches out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642217"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"crossref","unstructured":"Can Liu Siying Hu Li Feng and Mingming Fan. 2022. Typist Experiment: an Investigation of Human-to-Human Dictation via Role-play to Inform Voice-based Text Authoring. Proceedings of the ACM on Human-Computer Interaction 6 CSCW2 (2022) 1\u201333.","DOI":"10.1145\/3555758"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-9323-9_15"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"crossref","unstructured":"Nathan Malkin Joe Deatrick Allen Tong Primal Wijesekera Serge Egelman and David Wagner. 2019. Privacy attitudes of smart speaker users. Proceedings on Privacy Enhancing Technologies 2019 4 (2019).","DOI":"10.2478\/popets-2019-0068"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.21437\/ICSLP.1994-339"},{"key":"e_1_3_3_2_35_2","unstructured":"Microsoft. 2023. Voice Access. https:\/\/support.microsoft.com\/en-us\/topic\/get-started-with-voice-access-bd2aa2dc-46c2-486c-93ae-3d75f7d053a4. [Accessed 13-11-2023]."},{"key":"e_1_3_3_2_36_2","unstructured":"Nuance. 2023. Dragon Speech Recognition. https:\/\/www.nuance.com\/dragon.html. [Accessed 13-11-2023]."},{"key":"e_1_3_3_2_37_2","unstructured":"OpenAI. 2025. ChatGPT on your desktop. https:\/\/openai.com\/chatgpt\/desktop\/ Accessed: 2025-04-10."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Sharon Oviatt. 1997. Mulitmodal interactive maps: Designing for human performance. Human\u2013Computer Interaction 12 1-2 (1997) 93\u2013129.","DOI":"10.1207\/s15327051hci1201&2_4"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"crossref","unstructured":"Sharon Oviatt. 2000. Taming recognition errors with a multimodal interface. Commun. ACM 43 9 (2000) 45\u201351.","DOI":"10.1145\/348941.348979"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Sharon Oviatt and Philip Cohen. 2000. Perceptual user interfaces: multimodal interfaces that process what comes naturally. Commun. ACM 43 3 (2000) 45\u201353.","DOI":"10.1145\/330534.330538"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"crossref","unstructured":"Sharon Oviatt Phil Cohen Lizhong Wu Lisbeth Duncan Bernhard Suhm Josh Bers Thomas Holzman Terry Winograd James Landay Jim Larson et\u00a0al. 2000. Designing the user interface for multimodal speech and pen-based gesture applications: State-of-the-art systems and future research directions. Human-computer interaction 15 4 (2000) 263\u2013322.","DOI":"10.1207\/S15327051HCI1504_1"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Sharon Oviatt and Philip\u00a0R Cohen. 2015. The Paradigm Shift to Multimodality in Contemporary Computer Interfaces. Morgan & Claypool Publishers.","DOI":"10.1007\/978-3-031-02213-5"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607077"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3338286.3340120"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3236112.3236130"},{"key":"e_1_3_3_2_46_2","unstructured":"Ashwin Ram Rohit Prasad Chandra Khatri Anu Venkatesh Raefer Gabriel Qing Liu Jeff Nunn Behnam Hedayatnia Ming Cheng Ashish Nagar et\u00a0al. 2018. Conversational ai: The science behind the alexa prize. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1801.03604 (2018)."},{"key":"e_1_3_3_2_47_2","volume-title":"Introducing the Model Context Protocol","author":"Research Anthropic","year":"2024","unstructured":"Anthropic Research. 2024. Introducing the Model Context Protocol. Technical Report. Anthropic. https:\/\/www.anthropic.com\/news\/model-context-protocol"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.wmt-1.40"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"crossref","unstructured":"Andrew Sears Jinhuan Feng Kwesi Oseitutu and Claire-Marie Karat. 2003. Hands-free speech-based navigation during dictation: difficulties consequences and solutions. Human-computer interaction 18 3 (2003) 229\u2013257.","DOI":"10.1207\/S15327051HCI1803_2"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376579"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2010.5700825"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/2388676.2388793"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300433"},{"key":"e_1_3_3_2_54_2","unstructured":"SnapLogic. 2023. Legacy tech upgrades cost the average business nearly $3M last year. https:\/\/www.ciodive.com\/news\/legacy-technology-technical-debt-costs-enterprise-data-AI\/721885\/ Accessed: 2025-04-10."},{"key":"e_1_3_3_2_55_2","first-page":"1087","volume-title":"Interspeech","author":"Sperber Matthias","year":"2013","unstructured":"Matthias Sperber, Graham Neubig, Christian F\u00fcgen, Satoshi Nakamura, and Alex Waibel. 2013. Efficient speech transcription through respeaking.. In Interspeech. 1087\u20131091."},{"key":"e_1_3_3_2_56_2","unstructured":"OrCam Staff. [n. d.]. How Voice-Activated Technology Improves the Lives of Blind People - OrCam \u2014 orcam.com. https:\/\/www.orcam.com\/en-us\/blog\/how-voice-activated-technology-improves-the-lives-of-blind-people. [Accessed 03-04-2024]."},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"crossref","unstructured":"Bernhard Suhm Brad Myers and Alex Waibel. 2001. Multimodal error correction for speech user interfaces. ACM transactions on computer-human interaction (TOCHI) 8 1 (2001) 60\u201398.","DOI":"10.1145\/371127.371166"},{"key":"e_1_3_3_2_58_2","first-page":"37","volume-title":"Psychology of learning and motivation","author":"Sweller John","year":"2011","unstructured":"John Sweller. 2011. Cognitive load theory. In Psychology of learning and motivation. Vol.\u00a055. Elsevier, 37\u201376."},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"crossref","unstructured":"Andries Van\u00a0Dam. 1997. Post-WIMP user interfaces. Commun. ACM 40 2 (1997) 63\u201367.","DOI":"10.1145\/253671.253708"},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2009.5373347"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3027063.3027066"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"crossref","unstructured":"DeLiang Wang and Jitong Chen. 2018. Supervised speech separation based on deep learning: An overview. IEEE\/ACM transactions on audio speech and language processing 26 10 (2018) 1702\u20131726.","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"crossref","unstructured":"Dianlei Xu Tong Li Yong Li Xiang Su Sasu Tarkoma Tao Jiang Jon Crowcroft and Pan Hui. 2021. Edge intelligence: Empowering intelligence to the edge of network. Proc. IEEE 109 11 (2021) 1778\u20131837.","DOI":"10.1109\/JPROC.2021.3119950"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"crossref","unstructured":"Yong Xu Jun Du Li-Rong Dai and Chin-Hui Lee. 2014. A regression approach to speech enhancement based on deep neural networks. IEEE\/ACM transactions on audio speech and language processing 23 1 (2014) 7\u201319.","DOI":"10.1109\/TASLP.2014.2364452"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"crossref","unstructured":"Lishuang Zhan Tianyang Xiong Hongwei Zhang Shihui Guo Xiaowei Chen Jiangtao Gong Juncong Lin and Yipeng Qin. 2024. TouchEditor: Interaction design and evaluation of a flexible touchpad for text editing of head-mounted displays in speech-unfriendly environments. Proceedings of the ACM on Interactive Mobile Wearable and Ubiquitous Technologies 7 4 (2024) 1\u201329.","DOI":"10.1145\/3631454"},{"key":"e_1_3_3_2_66_2","first-page":"41092","volume-title":"International Conference on Machine Learning","author":"Zhang Biao","year":"2023","unstructured":"Biao Zhang, Barry Haddow, and Alexandra Birch. 2023. Prompting large language model for machine translation: A case study. In International Conference on Machine Learning. PMLR, 41092\u201341110."},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474742"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511103"},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"crossref","unstructured":"Shengdong Zhao Felicia Tan and Katherine Fennedy. 2023. Heads-Up Computing Moving Beyond the Device-Centered Paradigm. Commun. ACM 66 9 (2023) 56\u201363.","DOI":"10.1145\/3571722"}],"event":{"name":"IUI '26: 31st International Conference on Intelligent User Interfaces","location":"Paphos Cyprus","acronym":"IUI '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGAI ACM Special Interest Group on Artificial Intelligence"]},"container-title":["Proceedings of the 31st International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3742413.3789167","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T12:55:30Z","timestamp":1773492930000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3742413.3789167"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,22]]},"references-count":68,"alternative-id":["10.1145\/3742413.3789167","10.1145\/3742413"],"URL":"https:\/\/doi.org\/10.1145\/3742413.3789167","relation":{},"subject":[],"published":{"date-parts":[[2026,3,22]]},"assertion":[{"value":"2026-03-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}