{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:55:33Z","timestamp":1781592933229,"version":"3.54.5"},"reference-count":289,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.engappai.2026.115233","type":"journal-article","created":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T12:41:02Z","timestamp":1780317662000},"page":"115233","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"P1","title":["Multimodal human\u2013computer interaction: A panoptic view"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5556-9451","authenticated-orcid":false,"given":"Thushara","family":"B.","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8977-1984","authenticated-orcid":false,"given":"Adithya","family":"V.","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"N.S.","family":"Sreekanth","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"issue":"10","key":"10.1016\/j.engappai.2026.115233_b1","doi-asserted-by":"crossref","first-page":"1533","DOI":"10.1109\/TASLP.2014.2339736","article-title":"Convolutional neural networks for speech recognition","volume":"22","author":"Abdel-Hamid","year":"2014","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b2","doi-asserted-by":"crossref","DOI":"10.1111\/exsy.12398","article-title":"Head mouse control system for people with disabilities","volume":"37","author":"Abiyev","year":"2020","journal-title":"Expert Syst."},{"key":"10.1016\/j.engappai.2026.115233_b3","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"6403","article-title":"Opportunistic sensing with MIC arrays on smart speakers for distal interaction and exercise tracking","author":"Agarwal","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b4","doi-asserted-by":"crossref","unstructured":"Ahn, S., Lee, G., 2019. Gaze-assisted typing for smart glasses. In: Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology. pp. 857\u2013869.","DOI":"10.1145\/3332165.3347883"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b5","doi-asserted-by":"crossref","first-page":"103","DOI":"10.3390\/mi11010103","article-title":"Smart tactile sensing systems based on embedded CNN implementations","volume":"11","author":"Alameh","year":"2020","journal-title":"Micromachines"},{"key":"10.1016\/j.engappai.2026.115233_b6","first-page":"331","article-title":"Human ability improvement with wireless sensors in human computer interaction","volume":"8","author":"Alao","year":"2019","journal-title":"Int. J. Comput. Appl. Technol. Res"},{"key":"10.1016\/j.engappai.2026.115233_b7","doi-asserted-by":"crossref","first-page":"19265","DOI":"10.1007\/s11042-020-08763-3","article-title":"Using olfactory media cues in e-learning\u2013perspectives from an empirical investigation","volume":"79","author":"Alkasasbeh","year":"2020","journal-title":"Multimedia Tools Appl."},{"key":"10.1016\/j.engappai.2026.115233_b8","series-title":"Amazon alexa \u2013 learn what alexa can do \u2014 Amazon.com","author":"AmazonAlexa","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b9","series-title":"2020 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"4249","article-title":"Uncertainty aware texture classification and mapping using soft tactile sensors","author":"Amini","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b10","series-title":"Gsr analysis for stress: Development and validation of an open source tool for noisy naturalistic gsr data","author":"Aqajari","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b11","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1007\/s00779-011-0395-z","article-title":"Gesture recognition using RFID technology","volume":"16","author":"Asadzadeh","year":"2012","journal-title":"Pers. Ubiquitous Comput."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b12","doi-asserted-by":"crossref","DOI":"10.1088\/2631-8695\/ad61bd","article-title":"Optimal path planning using bidirectional rapidly-exploring random tree star-dynamic window approach (BRRT*-DWA) with adaptive Monte Carlo localization (AMCL) for mobile robot","volume":"6","author":"Ayalew","year":"2024","journal-title":"Eng. Res. Express"},{"key":"10.1016\/j.engappai.2026.115233_b13","first-page":"13","article-title":"Systematic review of multimodal human\u2013computer interaction","volume":"vol. 9","author":"Azofeifa","year":"2022"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b14","doi-asserted-by":"crossref","first-page":"214","DOI":"10.3390\/s150100214","article-title":"Evaluation of the leap motion controller as a new contact-free pointing device","volume":"15","author":"Bachmann","year":"2014","journal-title":"Sensors"},{"issue":"2","key":"10.1016\/j.engappai.2026.115233_b15","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1007\/s41060-016-0008-z","article-title":"Recent methods in vision-based hand gesture recognition","volume":"1","author":"Badi","year":"2016","journal-title":"Int. J. Data Sci. Anal."},{"key":"10.1016\/j.engappai.2026.115233_b16","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b17","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1109\/MSP.2009.932166","article-title":"Developments and directions in speech recognition and understanding, Part 1 [DSP Education]","volume":"26","author":"Baker","year":"2009","journal-title":"IEEE Signal Process. Mag."},{"key":"10.1016\/j.engappai.2026.115233_b18","series-title":"Smart Education and E-Learning 2020","first-page":"393","article-title":"Smart universities: gesture recognition systems for college students with disabilities","author":"Bakken","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b19","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.108443","article-title":"Ultra-range gesture recognition using a web-camera in human\u2013robot interaction","volume":"132","author":"Bamani","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.engappai.2026.115233_b20","series-title":"2016 IEEE ANDESCON","first-page":"1","article-title":"A case study of speech recognition in spanish: from conventional to deep approach","author":"Becerra","year":"2016"},{"key":"10.1016\/j.engappai.2026.115233_b21","doi-asserted-by":"crossref","first-page":"30509","DOI":"10.1007\/s11042-020-09004-3","article-title":"Vision-based human activity recognition: a survey","volume":"79","author":"Beddiar","year":"2020","journal-title":"Multimedia Tools Appl."},{"issue":"5","key":"10.1016\/j.engappai.2026.115233_b22","doi-asserted-by":"crossref","first-page":"418","DOI":"10.1080\/10447318.2020.1823688","article-title":"Integrating mobile multimodal interactions based on programming by demonstration","volume":"37","author":"Bellal","year":"2021","journal-title":"Int. J. Human\u2013Computer Interact."},{"key":"10.1016\/j.engappai.2026.115233_b23","doi-asserted-by":"crossref","first-page":"93","DOI":"10.1007\/978-94-017-2367-1_6","article-title":"Multimodality in language and speech systems\u2014from theory to design support tool","author":"Bernsen","year":"2002","journal-title":"Multimodality Lang. Speech Syst."},{"key":"10.1016\/j.engappai.2026.115233_b24","series-title":"Human-Computer Interaction: Design and Evaluation: 17th International Conference, HCI International 2015, Los Angeles, CA, USA, August 2-7, 2015, Proceedings, Part I 17","first-page":"143","article-title":"ISO 9241-11 revised: What have we learnt about usability since 1998?","author":"Bevan","year":"2015"},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b25","doi-asserted-by":"crossref","first-page":"839","DOI":"10.3390\/electronics12040839","article-title":"Speech emotion recognition based on multiple acoustic features and deep convolutional neural network","volume":"12","author":"Bhangale","year":"2023","journal-title":"Electronics"},{"issue":"18","key":"10.1016\/j.engappai.2026.115233_b26","doi-asserted-by":"crossref","first-page":"20886","DOI":"10.1109\/JSEN.2021.3100390","article-title":"Prediction of pulmonary diseases with electronic nose using SVM and xgboost","volume":"21","author":"Binson","year":"2021","journal-title":"IEEE Sens. J."},{"key":"10.1016\/j.engappai.2026.115233_b27","doi-asserted-by":"crossref","unstructured":"Bolt, R.A., 1980. \u201cPut-that-there\u201d Voice and gesture at the graphics interface. In: Proceedings of the 7th Annual Conference on Computer Graphics and Interactive Techniques. pp. 262\u2013270.","DOI":"10.1145\/800250.807503"},{"key":"10.1016\/j.engappai.2026.115233_b28","doi-asserted-by":"crossref","first-page":"2161","DOI":"10.1007\/s12161-015-0393-2","article-title":"Classification of honey according to geographical and botanical origins and detection of its adulteration using voltammetric electronic tongue","volume":"9","author":"Bougrini","year":"2016","journal-title":"Food Anal. Methods"},{"issue":"15","key":"10.1016\/j.engappai.2026.115233_b29","doi-asserted-by":"crossref","first-page":"5830","DOI":"10.3390\/s22155830","article-title":"Intelligent user interfaces and their evaluation: a systematic mapping study","volume":"22","author":"Brdnik","year":"2022","journal-title":"Sensors"},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b30","doi-asserted-by":"crossref","first-page":"493","DOI":"10.1007\/s10044-021-01001-y","article-title":"Multimodal temporal machine learning for bipolar disorder and depression recognition","volume":"25","author":"Ceccarelli","year":"2022","journal-title":"Pattern Anal. Appl."},{"key":"10.1016\/j.engappai.2026.115233_b31","doi-asserted-by":"crossref","unstructured":"Chai, J.Y., Hong, P., Zhou, M.X., 2004. A probabilistic approach to reference resolution in multimodal user interfaces. In: Proceedings of the 9th International Conference on Intelligent User Interfaces. pp. 70\u201377.","DOI":"10.1145\/964442.964457"},{"key":"10.1016\/j.engappai.2026.115233_b32","first-page":"68","article-title":"Natural language based multimodal interface for uav mission planning","volume":"vol. 61","author":"Chandarana","year":"2017"},{"key":"10.1016\/j.engappai.2026.115233_b33","first-page":"1","article-title":"Multimodal emotion recognition using contextualized audio information and ground transcripts on multiple datasets","author":"Chauhan","year":"2023","journal-title":"Arab. J. Sci. Eng."},{"key":"10.1016\/j.engappai.2026.115233_b34","series-title":"2015 IEEE International Conference on Image Processing","first-page":"168","article-title":"UTD-MHAD: A multimodal dataset for human action recognition utilizing a depth camera and a wearable inertial sensor","author":"Chen","year":"2015"},{"key":"10.1016\/j.engappai.2026.115233_b35","series-title":"Shikra: Unleashing multimodal llm\u2019s referential dialogue magic","author":"Chen","year":"2023"},{"key":"10.1016\/j.engappai.2026.115233_b36","series-title":"Towards comprehensive multimodal perception: Introducing the touch-language-vision dataset","author":"Cheng","year":"2024"},{"issue":"6","key":"10.1016\/j.engappai.2026.115233_b37","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1038\/s41928-021-00585-x","article-title":"An artificial neural tactile sensing system","volume":"4","author":"Chun","year":"2021","journal-title":"Nat. Electron."},{"key":"10.1016\/j.engappai.2026.115233_b38","series-title":"CMU-MOSEI dataset \u2014 MultiComp","author":"CMU-MOSEI","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b39","doi-asserted-by":"crossref","unstructured":"Cohen, P.R., Dalrymple, M., Moran, D.B., Pereira, F., Sullivan, J.W., 1989. Synergistic use of direct manipulation and natural language. In: Proceedings of the SIGCHI Conference on Human Factors in Computing Systems. pp. 227\u2013233.","DOI":"10.1145\/67449.67494"},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b40","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1145\/2735589","article-title":"Sketch-thru-plan: A multimodal interface for command and control","volume":"58","author":"Cohen","year":"2015","journal-title":"Commun. ACM"},{"key":"10.1016\/j.engappai.2026.115233_b41","doi-asserted-by":"crossref","unstructured":"Cutugno, F., Leano, V.A., Rinaldi, R., Mignini, G., 2012. Multimodal framework for mobile interaction. In: Proceedings of the International Working Conference on Advanced Visual Interfaces. pp. 197\u2013203.","DOI":"10.1145\/2254556.2254592"},{"issue":"14","key":"10.1016\/j.engappai.2026.115233_b42","doi-asserted-by":"crossref","first-page":"3936","DOI":"10.3390\/s20143936","article-title":"An intelligent and low-cost eye-tracking system for motorized wheelchair control","volume":"20","author":"Dahmani","year":"2020","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b43","series-title":"Nvlm: Open frontier-class multimodal llms","author":"Dai","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b44","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1016\/j.cviu.2019.01.008","article-title":"Heterogeneous hand gesture recognition using 3D dynamic skeletal data","volume":"181","author":"De Smedt","year":"2019","journal-title":"Comput. Vis. Image Underst."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b45","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3549530","article-title":"The design and observed effects of robot-performed manual gestures: A systematic review","volume":"12","author":"De Wit","year":"2023","journal-title":"ACM Trans. Human-Robot Interact."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b46","doi-asserted-by":"crossref","first-page":"10589","DOI":"10.1109\/LRA.2022.3191232","article-title":"A wearable smart glove and its application of pose and gesture detection to sign language classification","volume":"7","author":"DelPreto","year":"2022","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.engappai.2026.115233_b47","doi-asserted-by":"crossref","DOI":"10.1109\/ACCESS.2025.3538689","article-title":"Neural network based lower limb prostheses control using super twisting sliding mode control","author":"Demora","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.engappai.2026.115233_b48","series-title":"Human-Computer Interaction","author":"Dix","year":"2003"},{"key":"10.1016\/j.engappai.2026.115233_b49","first-page":"1","article-title":"Dynamic hand gesture recognition based on signals from specialized data glove and deep learning algorithms","volume":"70","author":"Dong","year":"2021","journal-title":"IEEE Trans. Instrum. Meas."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b50","doi-asserted-by":"crossref","first-page":"6","DOI":"10.3390\/mti9010006","article-title":"Multimodal interaction, interfaces, and communication: a survey","volume":"9","author":"Dritsas","year":"2025","journal-title":"Multimodal Technol. Interact."},{"key":"10.1016\/j.engappai.2026.115233_b51","series-title":"Multimodal polynomial fusion for detecting driver distraction","author":"Du","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b52","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1007\/s12193-010-0043-3","article-title":"Description languages for multimodal interaction: a set of guidelines and its illustration with SMUIML","volume":"3","author":"Dumas","year":"2010","journal-title":"J. Multimodal User Interfaces"},{"key":"10.1016\/j.engappai.2026.115233_b53","series-title":"Human Machine Interaction: Research Results of the Mmi Program","first-page":"3","article-title":"Multimodal interfaces: A survey of principles, models and frameworks","author":"Dumas","year":"2009"},{"key":"10.1016\/j.engappai.2026.115233_b54","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2021.102756","article-title":"Academic stress detection on university students during COVID-19 outbreak by using an electronic nose and the galvanic skin response","volume":"68","author":"Dur\u00e1n Acevedo","year":"2021","journal-title":"Biomed. Signal Process. Control."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b55","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1007\/s12193-013-0126-z","article-title":"Multimodal interaction: a survey from model driven engineering and mobile perspectives","volume":"7","author":"Elouali","year":"2013","journal-title":"J. Multimodal User Interfaces"},{"key":"10.1016\/j.engappai.2026.115233_b56","series-title":"Enterface\u201905: The SIMILAR NoE summer workshop on multimodal interfaces","author":"eNTERFACE","year":"2026"},{"key":"10.1016\/j.engappai.2026.115233_b57","first-page":"1","article-title":"Physiological computing: interfacing with the human nervous system","author":"Fairclough","year":"2011","journal-title":"Sens. Emot.: Impact Context. Exp. Meas."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b58","doi-asserted-by":"crossref","first-page":"564","DOI":"10.1109\/TBCAS.2018.2810256","article-title":"A multimodal adaptive wireless control interface for people with upper-body disabilities","volume":"12","author":"Fall","year":"2018","journal-title":"IEEE Trans. Biomed. Circuits Syst."},{"issue":"2","key":"10.1016\/j.engappai.2026.115233_b59","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0192684","article-title":"Electrooculography-based continuous eye-writing recognition system for efficient assistive communication systems","volume":"13","author":"Fang","year":"2018","journal-title":"PLoS One"},{"key":"10.1016\/j.engappai.2026.115233_b60","article-title":"A multitask electronic nose data processing model based on transformer encoder","author":"Feng","year":"2024","journal-title":"IEEE Sens. J."},{"key":"10.1016\/j.engappai.2026.115233_b61","series-title":"2020 International Conference on Omni-Layer Intelligent Systems","first-page":"1","article-title":"Human activity recognition: From sensors to applications","author":"Fereidoonian","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b62","series-title":"Objectfolder: A dataset of objects with implicit visual, auditory, and tactile representations","author":"Gao","year":"2021"},{"issue":"18","key":"10.1016\/j.engappai.2026.115233_b63","doi-asserted-by":"crossref","first-page":"17421","DOI":"10.1109\/JSEN.2021.3059685","article-title":"Dynamic hand gesture recognition based on 3D hand pose estimation for human\u2013robot interaction","volume":"22","author":"Gao","year":"2021","journal-title":"IEEE Sens. J."},{"key":"10.1016\/j.engappai.2026.115233_b64","series-title":"HCI International 2021-Late Breaking Papers: Multimodality, EXtended Reality, and Artificial Intelligence: 23rd HCI International Conference, HCII 2021, Virtual Event, July 24\u201329, 2021, Proceedings 23","first-page":"3","article-title":"Towards effective odor diffusion with fuzzy logic in an olfactory interface for a serious game","author":"Garcia-Ruiz","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b65","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/j.ijpsycho.2016.10.013","article-title":"An efficient automatic workload estimation method based on electrodermal activity using pattern classifier combinations","volume":"110","author":"Ghaderyan","year":"2016","journal-title":"Int. J. Psychophysiol."},{"issue":"11","key":"10.1016\/j.engappai.2026.115233_b66","doi-asserted-by":"crossref","first-page":"419","DOI":"10.1016\/S1364-6613(99)01397-2","article-title":"The role of gesture in communication and thinking","volume":"3","author":"Goldin-Meadow","year":"1999","journal-title":"Trends Cogn. Sci."},{"key":"10.1016\/j.engappai.2026.115233_b67","series-title":"Google assistant, your own personal google","author":"GoogleAssistant","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b68","doi-asserted-by":"crossref","first-page":"1347","DOI":"10.1007\/s11760-017-1092-9","article-title":"Discrimination between different emotional states based on the chaotic behavior of galvanic skin responses","volume":"11","author":"Goshvarpour","year":"2017","journal-title":"Signal, Image Video Process."},{"key":"10.1016\/j.engappai.2026.115233_b69","series-title":"International Conference on Human-Computer Interaction","first-page":"501","article-title":"Natural interaction with traffic control cameras through multimodal interfaces","author":"Grazioso","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b70","series-title":"Grove - EMG detector \u2014 seeed studio wiki","author":"GroveEMG","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b71","doi-asserted-by":"crossref","first-page":"5691","DOI":"10.1007\/s11042-016-3846-8","article-title":"Extending multimedia languages to support multimodal user interactions","volume":"76","author":"Guedes","year":"2017","journal-title":"Multimedia Tools Appl."},{"key":"10.1016\/j.engappai.2026.115233_b72","series-title":"Conformer: Convolution-augmented transformer for speech recognition","author":"Gulati","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b73","series-title":"ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"5874","article-title":"Recent developments on espnet toolkit boosted by conformer","author":"Guo","year":"2021"},{"issue":"16","key":"10.1016\/j.engappai.2026.115233_b74","doi-asserted-by":"crossref","first-page":"6425","DOI":"10.1109\/JSEN.2016.2581023","article-title":"A continuous hand gestures recognition technique for human-machine interaction using accelerometer and gyroscope sensors","volume":"16","author":"Gupta","year":"2016","journal-title":"IEEE Sens. J."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b75","doi-asserted-by":"crossref","first-page":"84","DOI":"10.3390\/mti4040084","article-title":"A human\u2013computer interface replacing mouse and keyboard for individuals with limited upper limb mobility","volume":"4","author":"G\u00fcr","year":"2020","journal-title":"Multimodal Technol. Interact."},{"issue":"24","key":"10.1016\/j.engappai.2026.115233_b76","doi-asserted-by":"crossref","first-page":"5408","DOI":"10.3390\/s19245408","article-title":"Evaluation of pattern recognition methods for head gesture-based interface of a virtual reality helmet equipped with a single IMU sensor","volume":"19","author":"Hachaj","year":"2019","journal-title":"Sensors"},{"issue":"12","key":"10.1016\/j.engappai.2026.115233_b77","doi-asserted-by":"crossref","first-page":"13075","DOI":"10.1007\/s11227-023-05168-5","article-title":"Textual emotion detection utilizing a transfer learning approach","volume":"79","author":"Hadikhah Mozhdehi","year":"2023","journal-title":"J. Supercomput."},{"issue":"6","key":"10.1016\/j.engappai.2026.115233_b78","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1109\/MSP.2019.2918706","article-title":"Speech processing for digital home assistants: Combining signal processing with deep-learning techniques","volume":"36","author":"Haeb-Umbach","year":"2019","journal-title":"IEEE Signal Process. Mag."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b79","doi-asserted-by":"crossref","first-page":"478","DOI":"10.1109\/TPAMI.2009.30","article-title":"In the eye of the beholder: A survey of models for eyes and gaze","volume":"32","author":"Hansen","year":"2009","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.engappai.2026.115233_b80","series-title":"2018 IEEE Conference on Multimedia Information Processing and Retrieval","first-page":"196","article-title":"Self-attentive feature-level fusion for multimodal emotion detection","author":"Hazarika","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b81","doi-asserted-by":"crossref","unstructured":"Heinrich, R., Zimmerer, C., Fischbach, M., Erich Latoschik, M., 2025. A Systematic Review of Fusion Methods for the User-Centered Design of Multimodal Interfaces. In: Proceedings of the 27th International Conference on Multimodal Interaction. pp. 485\u2013495.","DOI":"10.1145\/3716553.3750790"},{"issue":"7","key":"10.1016\/j.engappai.2026.115233_b82","doi-asserted-by":"crossref","first-page":"1485","DOI":"10.3390\/s17071485","article-title":"A novel wearable forehead EOG measurement system for human computer interfaces","volume":"17","author":"Heo","year":"2017","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b83","doi-asserted-by":"crossref","unstructured":"Herrera, N.S., McMahan, R.P., 2014. Development of a simple and low-cost olfactory display for immersive media experiences. In: Proceedings of the 2nd ACM International Workshop on Immersive Media Experiences. pp. 1\u20136.","DOI":"10.1145\/2660579.2660584"},{"issue":"6","key":"10.1016\/j.engappai.2026.115233_b84","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","article-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups","volume":"29","author":"Hinton","year":"2012","journal-title":"IEEE Signal Process. Mag."},{"key":"10.1016\/j.engappai.2026.115233_b85","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s42452-019-0771-2","article-title":"Supervised learning classifiers for arabic gestures recognition using kinect V2","volume":"1","author":"Hisham","year":"2019","journal-title":"SN Appl. Sci."},{"key":"10.1016\/j.engappai.2026.115233_b86","first-page":"1221","article-title":"Arabic sign language recognition using ada-boosting based on a leap motion controller","volume":"13","author":"Hisham","year":"2021","journal-title":"Int. J. Inf. Technol."},{"key":"10.1016\/j.engappai.2026.115233_b87","doi-asserted-by":"crossref","first-page":"853","DOI":"10.1613\/jair.3994","article-title":"Framing image description as a ranking task: Data, models and evaluation metrics","volume":"47","author":"Hodosh","year":"2013","journal-title":"J. Artificial Intelligence Res."},{"issue":"8","key":"10.1016\/j.engappai.2026.115233_b88","doi-asserted-by":"crossref","first-page":"639","DOI":"10.1016\/j.tics.2019.05.006","article-title":"Multimodal language processing in human communication","volume":"23","author":"Holler","year":"2019","journal-title":"Trends Cogn. Sci."},{"key":"10.1016\/j.engappai.2026.115233_b89","series-title":"2018 27th IEEE International Symposium on Robot and Human Interactive Communication","first-page":"320","article-title":"Exoten-glove: A force-feedback haptic glove based on twisted string actuation system","author":"Hosseini","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b90","doi-asserted-by":"crossref","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","article-title":"Hubert: Self-supervised speech representation learning by masked prediction of hidden units","volume":"29","author":"Hsu","year":"2021","journal-title":"IEEE\/ACM Trans. Audio, Speech, Lang. Process."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b91","doi-asserted-by":"crossref","first-page":"1035","DOI":"10.1109\/TMM.2018.2866770","article-title":"Quatnet: Quaternion-based head pose estimation with multiregression loss","volume":"21","author":"Hsu","year":"2018","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.115233_b92","doi-asserted-by":"crossref","unstructured":"Hu, Y.O., Tang, J., Gong, X., Zhou, Z., Zhang, S., Elvitigala, D.S., Mueller, F.F., Hu, W., Quigley, A.J., 2025. Vision-based multimodal interfaces: A survey and taxonomy for enhanced context-aware system design. In: Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems. pp. 1\u201331.","DOI":"10.1145\/3706598.3714161"},{"key":"10.1016\/j.engappai.2026.115233_b93","series-title":"Voice transformer network: Sequence-to-sequence voice conversion using transformer with text-to-speech pretraining","author":"Huang","year":"2019"},{"key":"10.1016\/j.engappai.2026.115233_b94","doi-asserted-by":"crossref","DOI":"10.1016\/j.compag.2023.107985","article-title":"Deep learning with tactile sequences enables fruit recognition and force prediction for damage-free grasping","volume":"211","author":"Huang","year":"2023","journal-title":"Comput. Electron. Agric."},{"issue":"12","key":"10.1016\/j.engappai.2026.115233_b95","doi-asserted-by":"crossref","first-page":"4528","DOI":"10.1109\/JSEN.2019.2898891","article-title":"Activity-aware fall detection and recognition based on wearable sensors","volume":"19","author":"Hussain","year":"2019","journal-title":"IEEE Sens. J."},{"key":"10.1016\/j.engappai.2026.115233_b96","series-title":"Advanced Computer and Communication Engineering Technology: Proceedings of ICOCOE 2015","first-page":"567","article-title":"Analysis of electromyography (EMG) signal for human arm muscle: A review","author":"Ibrahim","year":"2016"},{"key":"10.1016\/j.engappai.2026.115233_b97","series-title":"Mechanoreceptors","author":"Iheanacho","year":"2019"},{"key":"10.1016\/j.engappai.2026.115233_b98","first-page":"151","article-title":"Eye movement-based human-computer interaction techniques: Toward non-command interfaces","volume":"4","author":"Jacob","year":"1993","journal-title":"Adv. Human-Computer Interact."},{"issue":"1\u20132","key":"10.1016\/j.engappai.2026.115233_b99","doi-asserted-by":"crossref","first-page":"116","DOI":"10.1016\/j.cviu.2006.10.019","article-title":"Multimodal human\u2013computer interaction: A survey","volume":"108","author":"Jaimes","year":"2007","journal-title":"Comput. Vis. Image Underst."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b100","doi-asserted-by":"crossref","first-page":"623","DOI":"10.3390\/s110100623","article-title":"Response identification in the extremely low frequency region of an electret condenser microphone","volume":"11","author":"Jeng","year":"2011","journal-title":"Sensors"},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b101","doi-asserted-by":"crossref","DOI":"10.1007\/s11818-007-0311-y","article-title":"Monitoring eye and eyelid movements by infrared reflectance oculography to measure drowsiness in drivers","volume":"11","author":"Johns","year":"2007","journal-title":"Somnologie"},{"key":"10.1016\/j.engappai.2026.115233_b102","doi-asserted-by":"crossref","unstructured":"Johnston, M., Bangalore, S., Vasireddy, G., Stent, A., Ehlen, P., Walker, M., Whittaker, S., Maloor, P., 2002. MATCH: An architecture for multimodal dialogue systems. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. pp. 376\u2013383.","DOI":"10.3115\/1073083.1073146"},{"key":"10.1016\/j.engappai.2026.115233_b103","doi-asserted-by":"crossref","unstructured":"Johnston, M., Cohen, P.R., McGee, D., Oviatt, S., Pittman, J.A., Smith, I., 1997. Unification-based multimodal integration. In: 35th Annual Meeting of the Association for Computational Linguistics and 8th Conference of the European Chapter of the Association for Computational Linguistics. pp. 281\u2013288.","DOI":"10.3115\/976909.979653"},{"key":"10.1016\/j.engappai.2026.115233_b104","series-title":"Map-Based Mobile Services: Design, Interaction and Usability","first-page":"168","article-title":"User interaction in mobile navigation applications","author":"Jokinen","year":"2008"},{"key":"10.1016\/j.engappai.2026.115233_b105","series-title":"Cyber-Physical Systems and Supporting Technologies for Industrial Automation","first-page":"245","article-title":"Interfacing of actuators and sensors","author":"Kandasamy","year":"2023"},{"key":"10.1016\/j.engappai.2026.115233_b106","doi-asserted-by":"crossref","first-page":"546","DOI":"10.1134\/S1054661809030225","article-title":"Information enquiry kiosk with multimodal user interface","volume":"19","author":"Karpov","year":"2009","journal-title":"Pattern Recognit. Image Anal."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b107","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1134\/S1019331618010094","article-title":"Multimodal interfaces of human\u2013computer interaction","volume":"88","author":"Karpov","year":"2018","journal-title":"Her. Russ. Acad. Sci."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b108","first-page":"137","article-title":"Human-computer interaction: Overview on state of the art","volume":"1","author":"Karray","year":"2008","journal-title":"Int. J. Smart Sens. Intell. Syst."},{"key":"10.1016\/j.engappai.2026.115233_b109","first-page":"225","article-title":"Guidelines for the eye tracker calibration using points of regard","volume":"vol. 4","author":"Kasprowski","year":"2014"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b110","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1146\/annurev.anthro.26.1.109","article-title":"Gesture","volume":"26","author":"Kendon","year":"1997","journal-title":"Annu. Rev. Anthr."},{"key":"10.1016\/j.engappai.2026.115233_b111","doi-asserted-by":"crossref","unstructured":"Khamis, M., Oechsner, C., Alt, F., Bulling, A., 2018. VRpursuits: Interaction in virtual reality using smooth pursuit eye movements. In: Proceedings of the 2018 International Conference on Advanced Visual Interfaces. pp. 1\u20138.","DOI":"10.1145\/3206505.3206522"},{"key":"10.1016\/j.engappai.2026.115233_b112","doi-asserted-by":"crossref","unstructured":"Khamis, M., Saltuk, O., Hang, A., Stolz, K., Bulling, A., Alt, F., 2016. TextPursuits: using text for pursuits-based interaction and calibration on public displays. In: Proceedings of the 2016 ACM International Joint Conference on Pervasive and Ubiquitous Computing. pp. 274\u2013285.","DOI":"10.1145\/2971648.2971679"},{"key":"10.1016\/j.engappai.2026.115233_b113","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1016\/j.displa.2018.08.001","article-title":"Deepgesture: Deep learning-based gesture recognition scheme using motion sensors","volume":"55","author":"Kim","year":"2018","journal-title":"Displays"},{"issue":"7","key":"10.1016\/j.engappai.2026.115233_b114","article-title":"Tactile avatar: Tactile sensing system mimicking human tactile cognition","volume":"8","author":"Kim","year":"2021","journal-title":"Adv. Sci."},{"issue":"39","key":"10.1016\/j.engappai.2026.115233_b115","article-title":"Skin electronics: next-generation device platform for virtual and augmented reality","volume":"31","author":"Kim","year":"2021","journal-title":"Adv. Funct. Mater."},{"key":"10.1016\/j.engappai.2026.115233_b116","series-title":"Soft Computing: Biomedical and Related Applications","first-page":"59","article-title":"End-to-end hand rehabilitation system with single-shot gesture classification for stroke patients","author":"Koh","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b117","doi-asserted-by":"crossref","unstructured":"Koldijk, S., Sappelli, M., Verberne, S., Neerincx, M.A., Kraaij, W., 2014. The swell knowledge work dataset for stress and user modeling research. In: Proceedings of the 16th International Conference on Multimodal Interaction. pp. 291\u2013298.","DOI":"10.1145\/2663204.2663257"},{"key":"10.1016\/j.engappai.2026.115233_b118","series-title":"2021 9th International Conference on Affective Computing and Intelligent Interaction","first-page":"1","article-title":"Multimodal human-agent dialogue corpus with annotations at utterance and dialogue levels","author":"Komatani","year":"2021"},{"issue":"6","key":"10.1016\/j.engappai.2026.115233_b119","doi-asserted-by":"crossref","DOI":"10.1002\/aisy.202270027","article-title":"Bioinspired co-design of tactile sensor and deep learning algorithm for human\u2013robot interaction","volume":"4","author":"Kong","year":"2022","journal-title":"Adv. Intell. Syst."},{"key":"10.1016\/j.engappai.2026.115233_b120","series-title":"2018-3DTV-Conference: The True Vision-Capture, Transmission and Display of 3D Video","first-page":"1","article-title":"Sign language recognition based on hand and body skeletal data","author":"Konstantinidis","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b121","series-title":"Multimodal continuation-style architectures for human-robot interaction","author":"Krishnaswamy","year":"2019"},{"key":"10.1016\/j.engappai.2026.115233_b122","doi-asserted-by":"crossref","DOI":"10.1016\/j.sna.2022.113887","article-title":"Recent development and futuristic applications of MEMS based piezoelectric microphones","author":"Kumar","year":"2022","journal-title":"Sensors Actuators A: Phys."},{"key":"10.1016\/j.engappai.2026.115233_b123","doi-asserted-by":"crossref","first-page":"298","DOI":"10.1016\/j.bios.2016.12.001","article-title":"Increasing trend of wearables and multimodal interface for human activity monitoring: A review","volume":"90","author":"Kumari","year":"2017","journal-title":"Biosens. Bioelectron."},{"key":"10.1016\/j.engappai.2026.115233_b124","doi-asserted-by":"crossref","unstructured":"Lai, M.K., 2015. Universal scent blackbox: Engaging visitors communication through creating olfactory experience at art museum. In: Proceedings of the 33rd Annual International Conference on the Design of Communication. pp. 1\u20136.","DOI":"10.1145\/2775441.2775483"},{"key":"10.1016\/j.engappai.2026.115233_b125","doi-asserted-by":"crossref","first-page":"752","DOI":"10.3758\/BF03194970","article-title":"The influence of head contour and nose angle on the perception of eye-gaze direction","volume":"66","author":"Langton","year":"2004","journal-title":"Percept. Psychophys."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b126","doi-asserted-by":"crossref","first-page":"1192","DOI":"10.1109\/SURV.2012.110112.00192","article-title":"A survey on human activity recognition using wearable sensors","volume":"15","author":"Lara","year":"2012","journal-title":"IEEE Commun. Surv. Tutor."},{"issue":"7","key":"10.1016\/j.engappai.2026.115233_b127","doi-asserted-by":"crossref","first-page":"2381","DOI":"10.3390\/s21072381","article-title":"Driving stress detection using multimodal convolutional neural networks with nonlinear representation of short-term physiological signals","volume":"21","author":"Lee","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b128","article-title":"Yogurt classification using an electronic tongue system and machine learning techniques","volume":"16","author":"Leon-Medina","year":"2022","journal-title":"Intell. Syst. Appl."},{"key":"10.1016\/j.engappai.2026.115233_b129","doi-asserted-by":"crossref","DOI":"10.1109\/TIM.2023.3326241","article-title":"VITO-transformer: a visual-tactile fusion network for object recognition","author":"Li","year":"2023","journal-title":"IEEE Trans. Instrum. Meas."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b130","doi-asserted-by":"crossref","first-page":"80","DOI":"10.1016\/j.patcog.2013.05.028","article-title":"HEGM: A hierarchical elastic graph matching for hand gesture recognition","volume":"47","author":"Li","year":"2014","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.engappai.2026.115233_b131","doi-asserted-by":"crossref","first-page":"647","DOI":"10.1007\/s42235-019-0052-1","article-title":"Identification of gesture based on combination of raw sEMG and sEMG envelope using supervised learning and univariate feature selection","volume":"16","author":"Liang","year":"2019","journal-title":"J. Bionic Eng."},{"key":"10.1016\/j.engappai.2026.115233_b132","series-title":"2019 IEEE\/CVF International Conference on Computer Vision Workshop","first-page":"1140","article-title":"Learning to personalize in appearance-based gaze tracking","author":"Linden","year":"2019"},{"key":"10.1016\/j.engappai.2026.115233_b133","doi-asserted-by":"crossref","unstructured":"Ling, X., Wang, D., Yang, J., 2021. A new head pose estimation method using vision transformer model. In: Proceedings of the 2021 7th International Conference on Computing and Artificial Intelligence. pp. 154\u2013159.","DOI":"10.1145\/3467707.3467729"},{"key":"10.1016\/j.engappai.2026.115233_b134","series-title":"2016 IEEE Virtual Reality","first-page":"223","article-title":"An intelligent multimodal mixed reality real-time strategy game","author":"Link","year":"2016"},{"key":"10.1016\/j.engappai.2026.115233_b135","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2024.106686","article-title":"Integration of convolutional neural network and vision transformer for gesture recognition using sEMG","volume":"98","author":"Liu","year":"2024","journal-title":"Biomed. Signal Process. Control."},{"issue":"5","key":"10.1016\/j.engappai.2026.115233_b136","article-title":"A fused convolutional transformer for voltammetric electronic tongue analysis tasks","volume":"12","author":"Liu","year":"2024","journal-title":"J. Environ. Chem. Eng."},{"key":"10.1016\/j.engappai.2026.115233_b137","doi-asserted-by":"crossref","first-page":"1309","DOI":"10.1007\/s40815-018-0449-8","article-title":"Electronic nose-based odor classification using genetic algorithms and fuzzy support vector machines","volume":"20","author":"Liu","year":"2018","journal-title":"Int. J. Fuzzy Syst."},{"key":"10.1016\/j.engappai.2026.115233_b138","series-title":"The ryerson audio-visual database of emotional speech and song (RAVDESS)","author":"Livingstone","year":"2018"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b139","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1109\/MPUL.2015.2498474","article-title":"Transforming health care: Body sensor networks, wearables, and the internet of things","volume":"7","author":"Lo","year":"2016","journal-title":"IEEE Pulse"},{"key":"10.1016\/j.engappai.2026.115233_b140","doi-asserted-by":"crossref","first-page":"444","DOI":"10.3389\/fneur.2019.00444","article-title":"Hands-free human-computer interface based on facial myoelectric pattern recognition","volume":"10","author":"Lu","year":"2019","journal-title":"Front. Neurol."},{"issue":"22","key":"10.1016\/j.engappai.2026.115233_b141","doi-asserted-by":"crossref","first-page":"7665","DOI":"10.3390\/s21227665","article-title":"Multimodal emotion recognition on RAVDESS dataset using transfer learning","volume":"21","author":"Luna-Jim\u00e9nez","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b142","series-title":"Human-Computer Interaction: An Empirical Research Perspective","author":"MacKenzie","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b143","article-title":"Enhanced trajectory control of quadrotor UAV using fuzzy PID based recurrent neural network controller","author":"Madebo","year":"2024","journal-title":"IEEE Access"},{"key":"10.1016\/j.engappai.2026.115233_b144","doi-asserted-by":"crossref","first-page":"36183","DOI":"10.1109\/ACCESS.2024.3374894","article-title":"Robust tracking control for quadrotor UAV with external disturbances and uncertainties using neural network based MRAC","volume":"12","author":"Madebo","year":"2024","journal-title":"IEEE Access"},{"key":"10.1016\/j.engappai.2026.115233_b145","series-title":"2020 10th Annual Computing and Communication Workshop and Conference","first-page":"0768","article-title":"Interface for human machine interaction for assistant devices: A review","author":"Mahmud","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b146","series-title":"Advances in Physiological Computing","first-page":"39","article-title":"Eye tracking and eye-based human\u2013computer interaction","author":"Majaranta","year":"2014"},{"key":"10.1016\/j.engappai.2026.115233_b147","series-title":"Advanced Concepts for Intelligent Vision Systems: 17th International Conference, ACIVS 2016, Lecce, Italy, October 24-27, 2016, Proceedings 17","first-page":"47","article-title":"Hand gesture recognition using infrared imagery provided by leap motion controller","author":"Mantec\u00f3n","year":"2016"},{"key":"10.1016\/j.engappai.2026.115233_b148","series-title":"2019 8th International Conference on Affective Computing and Intelligent Interaction","first-page":"545","article-title":"Generating robotic emotional body language with variational autoencoders","author":"Marmpena","year":"2019"},{"issue":"10","key":"10.1016\/j.engappai.2026.115233_b149","doi-asserted-by":"crossref","first-page":"782","DOI":"10.1016\/j.specom.2008.04.010","article-title":"Extraction and representation of prosodic features for language and speaker recognition","volume":"50","author":"Mary","year":"2008","journal-title":"Speech Commun."},{"key":"10.1016\/j.engappai.2026.115233_b150","series-title":"Generating natural motion in an android by mapping human motion","first-page":"57","author":"Matsui","year":"2018"},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b151","doi-asserted-by":"crossref","first-page":"737","DOI":"10.1016\/j.neuron.2014.05.001","article-title":"Discriminative and affective touch: sensing and feeling","volume":"82","author":"McGlone","year":"2014","journal-title":"Neuron"},{"key":"10.1016\/j.engappai.2026.115233_b152","series-title":"Hand and Mind: What Gestures Reveal about Thought","author":"McNeill","year":"1992"},{"key":"10.1016\/j.engappai.2026.115233_b153","series-title":"EMBEC & NBC 2017: Joint Conference of the European Medical and Biological Engineering Conference (EMBEC) and the Nordic-Baltic Conference on Biomedical Engineering and Medical Physics (NBC), Tampere, Finland, June 2017","first-page":"587","article-title":"Human activity recognition using a single optical heart rate monitoring wristband equipped with triaxial accelerometer","author":"Mehrang","year":"2017"},{"key":"10.1016\/j.engappai.2026.115233_b154","doi-asserted-by":"crossref","first-page":"4703","DOI":"10.1109\/ACCESS.2023.3235368","article-title":"Dynamic hand gesture recognition using multi-branch attention based graph and general deep learning model","volume":"11","author":"Miah","year":"2023","journal-title":"IEEE Access"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b155","doi-asserted-by":"crossref","first-page":"21","DOI":"10.3724\/SP.J.2096-5796.2018.0010","article-title":"Data fusion methods in multimodal human computer dialog","volume":"1","author":"Ming-Hao","year":"2019","journal-title":"Virtual Real. Intell. Hardw."},{"key":"10.1016\/j.engappai.2026.115233_b156","series-title":"A new GNG graph-based hand gesture recognition approach","author":"Mirehi","year":"2019"},{"key":"10.1016\/j.engappai.2026.115233_b157","first-page":"1359","article-title":"M3er: Multiplicative multimodal emotion recognition using facial, textual, and speech cues","volume":"vol. 34","author":"Mittal","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b158","doi-asserted-by":"crossref","unstructured":"Miyashita, H., 2020. Taste display that reproduces tastes measured by a taste sensor. In: Proceedings of the 33rd Annual ACM Symposium on User Interface Software and Technology. pp. 1085\u20131093.","DOI":"10.1145\/3379337.3415852"},{"key":"10.1016\/j.engappai.2026.115233_b159","series-title":"17.2 Somatosensation","author":"Molnar","year":"2022"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b160","doi-asserted-by":"crossref","first-page":"11000","DOI":"10.1038\/s41598-023-36490-w","article-title":"Transformer-based hand gesture recognition from instantaneous to fused neural decomposition of high-density EMG signals","volume":"13","author":"Montazerin","year":"2023","journal-title":"Sci. Rep."},{"issue":"8","key":"10.1016\/j.engappai.2026.115233_b161","first-page":"807","article-title":"Human-computer multimodal interface to internet navigation","volume":"16","author":"Mosquera-DeLaCruz","year":"2021","journal-title":"Disabil. Rehabil.: Assist. Technol."},{"key":"10.1016\/j.engappai.2026.115233_b162","series-title":"MPU 6050 - InvenSense TDK - toradex developer","author":"MPU 6050","year":"2023"},{"issue":"6","key":"10.1016\/j.engappai.2026.115233_b163","doi-asserted-by":"crossref","first-page":"2944","DOI":"10.3390\/s23062944","article-title":"An electro-oculogram (EOG) sensor\u2019s ability to detect driver hypovigilance using machine learning","volume":"23","author":"Murugan","year":"2023","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b164","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1007\/s11760-012-0421-2","article-title":"3D head pose estimation and camera mouse implementation using a monocular video camera","volume":"9","author":"Nabati","year":"2015","journal-title":"Signal, Image Video Process."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b165","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1145\/49103.1046407","article-title":"Intelligent multi-media interface technology","volume":"20","author":"Neal","year":"1988","journal-title":"ACM SIGCHI Bull."},{"key":"10.1016\/j.engappai.2026.115233_b166","series-title":"2013 IEEE International Conference on Robotics and Automation","first-page":"4384","article-title":"Autonomous robotic palpation: Machine learning techniques to identify hard inclusions in soft tissues","author":"Nichols","year":"2013"},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b167","doi-asserted-by":"crossref","DOI":"10.1177\/20416695211023953","article-title":"A method for computerized olfactory assessment and training outside of laboratory or clinical settings","volume":"12","author":"Niedenthal","year":"2021","journal-title":"I-Perception"},{"key":"10.1016\/j.engappai.2026.115233_b168","series-title":"Ten usability heuristics","author":"Nielsen","year":"2005"},{"key":"10.1016\/j.engappai.2026.115233_b169","doi-asserted-by":"crossref","unstructured":"Nigay, L., Coutaz, J., 1993. A design space for multimodal systems: concurrent processing and data fusion. In: Proceedings of the INTERACT\u201993 and CHI\u201993 Conference on Human Factors in Computing Systems. pp. 172\u2013178.","DOI":"10.1145\/169059.169143"},{"key":"10.1016\/j.engappai.2026.115233_b170","doi-asserted-by":"crossref","DOI":"10.3389\/fnhum.2024.1391531","article-title":"Biomimetic learning of hand gestures in a humanoid robot","volume":"18","author":"Olikkal","year":"2024","journal-title":"Front. Hum. Neurosci."},{"key":"10.1016\/j.engappai.2026.115233_b171","doi-asserted-by":"crossref","first-page":"153","DOI":"10.1007\/s12193-015-0176-5","article-title":"Multimodal interaction with virtual worlds XMMVR: extensible language for MultiModal interaction with virtual reality worlds","volume":"9","author":"Olmedo","year":"2015","journal-title":"J. Multimodal User Interfaces"},{"key":"10.1016\/j.engappai.2026.115233_b172","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2020.106943","article-title":"Multimodal speech recognition for unmanned aerial vehicles","volume":"90","author":"Onea\u0163\u0103","year":"2021","journal-title":"Comput. Electr. Eng."},{"key":"10.1016\/j.engappai.2026.115233_b173","series-title":"The Anthropology of Language: An Introduction to Linguistic Anthropology","author":"Ottenheimer","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b174","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1007\/s10209-002-0041-7","article-title":"Flexible and robust multimodal interfaces for universal access","volume":"2","author":"Oviatt","year":"2003","journal-title":"Univers. Access Inf. Soc."},{"key":"10.1016\/j.engappai.2026.115233_b175","first-page":"439","article-title":"Multimodal interfaces","author":"Oviatt","year":"2007","journal-title":"Human-Computer Interact. Handb."},{"key":"10.1016\/j.engappai.2026.115233_b176","doi-asserted-by":"crossref","unstructured":"\u00d6zdel, S., Rong, Y., Albaba, B.M., Kuo, Y.L., Wang, X., Kasneci, E., 2024. A Transformer-Based Model for the Prediction of Human Gaze Behavior on Videos. In: Proceedings of the 2024 Symposium on Eye Tracking Research and Applications. pp. 1\u20136.","DOI":"10.1145\/3649902.3653439"},{"issue":"17","key":"10.1016\/j.engappai.2026.115233_b177","doi-asserted-by":"crossref","first-page":"4780","DOI":"10.3390\/s20174780","article-title":"Tacsac: A wearable haptic device with capacitive touch-sensing capability for tactile display","volume":"20","author":"Ozioko","year":"2020","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b178","doi-asserted-by":"crossref","DOI":"10.1016\/j.ijsu.2021.105906","article-title":"The PRISMA 2020 statement: an updated guideline for reporting systematic reviews","volume":"88","author":"Page","year":"2021","journal-title":"Int. J. Surg."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b179","doi-asserted-by":"crossref","first-page":"535","DOI":"10.3390\/electronics11040535","article-title":"Design and development of an assistive system based on eye tracking","volume":"11","author":"Paing","year":"2022","journal-title":"Electronics"},{"key":"10.1016\/j.engappai.2026.115233_b180","article-title":"Overview electrotactile feedback for enhancing human computer interface","volume":"1007","author":"Pamungkas","year":"2018","journal-title":"J. Phys.: Conf. Ser."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b181","first-page":"1261","article-title":"A hybrid of deep CNN and bidirectional LSTM for automatic speech recognition","volume":"29","author":"Passricha","year":"2019","journal-title":"J. Intell. Syst."},{"key":"10.1016\/j.engappai.2026.115233_b182","series-title":"Experimental Robotics: The 17th International Symposium","first-page":"105","article-title":"Digger finger: Gelsight tactile sensor for object identification inside granular media","author":"Patel","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b183","series-title":"Advances in Modern Sensors","first-page":"1","article-title":"Introduction to sensors","author":"Patel","year":"2020"},{"issue":"7","key":"10.1016\/j.engappai.2026.115233_b184","doi-asserted-by":"crossref","first-page":"677","DOI":"10.1109\/34.598226","article-title":"Visual interpretation of hand gestures for human-computer interaction: A review","volume":"19","author":"Pavlovic","year":"1997","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.engappai.2026.115233_b185","doi-asserted-by":"crossref","unstructured":"Peiris, R.L., Feng, Y.L., Chan, L., Minamizawa, K., 2019. Thermalbracelet: Exploring thermal haptic feedback around the wrist. In: Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems. pp. 1\u201311.","DOI":"10.1145\/3290605.3300400"},{"key":"10.1016\/j.engappai.2026.115233_b186","doi-asserted-by":"crossref","unstructured":"Perakakis, M., Potamianos, A., 2008. Multimodal system evaluation using modality efficiency and synergy metrics. In: Proceedings of the 10th International Conference on Multimodal Interfaces. pp. 9\u201316.","DOI":"10.1145\/1452392.1452397"},{"key":"10.1016\/j.engappai.2026.115233_b187","series-title":"Piezoelectric film sensor (picoleaf\u2122) \u2014 Murata Manufacturing Co., Ltd.","author":"Picoleaf","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b188","doi-asserted-by":"crossref","DOI":"10.3389\/fneur.2022.917413","article-title":"Comparison of EOG and VOG obtained eye movements during horizontal head impulse testing","volume":"13","author":"Pleshkov","year":"2022","journal-title":"Front. Neurol."},{"issue":"18","key":"10.1016\/j.engappai.2026.115233_b189","doi-asserted-by":"crossref","first-page":"6024","DOI":"10.3390\/s21186024","article-title":"Tactile object recognition for humanoid robots using new designed piezoresistive tactile sensor and dcnn","volume":"21","author":"Pohtongkam","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b190","doi-asserted-by":"crossref","first-page":"104","DOI":"10.1016\/j.neunet.2014.10.005","article-title":"Towards an intelligent framework for multimodal affective data analysis","volume":"63","author":"Poria","year":"2015","journal-title":"Neural Netw."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b191","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1007\/s12193-019-00316-9","article-title":"Interactive gaze and finger controlled HUD for cars","volume":"14","author":"Prabhakar","year":"2020","journal-title":"J. Multimodal User Interfaces"},{"key":"10.1016\/j.engappai.2026.115233_b192","series-title":"Pressure profile systems\u00ae \u2014 tactile pressure mapping systems","author":"Pressure Profile","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b193","series-title":"2022 International Conference on Communication, Computing and Internet of Things","first-page":"1","article-title":"Temporal and spectral features based gender recognition from audio signals","author":"Priya","year":"2022"},{"key":"10.1016\/j.engappai.2026.115233_b194","series-title":"Core - hardware - pupil labs docs","author":"Pupil Labs","year":"2024"},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b195","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1007\/s11370-023-00466-6","article-title":"A multimodal domestic service robot interaction system for people with declined abilities to express themselves","volume":"16","author":"Qin","year":"2023","journal-title":"Intell. Serv. Robot."},{"issue":"31","key":"10.1016\/j.engappai.2026.115233_b196","doi-asserted-by":"crossref","first-page":"eabq2521","DOI":"10.1126\/sciadv.abq2521","article-title":"Artificial tactile perception smart finger for material identification based on triboelectric sensing","volume":"8","author":"Qu","year":"2022","journal-title":"Sci. Adv."},{"key":"10.1016\/j.engappai.2026.115233_b197","series-title":"International Conference on Machine Learning","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2023"},{"key":"10.1016\/j.engappai.2026.115233_b198","doi-asserted-by":"crossref","first-page":"1817","DOI":"10.1109\/TNSRE.2022.3187472","article-title":"Touchless head-control (THC): Head gesture recognition for cursor and orientation control","volume":"30","author":"Rahmaniar","year":"2022","journal-title":"IEEE Trans. Neural Syst. Rehabil. Eng."},{"key":"10.1016\/j.engappai.2026.115233_b199","series-title":"Multimodal Human-Computer Interaction: a Constructive and Empirical Study","author":"Raisamo","year":"1999"},{"key":"10.1016\/j.engappai.2026.115233_b200","doi-asserted-by":"crossref","unstructured":"Ranasinghe, N., Cheok, A.D., Nakatsu, R., 2012. Taste\/IP: The sensation of taste for digital communication. In: Proceedings of the 14th ACM International Conference on Multimodal Interaction. pp. 409\u2013416.","DOI":"10.1145\/2388676.2388768"},{"key":"10.1016\/j.engappai.2026.115233_b201","doi-asserted-by":"crossref","unstructured":"Ranasinghe, N., Lee, K.Y., Suthokumar, G., Do, E.Y.L., 2014. The sensation of taste in the future of immersive media. In: Proceedings of the 2nd ACM International Workshop on Immersive Media Experiences. pp. 7\u201312.","DOI":"10.1145\/2660579.2660586"},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b202","doi-asserted-by":"crossref","first-page":"768","DOI":"10.3390\/s21030768","article-title":"Activity recognition for ambient assisted living with videos, inertial units and ambient sensors","volume":"21","author":"Ranieri","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b203","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10462-012-9356-9","article-title":"Vision based hand gesture recognition for human computer interaction: a survey","volume":"43","author":"Rautaray","year":"2015","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.engappai.2026.115233_b204","series-title":"SpeechBrain: A general-purpose speech toolkit","author":"Ravanelli","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b205","doi-asserted-by":"crossref","unstructured":"Robinson, F.A., 2020. Audio cells: a spatial audio prototyping environment for Human-Robot Interaction. In: Proceedings of the Fourteenth International Conference on Tangible, Embedded, and Embodied Interaction. pp. 955\u2013960.","DOI":"10.1145\/3374920.3374999"},{"key":"10.1016\/j.engappai.2026.115233_b206","series-title":"Intelligent Technologies for Interactive Entertainment: 6th International Conference, INTETAIN 2014, Chicago, IL, USA, July 9-11, 2014. Proceedings 6","first-page":"67","article-title":"Head pose estimation by perspective-n-point solution based on 2d markerless face tracking","author":"Rocca","year":"2014"},{"key":"10.1016\/j.engappai.2026.115233_b207","doi-asserted-by":"crossref","first-page":"81","DOI":"10.3389\/fbioe.2016.00081","article-title":"Electronic noses and tongues in wine industry","volume":"4","author":"Rodr\u00edguez-M\u00e9ndez","year":"2016","journal-title":"Front. Bioeng. Biotechnol."},{"key":"10.1016\/j.engappai.2026.115233_b208","doi-asserted-by":"crossref","DOI":"10.3389\/fnhum.2022.867377","article-title":"Development of a sensing platform based on hands-free interfaces for controlling electronic devices","volume":"16","author":"Rojas","year":"2022","journal-title":"Front. Hum. Neurosci."},{"issue":"2","key":"10.1016\/j.engappai.2026.115233_b209","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1109\/TOH.2013.2296312","article-title":"Effects of kinesthetic and cutaneous stimulation during the learning of a viscous force field","volume":"7","author":"Rosati","year":"2014","journal-title":"IEEE Trans. Haptics"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b210","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1080\/07370024.2013.870385","article-title":"Controlling a smartphone using gaze gestures as the input mechanism","volume":"30","author":"Rozado","year":"2015","journal-title":"Human\u2013Computer Interact."},{"issue":"12","key":"10.1016\/j.engappai.2026.115233_b211","doi-asserted-by":"crossref","first-page":"2093","DOI":"10.3390\/electronics9122093","article-title":"A multimodal user interface for an assistive robotic shopping cart","volume":"9","author":"Ryumin","year":"2020","journal-title":"Electronics"},{"issue":"9","key":"10.1016\/j.engappai.2026.115233_b212","doi-asserted-by":"crossref","first-page":"20945","DOI":"10.3390\/s150920945","article-title":"Head pose estimation on top of haar-like face detection: A study using the kinect sensor","volume":"15","author":"Saeed","year":"2015","journal-title":"Sensors"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b213","doi-asserted-by":"crossref","first-page":"192","DOI":"10.1109\/TBCAS.2017.2771235","article-title":"Simultaneous multimodal PC access for people with disabilities by integrating head tracking, speech recognition, and tongue motion","volume":"12","author":"Sahadat","year":"2017","journal-title":"IEEE Trans. Biomed. Circuits Syst."},{"issue":"10","key":"10.1016\/j.engappai.2026.115233_b214","doi-asserted-by":"crossref","first-page":"1780","DOI":"10.1049\/iet-ipr.2017.1312","article-title":"Hand gesture recognition using DWT and F-ratio based feature descriptor","volume":"12","author":"Sahoo","year":"2018","journal-title":"IET Image Process."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b215","doi-asserted-by":"crossref","first-page":"42","DOI":"10.3390\/asi7030042","article-title":"Design and implementation of adam: A humanoid robotic head with social interaction capabilities","volume":"7","author":"Said","year":"2024","journal-title":"Appl. Syst. Innov."},{"key":"10.1016\/j.engappai.2026.115233_b216","series-title":"Fast and accurate recurrent neural network acoustic models for speech recognition","author":"Sak","year":"2015"},{"key":"10.1016\/j.engappai.2026.115233_b217","series-title":"Introduction to arabic speech recognition using cmUSphinx system","author":"Satori","year":"2007"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b218","doi-asserted-by":"crossref","first-page":"6884","DOI":"10.1038\/s41598-022-10503-6","article-title":"Robot touch with speech boosts positive emotions","volume":"12","author":"Sawabe","year":"2022","journal-title":"Sci. Rep."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b219","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1145\/2851072","article-title":"Biosignals in human-computer interaction","volume":"23","author":"Schmidt","year":"2015","journal-title":"Interactions"},{"key":"10.1016\/j.engappai.2026.115233_b220","series-title":"2020 24th International Conference on System Theory, Control and Computing","first-page":"232","article-title":"Time series feature extraction for head gesture recognition: Considerations toward HCI applications","author":"Severin","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b221","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2022.103970","article-title":"A novel spatio-temporal convolutional neural framework for multimodal emotion recognition","volume":"78","author":"Sharafi","year":"2022","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.engappai.2026.115233_b222","series-title":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"4779","article-title":"Natural tts synthesis by conditioning wavenet on mel spectrogram predictions","author":"Shen","year":"2018"},{"key":"10.1016\/j.engappai.2026.115233_b223","series-title":"Siri - apple","author":"Siri","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b224","series-title":"Multimodal Human-Computer Communication: Systems, Techniques, and Experiments","first-page":"101","article-title":"Modeling and processing of oral and tactile activities in the georal system","author":"Siroux","year":"2006"},{"key":"10.1016\/j.engappai.2026.115233_b225","series-title":"Skinergy \u2014 Hybrid body lab","author":"Skinergy","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b226","doi-asserted-by":"crossref","unstructured":"Snoek, C.G., Worring, M., Smeulders, A.W., 2005. Early versus late fusion in semantic video analysis. In: Proceedings of the 13th Annual ACM International Conference on Multimedia. pp. 399\u2013402.","DOI":"10.1145\/1101149.1101236"},{"key":"10.1016\/j.engappai.2026.115233_b227","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1016\/j.compbiomed.2016.01.012","article-title":"A practical efficient human computer interface based on saccadic eye movements for people with disabilities","volume":"70","author":"Soltani","year":"2016","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.engappai.2026.115233_b228","series-title":"ViTGaze: Gaze following with interaction features in vision transformers","author":"Song","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b229","series-title":"Smart Electromechanical Systems: Behavioral Decision Making","first-page":"163","article-title":"Robotic wheelchair control system for multimodal interfaces based on a symbolic model of the world","author":"Sorokoumov","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b230","doi-asserted-by":"crossref","unstructured":"Ssemugabi, S., De Villiers, R., 2007. A comparative study of two usability evaluation methods using a web-based e-learning application. In: Proceedings of the 2007 Annual Research Conference of the South African Institute of Computer Scientists and Information Technologists on IT Research in Developing Countries. pp. 132\u2013142.","DOI":"10.1145\/1292491.1292507"},{"issue":"24","key":"10.1016\/j.engappai.2026.115233_b231","doi-asserted-by":"crossref","first-page":"7162","DOI":"10.3390\/s20247162","article-title":"Performance analysis of a head and eye motion-based control interface for assistive robots","volume":"20","author":"Stalljann","year":"2020","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b232","series-title":"Psychophysiological Recording","author":"Stern","year":"2001"},{"key":"10.1016\/j.engappai.2026.115233_b233","doi-asserted-by":"crossref","DOI":"10.1016\/j.compag.2023.108343","article-title":"An electronic nose based on adaptive fusion of transformer-ELM with active temperature modulation algorithm for accurate odor detection in refrigerators","volume":"214","author":"Sun","year":"2023","journal-title":"Comput. Electron. Agric."},{"key":"10.1016\/j.engappai.2026.115233_b234","series-title":"A survey on neural speech synthesis","author":"Tan","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b235","doi-asserted-by":"crossref","DOI":"10.1109\/TIM.2024.3400361","article-title":"A convolutional-transformer based approach for dynamic gesture recognition of data gloves","author":"Tang","year":"2024","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.engappai.2026.115233_b236","series-title":"Pressure mapping, force measurement, & tactile sensors \u2014 Tekscan","author":"Tekscan","year":"2024"},{"issue":"2","key":"10.1016\/j.engappai.2026.115233_b237","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1177\/1048371312465876","article-title":"Putting your best mic forward","volume":"26","author":"Thompson","year":"2013","journal-title":"Gen. Music. Today"},{"key":"10.1016\/j.engappai.2026.115233_b238","series-title":"Wireless MEMS Networks and Applications","first-page":"177","article-title":"MEMS microphones for wireless applications","author":"Tiete","year":"2017"},{"issue":"2","key":"10.1016\/j.engappai.2026.115233_b239","doi-asserted-by":"crossref","first-page":"154","DOI":"10.1177\/0142723720936789","article-title":"The temporal dynamics of deictic communication","volume":"41","author":"Todisco","year":"2021","journal-title":"First Lang."},{"key":"10.1016\/j.engappai.2026.115233_b240","unstructured":"T\u00f6lgyessy, M., Hubinsk\u1ef3, P., 2011. The Kinect sensor in robotics education. In: Proceedings of 2nd International Conference on Robotics in Education. pp. 143\u2013146."},{"key":"10.1016\/j.engappai.2026.115233_b241","doi-asserted-by":"crossref","first-page":"189","DOI":"10.1016\/j.patrec.2013.07.003","article-title":"Multimodal interaction: A review","volume":"36","author":"Turk","year":"2014","journal-title":"Pattern Recognit. Lett."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b242","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1145\/330534.330535","article-title":"Perceptual user interfaces (introduction)","volume":"43","author":"Turk","year":"2000","journal-title":"Commun. ACM"},{"key":"10.1016\/j.engappai.2026.115233_b243","series-title":"Design, User Experience, and Usability. Theories, Methods, and Tools for Designing the User Experience: Third International Conference, DUXU 2014, Held As Part of HCI International 2014, Heraklion, Crete, Greece, June 22-27, 2014, Proceedings, Part I 3","first-page":"484","article-title":"Evaluating the usability on multimodal interfaces: a case study on tablets applications","author":"Vilar Neto","year":"2014"},{"key":"10.1016\/j.engappai.2026.115233_b244","first-page":"3545","article-title":"Building an application framework for speech and pen input integration in multimodal learning interfaces","volume":"vol. 6","author":"Vo","year":"1996"},{"key":"10.1016\/j.engappai.2026.115233_b245","doi-asserted-by":"crossref","DOI":"10.1016\/j.ijhcs.2021.102755","article-title":"\u2018Address and command\u2019: Two-handed mid-air interactions with multiple home devices","volume":"159","author":"Vogiatzidakis","year":"2022","journal-title":"Int. J. Hum.-Comput. Stud."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b246","doi-asserted-by":"crossref","first-page":"254","DOI":"10.1109\/TIV.2018.2843120","article-title":"Driver gaze zone estimation using convolutional neural networks: A general framework and ablative analysis","volume":"3","author":"Vora","year":"2018","journal-title":"IEEE Trans. Intell. Veh."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b247","first-page":"109","article-title":"A Gaussian mixture model based speech recognition system using matlab","volume":"4","author":"Vyas","year":"2013","journal-title":"Signal Image Process."},{"key":"10.1016\/j.engappai.2026.115233_b248","series-title":"Neural codec language models are zero-shot text to speech synthesizers","author":"Wang","year":"2023"},{"issue":"8","key":"10.1016\/j.engappai.2026.115233_b249","doi-asserted-by":"crossref","DOI":"10.1002\/aisy.201900090","article-title":"Tactile sensors for advanced intelligent systems","volume":"1","author":"Wang","year":"2019","journal-title":"Adv. Intell. Syst."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b250","doi-asserted-by":"crossref","first-page":"52","DOI":"10.1007\/s42486-023-00137-6","article-title":"Multimodal intent understanding and interaction system for elderly-assisted companionship","volume":"6","author":"Wang","year":"2024","journal-title":"CCF Trans. Pervasive Comput. Interact."},{"key":"10.1016\/j.engappai.2026.115233_b251","series-title":"Fairseq S2T: Fast speech-to-text modeling with fairseq","author":"Wang","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b252","series-title":"An Evaluation Framework for Multimodal Interaction","author":"Wechsung","year":"2014"},{"key":"10.1016\/j.engappai.2026.115233_b253","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1007\/s12193-011-0088-y","article-title":"Measuring the quality of service and quality of experience of multimodal human\u2013machine interaction","volume":"6","author":"Wechsung","year":"2012","journal-title":"J. Multimodal User Interfaces"},{"key":"10.1016\/j.engappai.2026.115233_b254","doi-asserted-by":"crossref","unstructured":"Wei, W., Li, S., Okada, S., Komatani, K., 2021. Multimodal user satisfaction recognition for non-task oriented dialogue systems. In: Proceedings of the 2021 International Conference on Multimodal Interaction. pp. 586\u2013594.","DOI":"10.1145\/3462244.3479928"},{"key":"10.1016\/j.engappai.2026.115233_b255","doi-asserted-by":"crossref","first-page":"453","DOI":"10.1016\/j.protcy.2012.02.101","article-title":"Review of electronic-nose technologies and algorithms to detect hazardous chemicals in the environment","volume":"1","author":"Wilson","year":"2012","journal-title":"Procedia Technol."},{"issue":"2","key":"10.1016\/j.engappai.2026.115233_b256","doi-asserted-by":"crossref","first-page":"272","DOI":"10.1016\/j.jpba.2011.02.002","article-title":"A comparative study on two electronic tongues for pharmaceutical formulation development","volume":"55","author":"Woertz","year":"2011","journal-title":"J. Pharm. Biomed. Anal."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b257","doi-asserted-by":"crossref","first-page":"46","DOI":"10.1109\/MIS.2013.34","article-title":"Youtube movie reviews: Sentiment analysis in an audio-visual context","volume":"28","author":"W\u00f6llmer","year":"2013","journal-title":"IEEE Intell. Syst."},{"key":"10.1016\/j.engappai.2026.115233_b258","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s10055-016-0296-6","article-title":"A virtual reality keyboard with realistic haptic feedback in a fully immersive virtual environment","volume":"21","author":"Wu","year":"2017","journal-title":"Virtual Real."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b259","doi-asserted-by":"crossref","first-page":"334","DOI":"10.1109\/6046.807953","article-title":"Multimodal integration-a statistical view","volume":"1","author":"Wu","year":"1999","journal-title":"IEEE Trans. Multimed."},{"issue":"5","key":"10.1016\/j.engappai.2026.115233_b260","doi-asserted-by":"crossref","first-page":"E67","DOI":"10.1111\/aor.13004","article-title":"EMG-based estimation of limb movement using deep learning with recurrent convolutional neural networks","volume":"42","author":"Xia","year":"2018","journal-title":"Artif. Organs."},{"issue":"8","key":"10.1016\/j.engappai.2026.115233_b261","doi-asserted-by":"crossref","first-page":"201","DOI":"10.3390\/drones6080201","article-title":"Multimodal fusion of voice and gesture data for UAV control","volume":"6","author":"Xiang","year":"2022","journal-title":"Drones"},{"key":"10.1016\/j.engappai.2026.115233_b262","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102204","article-title":"Instruction-vit: Multi-modal prompts for instruction learning in vision transformer","volume":"104","author":"Xiao","year":"2024","journal-title":"Inf. Fusion"},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b263","doi-asserted-by":"crossref","first-page":"252","DOI":"10.1007\/s12555-022-0051-6","article-title":"A dynamic head gesture recognition method for real-time intention inference and its application to visual human-robot interaction","volume":"22","author":"Xie","year":"2024","journal-title":"Int. J. Control. Autom. Syst."},{"key":"10.1016\/j.engappai.2026.115233_b264","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.108210","article-title":"Head pose estimation using deep neural networks and 3D point clouds","volume":"121","author":"Xu","year":"2022","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.engappai.2026.115233_b265","series-title":"A benchmarking on cloud based speech-to-text services for french speech and background noise effect","author":"Xu","year":"2021"},{"key":"10.1016\/j.engappai.2026.115233_b266","doi-asserted-by":"crossref","first-page":"54549","DOI":"10.1109\/ACCESS.2022.3176717","article-title":"Robust hand gesture recognition based on RGB-D data for natural human\u2013computer interaction","volume":"10","author":"Xu","year":"2022","journal-title":"IEEE Access"},{"key":"10.1016\/j.engappai.2026.115233_b267","series-title":"Sensors, 2012 IEEE","first-page":"1","article-title":"A survey of olfactory displays: Making and delivering scents","author":"Yanagida","year":"2012"},{"key":"10.1016\/j.engappai.2026.115233_b268","article-title":"Behavioral and physiological signals-based deep multimodal approach for mobile emotion recognition","author":"Yang","year":"2021","journal-title":"IEEE Trans. Affect. Comput."},{"key":"10.1016\/j.engappai.2026.115233_b269","series-title":"Emollm: Multimodal emotional understanding meets large language models","author":"Yang","year":"2024"},{"key":"10.1016\/j.engappai.2026.115233_b270","series-title":"Computer Vision and Machine Learning with RGB-D Sensors","first-page":"289","article-title":"Real-time hand gesture recognition using RGB-D sensor","author":"Yao","year":"2014"},{"issue":"7","key":"10.1016\/j.engappai.2026.115233_b271","doi-asserted-by":"crossref","first-page":"2513","DOI":"10.3390\/s22072513","article-title":"Motion estimation and hand gesture recognition-based human\u2013UAV interaction approach in real time","volume":"22","author":"Yoo","year":"2022","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b272","series-title":"2020 16th International Conference on Control, Automation, Robotics and Vision","first-page":"759","article-title":"Srg 3: Speech-driven robot gesture generation with gan","author":"Yu","year":"2020"},{"key":"10.1016\/j.engappai.2026.115233_b273","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2020.107316","article-title":"Single image-based head pose estimation with spherical parametrization and 3D morphing","volume":"103","author":"Yuan","year":"2020","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.engappai.2026.115233_b274","doi-asserted-by":"crossref","DOI":"10.3389\/frobt.2024.1312554","article-title":"Multimodal fusion of emg and vision for human grasp intent inference in prosthetic hand control","volume":"11","author":"Zandigohar","year":"2024","journal-title":"Front. Robot. AI"},{"key":"10.1016\/j.engappai.2026.115233_b275","first-page":"1","article-title":"Contextual object detection with multimodal large language models","author":"Zang","year":"2024","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.engappai.2026.115233_b276","doi-asserted-by":"crossref","first-page":"223","DOI":"10.1007\/s10772-013-9221-5","article-title":"Hybrid continuous speech recognition systems by HMM, MLP and SVM: a comparative study","volume":"17","author":"Zarrouk","year":"2014","journal-title":"Int. J. Speech Technol."},{"key":"10.1016\/j.engappai.2026.115233_b277","article-title":"Residual self-calibrated network with multi-scale channel attention for accurate EOG-based eye movement classification","author":"Zeng","year":"2024","journal-title":"IEEE J. Biomed. Health Informatics"},{"key":"10.1016\/j.engappai.2026.115233_b278","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1016\/j.neucom.2020.09.023","article-title":"Deep unsupervised multi-modal fusion network for detecting driver distraction","volume":"421","author":"Zhang","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.engappai.2026.115233_b279","series-title":"ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"6470","article-title":"PyHTK: Python library and ASR pipelines for HTK","author":"Zhang","year":"2019"},{"key":"10.1016\/j.engappai.2026.115233_b280","doi-asserted-by":"crossref","DOI":"10.1155\/2017\/5739301","article-title":"Eye tracking based control system for natural human-computer interaction","volume":"2017","author":"Zhang","year":"2017","journal-title":"Comput. Intell. Neurosci."},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b281","doi-asserted-by":"crossref","first-page":"109","DOI":"10.1007\/s42452-022-04992-3","article-title":"Mid-air gestures for in-vehicle media player: Elicitation, segmentation, recognition, and eye-tracking testing","volume":"4","author":"Zhang","year":"2022","journal-title":"SN Appl. Sci."},{"issue":"20","key":"10.1016\/j.engappai.2026.115233_b282","doi-asserted-by":"crossref","first-page":"31309","DOI":"10.1007\/s11042-023-14732-3","article-title":"Static hand gesture recognition method based on the vision transformer","volume":"82","author":"Zhang","year":"2023","journal-title":"Multimedia Tools Appl."},{"issue":"3","key":"10.1016\/j.engappai.2026.115233_b283","doi-asserted-by":"crossref","first-page":"478","DOI":"10.1109\/JSTSP.2020.2987728","article-title":"Multimodal intelligence: Representation learning, information fusion, and applications","volume":"14","author":"Zhang","year":"2020","journal-title":"IEEE J. Sel. Top. Signal Process."},{"key":"10.1016\/j.engappai.2026.115233_b284","article-title":"Spatial-temporal synchronous transformer for skeleton-based hand gesture recognition","author":"Zhao","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"1","key":"10.1016\/j.engappai.2026.115233_b285","doi-asserted-by":"crossref","first-page":"10","DOI":"10.3390\/s18010010","article-title":"Dynamic gesture recognition with a terahertz radar based on range profile sequences and Doppler signatures","volume":"18","author":"Zhou","year":"2017","journal-title":"Sensors"},{"key":"10.1016\/j.engappai.2026.115233_b286","doi-asserted-by":"crossref","DOI":"10.1016\/j.jspr.2021.101805","article-title":"Feasibility of detection of infested rice using an electronic nose","volume":"92","author":"Zhou","year":"2021","journal-title":"J. Stored Prod. Res."},{"issue":"8","key":"10.1016\/j.engappai.2026.115233_b287","doi-asserted-by":"crossref","first-page":"2497","DOI":"10.3390\/s18082497","article-title":"Multiday EMG-based classification of hand motions with deep learning techniques","volume":"18","author":"Zia ur Rehman","year":"2018","journal-title":"Sensors"},{"issue":"4","key":"10.1016\/j.engappai.2026.115233_b288","doi-asserted-by":"crossref","first-page":"81","DOI":"10.3390\/mti2040081","article-title":"Semantic fusion for natural multimodal interfaces using concurrent augmented transition networks","volume":"2","author":"Zimmerer","year":"2018","journal-title":"Multimodal Technol. Interact."},{"key":"10.1016\/j.engappai.2026.115233_b289","series-title":"2018 IEEE Conference on Virtual Reality and 3D User Interfaces","first-page":"745","article-title":"Space tentacles-integrating multimodal input into a VR adventure game","author":"Zimmerer","year":"2018"}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626015174?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626015174?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T06:45:55Z","timestamp":1781592355000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626015174"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":289,"alternative-id":["S0952197626015174"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.115233","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Multimodal human\u2013computer interaction: A panoptic view","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.115233","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"115233"}}