{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:32:10Z","timestamp":1772119930441,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T00:00:00Z","timestamp":1741564800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T00:00:00Z","timestamp":1741564800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100012542","name":"Sichuan Provincial Science and Technology Support Program","doi-asserted-by":"publisher","award":["2024YFHZ0059"],"award-info":[{"award-number":["2024YFHZ0059"]}],"id":[{"id":"10.13039\/100012542","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s11760-025-03916-z","type":"journal-article","created":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T04:17:45Z","timestamp":1741580265000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Voice-assisted multimodal fusion network for difficult airway assessment"],"prefix":"10.1007","volume":"19","author":[{"given":"Xiaofan","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangchao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenjin","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Yao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,3,10]]},"reference":[{"issue":"5","key":"3916_CR1","doi-asserted-by":"publisher","first-page":"632","DOI":"10.1093\/bja\/aer059","volume":"106","author":"TM Cook","year":"2011","unstructured":"Cook, T.M., Woodall, N., Harper, J., Benger, J.: Fourth National Audit Project: major complications of airway management in the UK: results of the Fourth National Audit Project of the Royal College of Anaesthetists and the Difficult Airway Society. Part 2: intensive care and emergency departments. Brit. J. Anaesth. 106(5), 632\u2013642 (2011). https:\/\/doi.org\/10.1093\/bja\/aer059","journal-title":"Brit. J. Anaesth."},{"issue":"2","key":"3916_CR2","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1097\/EJA.0000000000001127","volume":"37","author":"M Gemma","year":"2020","unstructured":"Gemma, M., Buratti, L., Di Santo, D., Calvi, M.R., Ravizza, A., Bondi, S., Bussi, M., Beretta, L.: Pre-operative transnasal endoscopy as a predictor of difficult airway: a prospective cohort study. Eur. J. Anaesthesiol. 37(2), 98\u2013104 (2020). https:\/\/doi.org\/10.1097\/EJA.0000000000001127","journal-title":"Eur. J. Anaesthesiol."},{"issue":"3","key":"3916_CR3","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1111\/anae.12955","volume":"70","author":"AK N\u00f8rskov","year":"2015","unstructured":"N\u00f8rskov, A.K., Rosenstock, C., Wetterslev, J., Astrup, G., Afshari, A., Lundstr\u00f8m, L.: Diagnostic accuracy of anaesthesiologists? Prediction of difficult airway management in daily clinical practice: a cohort study of 188 064 patients registered in the Danish anaesthesia database. Anaesthesia 70(3), 272\u2013281 (2015)","journal-title":"Anaesthesia"},{"issue":"6","key":"3916_CR4","doi-asserted-by":"publisher","first-page":"1360","DOI":"10.1097\/ALN.0000435832.39353.20","volume":"119","author":"S Kheterpal","year":"2013","unstructured":"Kheterpal, S., Healy, D., Aziz, M.F., Shanks, A.M., Freundlich, R.E., Linton, F., Martin, L.D., Linton, J., Epps, J.L., Fernandez-Bustamante, A., Jameson, L.C., Tremper, T., Tremper, K.K.: Incidence, predictors, and outcome of difficult mask ventilation combined with difficult laryngoscopy: a report from the multicenter perioperative outcomes group. Anesthesiology 119(6), 1360\u20131369 (2013). https:\/\/doi.org\/10.1097\/ALN.0000435832.39353.20","journal-title":"Anesthesiology"},{"issue":"7","key":"3916_CR5","doi-asserted-by":"publisher","first-page":"2645","DOI":"10.18203\/2320-6012.ijrms20161925","volume":"4","author":"A Varghese","year":"2017","unstructured":"Varghese, A., Mohamed, T.: A comparison of mallampati scoring, upper lip bite test and sternomental distance in predicting difficult intubation. Int. J. Res. Med. Sci. 4(7), 2645\u20132648 (2017). https:\/\/doi.org\/10.18203\/2320-6012.ijrms20161925","journal-title":"Int. J. Res. Med. Sci."},{"issue":"1","key":"3916_CR6","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1186\/s40560-021-00551-x","volume":"9","author":"T Hayasaka","year":"2021","unstructured":"Hayasaka, T., Kawano, K., Kurihara, K., Suzuki, H., Kawamae, K.: Creation of an artificial intelligence model for intubation difficulty classification by deep learning (convolutional neural network) using face images: an observational study. J. Intensive Care 9(1), 38 (2021)","journal-title":"J. Intensive Care"},{"key":"3916_CR7","doi-asserted-by":"publisher","first-page":"104737","DOI":"10.1016\/j.compbiomed.2021.104737","volume":"136","author":"TE Tavolara","year":"2021","unstructured":"Tavolara, T.E., Gurcan, M.N., Segal, S., Niazi, M.K.K.: Identification of difficult to intubate patients from frontal face images using an ensemble of deep learning models. Comput. Biol. Med. 136, 104737 (2021). https:\/\/doi.org\/10.1016\/j.compbiomed.2021.104737","journal-title":"Comput. Biol. Med."},{"issue":"5","key":"3916_CR8","doi-asserted-by":"publisher","first-page":"066","DOI":"10.1016\/j.heliyon.2023.e15629","volume":"9","author":"G Wang","year":"2023","unstructured":"Wang, G., Li, C., Tang, F., Wang, Y., Wu, S., Zhi, H., Zhang, F., Wang, M., Zhang, J.: A fully-automatic semi-supervised deep learning model for difficult airway assessment. Heliyon 9(5), 066\u2013111 (2023). https:\/\/doi.org\/10.1016\/j.heliyon.2023.e15629","journal-title":"Heliyon"},{"key":"3916_CR9","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1016\/j.bspc.2024.106738","volume":"98","author":"X Li","year":"2024","unstructured":"Li, X., Peng, B., Yao, Y., Zhang, G., Xie, Z.: Difficult airway assessment with multi-view contrastive representation prior and ensemble classification. Biomed. Signal Process. Control 98, 106\u2013738 (2024). https:\/\/doi.org\/10.1016\/j.bspc.2024.106738","journal-title":"Biomed. Signal Process. Control"},{"issue":"3","key":"3916_CR10","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/s41019-022-00190-8","volume":"7","author":"M-S Chen","year":"2022","unstructured":"Chen, M.-S., Lin, J.-Q., Li, X.-L., Liu, B.-Y., Wang, C.-D., Huang, D., Lai, J.-H.: Representation learning in multi-view clustering: a literature review. Data Sci. Eng. 7(3), 225\u2013241 (2022). https:\/\/doi.org\/10.1007\/s41019-022-00190-8","journal-title":"Data Sci. Eng."},{"issue":"4","key":"3916_CR11","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1111\/anae.16194","volume":"79","author":"M Xia","year":"2024","unstructured":"Xia, M., Jin, C., Zheng, Y., Wang, J., Zhao, M., Cao, S., Xu, T., Pei, B., Irwin, M.G., Lin, Z., Jiang, H.: Deep learning-based facial analysis for predicting difficult videolaryngoscopy: a feasibility study. Anaesthesia 79(4), 399\u2013409 (2024). https:\/\/doi.org\/10.1111\/anae.16194","journal-title":"Anaesthesia"},{"key":"3916_CR12","doi-asserted-by":"publisher","first-page":"108118","DOI":"10.1016\/j.cmpb.2024.108118","volume":"248","author":"F Garc\u00eda-Garc\u00eda","year":"2024","unstructured":"Garc\u00eda-Garc\u00eda, F., Lee, D.-J., Mendoza-Garc\u00e9s, F.J., Garc\u00eda-Guti\u00e9rrez, S.: Reliable prediction of difficult airway for tracheal intubation from patient preoperative photographs by machine learning methods. Comput. Methods Programs Biomed. 248, 108118 (2024). https:\/\/doi.org\/10.1016\/j.cmpb.2024.108118","journal-title":"Comput. Methods Programs Biomed."},{"key":"3916_CR13","doi-asserted-by":"publisher","first-page":"66","DOI":"10.21037\/fomm-20-79","volume":"3","author":"J Wang","year":"2021","unstructured":"Wang, J., Xia, M., Jiang, H.: Advances in studies on imaging and artificial intelligence technology-assisted difficult airway assessment. Front. Oral Maxillofac. Med. 3, 66 (2021)","journal-title":"Front. Oral Maxillofac. Med."},{"issue":"1","key":"3916_CR14","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1213\/ANE.0b013e31820098d6","volume":"112","author":"CW Connor","year":"2011","unstructured":"Connor, C.W., Segal, S.: Accurate classification of difficult intubation by computerized facial analysis. Anesth. Anal. 112(1), 84\u201393 (2011)","journal-title":"Anesth. Anal."},{"issue":"2","key":"3916_CR15","doi-asserted-by":"publisher","first-page":"328","DOI":"10.1109\/TBME.2015.2457032","volume":"63","author":"GL Cuendet","year":"2015","unstructured":"Cuendet, G.L., Schoettker, P., Y\u00fcce, A., Sorci, M., Gao, H., Perruchoud, C., Thiran, J.-P.: Facial image analysis for fully automatic prediction of difficult endotracheal intubation. IEEE Trans. Biomed. Eng. 63(2), 328\u2013339 (2015)","journal-title":"IEEE Trans. Biomed. Eng."},{"key":"3916_CR16","doi-asserted-by":"crossref","unstructured":"Jiao, J., Cai, Y., Alsharid, M., Drukker, L., Papageorghiou, A.T., Noble, J.A.: Self-supervised contrastive video-speech representation learning for ultrasound. In: Medical Image Computing and Computer Assisted Intervention\u2014MICCAI 2020: 23rd International Conference, Lima, Peru, October 4\u20138, 2020, Proceedings, Part III 23, pp. 534\u2013543. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-59716-0_51"},{"key":"3916_CR17","doi-asserted-by":"crossref","unstructured":"Medina, S., Tome, D., Stoll, C., Tiede, M., Munhall, K., Hauptmann, A.G., Matthews, I.: Speech driven tongue animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20406\u201320416 (2022)","DOI":"10.1109\/CVPR52688.2022.01976"},{"key":"3916_CR18","doi-asserted-by":"crossref","unstructured":"Jain, R., Yu, B., Wu, P., Prabhune, T., Anumanchipalli, G.: Multimodal segmentation for vocal tract modeling. arXiv preprint arXiv:2406.15754 (2024)","DOI":"10.21437\/Interspeech.2024-2223"},{"issue":"12","key":"3916_CR19","doi-asserted-by":"publisher","first-page":"16029","DOI":"10.1007\/s10489-022-04255-z","volume":"53","author":"Y Ying","year":"2023","unstructured":"Ying, Y., Yang, T., Zhou, H.: Multimodal fusion for Alzheimer\u2019s disease recognition. Appl. Intell. 53(12), 16029\u201316040 (2023)","journal-title":"Appl. Intell."},{"key":"3916_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2024.108949","volume":"180","author":"M Neumann","year":"2024","unstructured":"Neumann, M., Kothare, H., Ramanarayanan, V.: Multimodal speech biomarkers for remote monitoring of als disease progression. Comput. Biol. Med. 180, 108949 (2024). https:\/\/doi.org\/10.1016\/j.compbiomed.2024.108949","journal-title":"Comput. Biol. Med."},{"key":"3916_CR21","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.cmpb.2022.107109","volume":"226","author":"S Kumar","year":"2022","unstructured":"Kumar, S., Chaube, M.K., Alsamhi, S.H., Gupta, S.K., Guizani, M., Gravina, R., Fortino, G.: A novel multimodal fusion framework for early diagnosis and accurate classification of covid-19 patients using X-ray images and speech signal processing techniques. Comput. Methods Prog. Biomed. 226, 107\u2013109 (2022). https:\/\/doi.org\/10.1016\/j.cmpb.2022.107109","journal-title":"Comput. Methods Prog. Biomed."},{"key":"3916_CR22","doi-asserted-by":"crossref","unstructured":"Demir, K.C., Rodriguez, B.L., Weise, T., Maier, A., Yang, S.H.: Towards Intelligent Speech Assistants in Operating Rooms: A Multimodal Model for Surgical Workflow Analysis (2024). arXiv:2406.14576","DOI":"10.21437\/Interspeech.2024-975"},{"issue":"11","key":"3916_CR23","doi-asserted-by":"publisher","first-page":"2639","DOI":"10.1109\/TBME.2017.2654361","volume":"64","author":"N Sebkhi","year":"2017","unstructured":"Sebkhi, N., Desai, D., Islam, M., Lu, J., Wilson, K., Ghovanloo, M.: Multimodal speech capture system for speech rehabilitation and learning. IEEE Trans. Biomed. Eng. 64(11), 2639\u20132649 (2017)","journal-title":"IEEE Trans. Biomed. Eng."},{"key":"3916_CR24","unstructured":"Yuan, K., Srivastav, V., Yu, T., Lavanchy, J.L., Mascagni, P., Navab, N., Padoy, N.: Learning multi-modal representations by watching hundreds of surgical video lectures. arXiv preprint arXiv:2307.15220 (2023)"},{"key":"3916_CR25","doi-asserted-by":"publisher","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. CoRR (2014). https:\/\/doi.org\/10.48550\/arXiv.1409.1556","DOI":"10.48550\/arXiv.1409.1556"},{"key":"3916_CR26","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 770\u2013778 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"3916_CR27","doi-asserted-by":"publisher","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2261\u20132269 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"3916_CR28","doi-asserted-by":"crossref","unstructured":"Wang, H., Zheng, S., Chen, Y., Cheng, L., Chen, Q.: CAM++: A Fast and Efficient Network for Speaker Verification Using Context-Aware Masking (2023). arXiv:2303.00332","DOI":"10.21437\/Interspeech.2023-1513"},{"key":"3916_CR29","unstructured":"Hannun, A.: Deep speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)"},{"issue":"1","key":"3916_CR30","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1109\/LSP.2018.2878620","volume":"26","author":"V Lostanlen","year":"2019","unstructured":"Lostanlen, V., Salamon, J., Cartwright, M., McFee, B., Farnsworth, A., Kelling, S., Bello, J.P.: Per-channel energy normalization: why and how. IEEE Signal Process. Lett. 26(1), 39\u201343 (2019). https:\/\/doi.org\/10.1109\/LSP.2018.2878620","journal-title":"IEEE Signal Process. Lett."},{"key":"3916_CR31","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.-C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"3916_CR32","doi-asserted-by":"publisher","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017). https:\/\/doi.org\/10.1145\/3065386","DOI":"10.1145\/3065386"},{"key":"3916_CR33","doi-asserted-by":"publisher","unstructured":"Howard, A., Sandler, M., Chen, B., Wang, W., Chen, L.-C., Tan, M., Chu, G., Vasudevan, V., Zhu, Y., Pang, R., Adam, H., Le, Q.: Searching for mobilenetv3. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 1314\u20131324 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00140","DOI":"10.1109\/ICCV.2019.00140"},{"key":"3916_CR34","volume-title":"An Introduction to Signal Detection and Estimation","author":"HV Poor","year":"2013","unstructured":"Poor, H.V.: An Introduction to Signal Detection and Estimation. Springer, Berlin (2013)"},{"key":"3916_CR35","doi-asserted-by":"crossref","unstructured":"Rix, A.W., Beerends, J.G., Hollier, M., Hekstra, A.P.: Perceptual evaluation of speech quality (pesq)\u2014a new method for speech quality assessment of telephone networks and codecs. In: Proceedings of the 2001 IEEE International Conference on Acoustics, Speech, and Signal Processing (Cat. No.01CH37221), vol. 2, pp. 749\u20137522 (2001)","DOI":"10.1109\/ICASSP.2001.941023"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-03916-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-03916-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-03916-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T16:08:22Z","timestamp":1744128502000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-03916-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,10]]},"references-count":35,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["3916"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-03916-z","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-5304495\/v1","asserted-by":"object"}]},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,10]]},"assertion":[{"value":"21 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 February 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that we have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"381"}}