{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T05:10:49Z","timestamp":1781586649166,"version":"3.54.5"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2019YFC0118202"],"award-info":[{"award-number":["2019YFC0118202"]}]},{"name":"Key Research and Development of Zhejiang Province of China","award":["2021C03030"],"award-info":[{"award-number":["2021C03030"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10489-022-04255-z","type":"journal-article","created":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T12:48:47Z","timestamp":1669898927000},"page":"16029-16040","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":48,"title":["Multimodal fusion for alzheimer\u2019s disease recognition"],"prefix":"10.1007","volume":"53","author":[{"given":"Yangwei","family":"Ying","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tao","family":"Yang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1314-8883","authenticated-orcid":false,"given":"Hong","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,12,1]]},"reference":[{"issue":"7000","key":"4255_CR1","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1038\/nature02621","volume":"430","author":"MP Mattson","year":"2004","unstructured":"Mattson MP (2004) Pathways towards and away from alzheimer\u2019s disease. Nature 430(7000):631\u2013639","journal-title":"Nature"},{"issue":"2","key":"4255_CR2","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1016\/j.cmpb.2015.08.004","volume":"122","author":"L Xu","year":"2015","unstructured":"Xu L, Wu X, Chen K, Li Yao (2015) Multi-modality sparse representation-based classification for alzheimer\u2019s disease and mild cognitive impairment. Comput Methods Prog Biomed 122(2):182\u2013190","journal-title":"Comput Methods Prog Biomed"},{"key":"4255_CR3","doi-asserted-by":"crossref","unstructured":"Mueller KD, Koscik RL, Hermann BP, Johnson SC, Turkstra LS (2018) Declines in connected language are associated with very early mild cognitive impairment: Results from the wisconsin registry for alzheimer\u2019s prevention. Frontiers in Aging Neuroscience, p 9","DOI":"10.3389\/fnagi.2017.00437"},{"issue":"4","key":"4255_CR4","doi-asserted-by":"publisher","first-page":"937","DOI":"10.1007\/s10772-017-9456-7","volume":"20","author":"MOM Khelifa","year":"2017","unstructured":"Khelifa MOM, Elhadj YM, Abdellah Y, Belkasmi M (2017) Constructing accurate and robust hmm\/gmm models for an arabic speech recognition system. Int J Speech Technol 20(4):937\u2013 949","journal-title":"Int J Speech Technol"},{"issue":"8","key":"4255_CR5","doi-asserted-by":"publisher","first-page":"1018","DOI":"10.3390\/sym11081018","volume":"11","author":"D Wang","year":"2019","unstructured":"Wang D, Wang X, Lv S (2019) An overview of end-to-end automatic speech recognition. Symmetry 11(8):1018","journal-title":"Symmetry"},{"issue":"17","key":"4255_CR6","doi-asserted-by":"publisher","first-page":"2086","DOI":"10.3390\/electronics10172086","volume":"10","author":"Y Ying","year":"2021","unstructured":"Ying Y, Tu Y, Zhou H (2021) Unsupervised feature learning for speech emotion recognition based on autoencoder. Electronics 10(17):2086","journal-title":"Electronics"},{"key":"4255_CR7","doi-asserted-by":"crossref","unstructured":"Schuller B, Steidl S, Batliner A, Burkhardt F, Devillers L, M\u00fcller C, Narayanan S (2010) The interspeech 2010 paralinguistic challenge. In: Proc. INTERSPEECH 2010, Makuhari, Japan, pp 2794\u20132797","DOI":"10.21437\/Interspeech.2010-739"},{"issue":"2","key":"4255_CR8","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2016","unstructured":"Eyben F, Scherer KR, Schuller BW, Sundberg J, Andr\u00e9 E, Busso C, Devillers LY, Epps J, Laukka P, Narayanan SS, Truong KP (2016) The geneva minimalistic acoustic parameter set (gemaps) for voice research and affective computing. IEEE Trans Affect Comput 7(2):190\u2013202","journal-title":"IEEE Trans Affect Comput"},{"key":"4255_CR9","doi-asserted-by":"crossref","unstructured":"Schuller B, Steidl S, Batliner A, Hirschberg J, Burgoon JK, Baird A, Elkins A, Zhang Y, Coutinho E, Evanini K et al (2016) The interspeech 2016 computational paralinguistics challenge: Deception, sincerity & native language. In: 17TH Annual conference of the international speech communication association (Interspeech 2016), vol 1-5, pp 2001\u20132005","DOI":"10.21437\/Interspeech.2016-129"},{"key":"4255_CR10","doi-asserted-by":"crossref","unstructured":"Eyben F, W\u00f6llmer M, Schuller B (2010) Opensmile: the munich versatile and fast open-source audio feature extractor. In: Proceedings of the 18th ACM international conference on Multimedia, pp 1459\u20131462","DOI":"10.1145\/1873951.1874246"},{"key":"4255_CR11","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) Bert:, Pre-training of deep bidirectional transformers for language understanding. arXiv:1810.04805"},{"key":"4255_CR12","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019) Roberta:, A robustly optimized bert pretraining approach. arXiv:1907.11692"},{"key":"4255_CR13","first-page":"12449","volume":"33","author":"A Baevski","year":"2020","unstructured":"Baevski A, Zhou Y, Mohamed A, Auli M (2020) wav2vec 2.0: A framework for self-supervised learning of speech representations. Adv Neural Inf Process Syst 33:12449\u201312460","journal-title":"Adv Neural Inf Process Syst"},{"key":"4255_CR14","doi-asserted-by":"crossref","unstructured":"Chen S, Wang C, Chen Z, Wu Y, Liu S, Chen Z, Li J, Kanda N, Yoshioka T, Xiao X et al (2021) Wavlm:, Large-scale self-supervised pre-training for full stack speech processing. arXiv:2110.13900","DOI":"10.1109\/JSTSP.2022.3188113"},{"issue":"4","key":"4255_CR15","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/s10072-005-0467-9","volume":"26","author":"KE Forbes-McKay","year":"2005","unstructured":"Forbes-McKay KE, Venneri A (2005) Detecting subtle spontaneous language decline in early alzheimer\u2019s disease with a picture description task. Neurol Sci 26(4):243\u2013254","journal-title":"Neurol Sci"},{"key":"4255_CR16","doi-asserted-by":"publisher","first-page":"437","DOI":"10.3389\/fnagi.2017.00437","volume":"9","author":"KD Mueller","year":"2018","unstructured":"Mueller KD, Koscik RL, Hermann BP, Johnson SC, Turkstra LS (2018) Declines in connected language are associated with very early mild cognitive impairment: Results from the wisconsin registry for alzheimer\u2019s prevention. Front Aging Neurosci 9:437","journal-title":"Front Aging Neurosci"},{"issue":"2","key":"4255_CR17","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1109\/JSTSP.2019.2955022","volume":"14","author":"F Haider","year":"2019","unstructured":"Haider F, De La Fuente S, Luz S (2019) An assessment of paralinguistic acoustic features for detection of alzheimer\u2019s dementia in spontaneous speech. IEEE J Sel Top Signal Process 14(2):272\u2013281","journal-title":"IEEE J Sel Top Signal Process"},{"key":"4255_CR18","doi-asserted-by":"crossref","unstructured":"Nasreen S, Hough J, Purver M et al (2021) Detecting alzheimer\u2019s disease using interactional and acoustic features from spontaneous speech Interspeech","DOI":"10.21437\/Interspeech.2021-1526"},{"key":"4255_CR19","doi-asserted-by":"crossref","unstructured":"Rohanian M, Hough J, Purver M (2021) Alzheimer\u2019s dementia recognition using acoustic, lexical, disfluency and speech pause features robust to noisy inputs, arXiv:2106.15684","DOI":"10.21437\/Interspeech.2021-1633"},{"key":"4255_CR20","doi-asserted-by":"crossref","unstructured":"Yuan J, Bian Y, Cai X, Huang J, Ye Z, Church K (2020) Disfluencies and fine-tuning pre-trained language models for detection of alzheimer\u2019s disease. In: INTERSPEECH, pp 2162\u20132166","DOI":"10.21437\/Interspeech.2020-2516"},{"key":"4255_CR21","doi-asserted-by":"publisher","first-page":"51231","DOI":"10.1109\/ACCESS.2021.3069818","volume":"9","author":"C Zhang","year":"2021","unstructured":"Zhang C, Xue L (2021) Autoencoder with emotion embedding for speech emotion recognition. IEEE Access 9:51231\u201351241","journal-title":"IEEE Access"},{"key":"4255_CR22","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.specom.2020.07.005","volume":"122","author":"JC Vasquez-Correa","year":"2020","unstructured":"Vasquez-Correa JC, Arias-Vergara T, Schuster M, Orozco-Arroyave JR, N\u00f6th E (2020) Parallel representation learning for the classification of pathological speech: studies on parkinson\u2019s disease and cleft lip and palate. Speech Comm 122:56\u201367","journal-title":"Speech Comm"},{"key":"4255_CR23","doi-asserted-by":"crossref","unstructured":"Padi S, Sadjadi SO, Sriram RD, Manocha D (2021) Improved speech emotion recognition using transfer learning and spectrogram augmentation. In: Proceedings of the 2021 international conference on multimodal interaction, pp 645\u2013652","DOI":"10.1145\/3462244.3481003"},{"key":"4255_CR24","unstructured":"Chen L-W, Rudnicky A (2021) Exploring wav2vec 2.0 fine-tuning for improved speech emotion recognition. arXiv:2110.06309"},{"key":"4255_CR25","unstructured":"Qin Y, Liu W, Peng Z, Ng S-I, Li J, Hu H, Lee T (2021) Exploiting pre-trained asr models for alzheimer\u2019s disease recognition through spontaneous speech. arXiv:2110.01493"},{"key":"4255_CR26","doi-asserted-by":"crossref","unstructured":"Balagopalan A, Eyre B, Rudzicz F, Novikova J (2020) To bert or not to bert:, comparing speech and language-based approaches for alzheimer\u2019s disease detection. arXiv:2008.01551","DOI":"10.21437\/Interspeech.2020-2557"},{"key":"4255_CR27","doi-asserted-by":"crossref","unstructured":"Luz S, Haider F, De La Fuente S, Fromm D, MacWhinney B (2021) Detecting cognitive decline using speech only:, The adresso challenge. arXiv:2104.09356","DOI":"10.1101\/2021.03.24.21254263"},{"key":"4255_CR28","doi-asserted-by":"crossref","unstructured":"Siriwardhana S, Reis Andrew, Weerasekera R, Nanayakkara S (2020) Jointly fine-tuning \u201cbert-like\u201d self supervised models to improve multimodal speech emotion recognition. arXiv:2008.06682","DOI":"10.21437\/Interspeech.2020-1212"},{"key":"4255_CR29","doi-asserted-by":"crossref","unstructured":"Syed MSS, Syed ZS, Lech M, Pirogova E (2020) Automated screening for alzheimer\u2019s dementia through spontaneous speech. In: INTERSPEECH, pp 2222\u20132226","DOI":"10.21437\/Interspeech.2020-3158"},{"key":"4255_CR30","doi-asserted-by":"crossref","unstructured":"Chen J, Ye J, Tang F, Zhou J (2021) Automatic detection of alzheimer\u2019s disease using spontaneous speech only. In: Proc. Interspeech, pp 3830\u20133834","DOI":"10.21437\/Interspeech.2021-2002"},{"key":"4255_CR31","doi-asserted-by":"crossref","first-page":"3815","DOI":"10.21437\/Interspeech.2021-1572","volume":"2021","author":"ZS Syed","year":"2021","unstructured":"Syed ZS, Syed MSS, Lech M, Pirogova E (2021) Tackling the adresso challenge 2021: the muet-rmit system for alzheimer\u2019s dementia recognition from spontaneous speech. Proc Interspeech 2021:3815\u20133819","journal-title":"Proc Interspeech"},{"key":"4255_CR32","doi-asserted-by":"crossref","unstructured":"Qiao Y, Yin X, Wiechmann D, Kerz E (2021) Alzheimer\u2019s disease detection from spontaneous speech through combining linguistic complexity and (dis) fluency features with pretrained language models arXiv:2106.08689","DOI":"10.21437\/Interspeech.2021-1415"},{"key":"4255_CR33","doi-asserted-by":"crossref","unstructured":"Syed Zafi Sherhan, Sidorov Kirill, Marshall David (2018) Automated screening for bipolar disorder from audio\/visual modalities. In: Proceedings of the 2018 on Audio\/visual emotion challenge and workshop, pp 39\u201345","DOI":"10.1145\/3266302.3266315"},{"key":"4255_CR34","doi-asserted-by":"crossref","unstructured":"Panayotov V, Chen G, Povey D, Khudanpur S (2015) Librispeech: an asr corpus based on public domain audio books. In: 2015 IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 5206\u20135210. IEEE","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"4255_CR35","unstructured":"Zhang T, Wu F, Katiyar A, Weinberger KQ, Artzi Y (2020) Revisiting few-sample bert fine-tuning. arXiv:2006.05987"},{"issue":"2","key":"4255_CR36","doi-asserted-by":"publisher","first-page":"e0212320","DOI":"10.1371\/journal.pone.0212320","volume":"14","author":"T Kim","year":"2019","unstructured":"Kim T, Kim HY (2019) Forecasting stock prices with a feature fusion lstm-cnn model using different representations of the same data. PloS one 14(2):e0212320","journal-title":"PloS one"},{"key":"4255_CR37","doi-asserted-by":"crossref","unstructured":"Liu G, He W, Jin B (2018) Feature fusion of speech emotion recognition based on deep learning. In: 2018 International conference on network infrastructure and digital content (IC-NIDC), pp 193\u2013197, IEEE","DOI":"10.1109\/ICNIDC.2018.8525706"},{"key":"4255_CR38","doi-asserted-by":"crossref","unstructured":"Cui Y, Che W, Liu T, Qin B, Wang S, Hu G (2020) Revisiting pre-trained models for Chinese natural language processing. In: Proceedings of the 2020 Conference on empirical methods in natural language processing: Findings, pp 657\u2013668, Online, November 2020. Association for Computational Linguistics","DOI":"10.18653\/v1\/2020.findings-emnlp.58"},{"key":"4255_CR39","doi-asserted-by":"crossref","unstructured":"P\u00e9rez-Toro PA, Bayerl SP, Arias-Vergara T, V\u00e1squez-Correa JC, Klumpp P, Schuster M, N\u00f6th E, Orozco-Arroyave JR, Riedhammer K (2021) Influence of the interviewer on the automatic assessment of alzheimer\u2019s disease in the context of the adresso challenge. In: Interspeech, pp 3785\u20133789","DOI":"10.21437\/Interspeech.2021-1589"},{"key":"4255_CR40","doi-asserted-by":"crossref","unstructured":"Wang N, Cao Y, Hao S, Shao Z, Subbalakshmi KP (2021) Modular multi-modal attention network for alzheimer\u2019s disease detection using patient audio and language data. In: Interspeech, pp 3835\u20133839","DOI":"10.21437\/Interspeech.2021-2024"},{"key":"4255_CR41","doi-asserted-by":"crossref","unstructured":"Pappagari R, Cho J, Joshi S, Moro-Vel\u00e1zquez L, Zelasko P, Villalba J, Dehak N (2021) Automatic detection and assessment of alzheimer disease using speech and language technologies in low-resource scenarios. In: Interspeech, pp 3825\u20133829","DOI":"10.21437\/Interspeech.2021-1850"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04255-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-04255-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04255-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,9]],"date-time":"2024-10-09T19:56:14Z","timestamp":1728503774000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-04255-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":41,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["4255"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-04255-z","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,1]]},"assertion":[{"value":"9 October 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 December 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest in this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}]}}