{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T18:50:27Z","timestamp":1764874227636,"version":"3.46.0"},"reference-count":106,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62571184"],"award-info":[{"award-number":["62571184"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Department of Science and Technology of Hunan Province","award":["2025RC6003"],"award-info":[{"award-number":["2025RC6003"]}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2024A1515010112"],"award-info":[{"award-number":["2024A1515010112"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Changsha Science and Technology Bureau Foundation","award":["kq2402082"],"award-info":[{"award-number":["kq2402082"]}]},{"DOI":"10.13039\/100016804","name":"Natural Science Foundation of Shenzhen Municipality","doi-asserted-by":"publisher","award":["JCYJ20250604190534043"],"award-info":[{"award-number":["JCYJ20250604190534043"]}],"id":[{"id":"10.13039\/100016804","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hunan Provincial Key Research and Development Program","award":["2024AQ2041"],"award-info":[{"award-number":["2024AQ2041"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Sel. Top. Signal Process."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/jstsp.2025.3622051","type":"journal-article","created":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T17:58:02Z","timestamp":1761587882000},"page":"796-809","source":"Crossref","is-referenced-by-count":0,"title":["Foundation Model-Based Evaluation of Neuropsychiatric Disorders: A Lifespan-Inclusive, Multi-Modal, and Multi-Lingual Study"],"prefix":"10.1109","volume":"19","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8074-1746","authenticated-orcid":false,"given":"Zhongren","family":"Dong","sequence":"first","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haotian","family":"Guo","sequence":"additional","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4249-7524","authenticated-orcid":false,"given":"Weixiang","family":"Xu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6286-5868","authenticated-orcid":false,"given":"Huan","family":"Zhao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8487-0561","authenticated-orcid":false,"given":"Zixing","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-24926-6_4"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0140-6736(20)32205-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1002\/wps.20050"},{"key":"ref4","first-page":"1","article-title":"World Alzheimer report 2018The state of the art of dementia research: New frontiers","volume-title":"Proc. Alzheimer\u2019s Disease Int.","author":"Patterson","year":"2018"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1002\/alz.073714"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41582-023-00789-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/S0140-6736(16)30590-6"},{"volume-title":"Neurocognitive Disorders in Aging","year":"2005","author":"Kempler","key":"ref8"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.3109\/17549500903137256"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2015.03.004"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-91280-6_296"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2024.3483232"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49660.2025.10889576"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447829"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1590\/1516-4446-2013-S104"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.comppsych.2012.06.006"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446246"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446795"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3197315"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-2024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1633"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3347320.3357696"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3347320.3357697"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"article-title":"Text embeddings by weakly-supervised contrastive pre-training","year":"2022","author":"Wang","key":"ref25"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3207050"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TNSRE.2022.3224135"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2023.103986"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2023.3273614"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2024.3411616"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3233\/JAD-200888"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.psychres.2024.116109"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1080\/13803395.2018.1446513"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.31887\/DCNS.2015.17.2\/tchandrasekhar"},{"key":"ref35","first-page":"24","article-title":"Data-driven approach to differentiating between depression and dementia from noisy speech and language data","volume-title":"Proc. 8th W-NUT","author":"Ehghaghi","year":"2022"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095522"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-759"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1519"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096205"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-723"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1572"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49357.2023.10094714"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2019.2955012"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2011-750"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ichi48887.2020.9374335"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-024-63556-0"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3235194"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2024.3506554"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-12648-y"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.louhi-1.16"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2021.103107"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/MPRV.2022.3163656"},{"key":"ref53","first-page":"22","article-title":"Extraction and classification of acoustic features from Italian speaking children with autism spectrum disorders","volume-title":"Proc. 13th RaPID Workshop","author":"Beccaria","year":"2022"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2017-730"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2021.101287"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-56"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.110834"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2023.101485"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2025.114264"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2025.3546950"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2571"},{"article-title":"Probing mental health information in speech foundation models","year":"2024","author":"Gennes","key":"ref62"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2344"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888608"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3350071"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12021"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p18-1208"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"ref69","first-page":"6416","article-title":"Learning factorized multimodal representations","volume-title":"Proc. 7th ICLR","author":"Tsai","year":"2019"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1115"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1209"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.723"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref74","first-page":"12449","article-title":"Wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. 34th NeurIPS","author":"Baevski","year":"2020"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref77","first-page":"1298","article-title":"Data2vec: A general framework for self-supervised learning in speech, vision and language","volume-title":"Proc. 39th ICML","author":"Baevski","year":"2022"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.931"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-280"},{"key":"ref80","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"Proc. 40th ICML","author":"Radford","year":"2023"},{"key":"ref81","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","volume":"364","author":"Liu","year":"2019"},{"key":"ref82","first-page":"5754","article-title":"XLNet: Generalized autoregressive pretraining for language understanding","volume-title":"Proc. 32th NeurIPS","author":"Yang","year":"2019"},{"key":"ref83","first-page":"3986","article-title":"DeBERTa: Decoding-enhanced Bert with disentangled attention","volume-title":"Proc. 9th ICLR","year":"2021"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"article-title":"Towards general text embeddings with multi-stage contrastive learning","year":"2023","author":"Li","key":"ref85"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-industry.103"},{"article-title":"Multilingual E5 text embeddings: A technical report","year":"2024","author":"Wang","key":"ref87"},{"article-title":"Baichuan 2: Open large-scale language models","year":"2023","author":"Yang","key":"ref88"},{"article-title":"The llama 3 herd of models","year":"2024","author":"Dubey","key":"ref89"},{"article-title":"Qwen2. 5 technical report","year":"2024","author":"Yang","key":"ref90"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10887877"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-788"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1001\/archneur.1994.00540180063015"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1220"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10433923"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1807"},{"key":"ref97","first-page":"3123","article-title":"The distress analysis interview corpus of human and computer interviews","volume-title":"Proc. 9th LREC","author":"Gratch","year":"2014"},{"key":"ref98","first-page":"1061","article-title":"SimSensei Kiosk: A virtual human interviewer for healthcare decision support","volume-title":"Proc. 13th AAMAS","author":"DeVault","year":"2014"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21483"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746569"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01211-x"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3181210"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0132408"},{"article-title":"FunAudioLLM: Voice understanding and generation foundation models for natural interaction between humans and LLMs","year":"2024","author":"An","key":"ref104"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00806"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01918"}],"container-title":["IEEE Journal of Selected Topics in Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/4200690\/11275987\/11217306.pdf?arnumber=11217306","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T18:37:25Z","timestamp":1764873445000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11217306\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":106,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/jstsp.2025.3622051","relation":{},"ISSN":["1932-4553","1941-0484"],"issn-type":[{"type":"print","value":"1932-4553"},{"type":"electronic","value":"1941-0484"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}