{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T12:10:14Z","timestamp":1778760614308,"version":"3.51.4"},"reference-count":58,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.eswa.2026.132655","type":"journal-article","created":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:31:28Z","timestamp":1778167888000},"page":"132655","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["When audio-visual deep learning meets TCM facial inspection: A novel depression detection method for Chinese population"],"prefix":"10.1016","volume":"326","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0973-5562","authenticated-orcid":false,"given":"Jiayi","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6302-9751","authenticated-orcid":false,"given":"Sitan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3260-383X","authenticated-orcid":false,"given":"Jiajun","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8065-8149","authenticated-orcid":false,"given":"Xiaohua","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7544-3207","authenticated-orcid":false,"given":"Yadi","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0824-6203","authenticated-orcid":false,"given":"Linfeng","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2026.132655_bib0001","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2023.105020","article-title":"Bio-acoustic features of depression: A review","volume":"85","author":"Almaghrabi","year":"2023","journal-title":"Biomedical Signal Processing and Control"},{"key":"10.1016\/j.eswa.2026.132655_bib0002","first-page":"12449","article-title":"Wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.eswa.2026.132655_bib0003","series-title":"2018 13th IEEE International conference on automatic face & gesture recognition (FG 2018)","first-page":"59","article-title":"OpenFace 2.0: Facial behavior analysis toolkit","author":"Baltrusaitis","year":"2018"},{"issue":"6","key":"10.1016\/j.eswa.2026.132655_bib0004","doi-asserted-by":"crossref","first-page":"719","DOI":"10.1038\/s41551-023-01056-8","article-title":"Algorithmic fairness in artificial intelligence for medicine and healthcare","volume":"7","author":"Chen","year":"2023","journal-title":"Nature Biomedical Engineering"},{"key":"10.1016\/j.eswa.2026.132655_bib0005","series-title":"2014 IEEE International conference on acoustics, speech and signal processing (ICASSP)","first-page":"960","article-title":"Covarep\u2013a collaborative voice analysis repository for speech technologies","author":"Degottex","year":"2014"},{"key":"10.1016\/j.eswa.2026.132655_bib0006","series-title":"Proceedings of the 2014 International conference on autonomous agents and multi-agent systems","first-page":"1061","article-title":"Simsensei kiosk: A virtual human interviewer for healthcare decision support","author":"DeVault","year":"2014"},{"key":"10.1016\/j.eswa.2026.132655_bib0007","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2023.102161","article-title":"Transformer-based multimodal feature enhancement networks for multimodal depression detection integrating video, audio and remote photoplethysmograph signals","volume":"104","author":"Fan","year":"2024","journal-title":"Information Fusion"},{"key":"10.1016\/j.eswa.2026.132655_bib0008","series-title":"LREC","first-page":"3123","article-title":"The distress analysis interview corpus of human and computer interviews","volume":"vol. 14","author":"Gratch","year":"2014"},{"key":"10.1016\/j.eswa.2026.132655_bib0009","series-title":"Assessment of depression","first-page":"143","article-title":"The hamilton rating scale for depression","author":"Hamilton","year":"1986"},{"key":"10.1016\/j.eswa.2026.132655_bib0010","series-title":"Proceedings of the IEEE Conference on computer vision and pattern recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.eswa.2026.132655_bib0011","article-title":"LMVD: A large-scale multimodal vlog dataset for depression detection in the wild","author":"He","year":"2025","journal-title":"Information Fusion"},{"issue":"1","key":"10.1016\/j.eswa.2026.132655_bib0012","article-title":"Depression recognition using voice-based pre-training model","volume":"14","author":"Huang","year":"2024","journal-title":"Scientific Reports"},{"issue":"2","key":"10.1016\/j.eswa.2026.132655_bib0013","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1093\/occmed\/kqv087","article-title":"Beck depression inventory","volume":"66","author":"Jackson-Koku","year":"2016","journal-title":"Occupational Medicine"},{"key":"10.1016\/j.eswa.2026.132655_bib0014","doi-asserted-by":"crossref","first-page":"123649","DOI":"10.1109\/ACCESS.2020.3005687","article-title":"Deep facial diagnosis: Deep transfer learning from face recognition to facial diagnosis","volume":"8","author":"Jin","year":"2020","journal-title":"IEEe Access"},{"key":"10.1016\/j.eswa.2026.132655_bib0015","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1016\/j.neunet.2019.04.014","article-title":"Multivariate LSTM-FCNs for time series classification","volume":"116","author":"Karim","year":"2019","journal-title":"Neural Networks"},{"key":"10.1016\/j.eswa.2026.132655_bib0016","series-title":"Proceedings of the IEEE Conference on computer vision and pattern recognition","first-page":"1867","article-title":"One millisecond face alignment with an ensemble of regression trees","author":"Kazemi","year":"2014"},{"key":"10.1016\/j.eswa.2026.132655_bib0017","unstructured":"King, D. E. (2015). Max-margin object detection. arXiv: 1502.00046."},{"key":"10.1016\/j.eswa.2026.132655_bib0018","doi-asserted-by":"crossref","unstructured":"Kroenke, K., & Spitzer, R. L. (2002). The PHQ-9: a new depression diagnostic and severity measure. Psychiatric Annals, 32(9), 509\u2013515.","DOI":"10.3928\/0048-5713-20020901-06"},{"key":"10.1016\/j.eswa.2026.132655_bib0019","doi-asserted-by":"crossref","unstructured":"Kyprakis, I., Skaramagkas, V., Boura, I., Karamanis, G., Fotiadis, D. I., Kefalopoulou, Z., Spanaki, C., & Tsiknakis, M. (2025). A deep learning approach for depressive symptoms assessment in parkinson\u2019s disease patients using facial videos. arXiv: 2505.03845.","DOI":"10.1109\/EMBC58623.2025.11253137"},{"issue":"1","key":"10.1016\/j.eswa.2026.132655_bib0020","doi-asserted-by":"crossref","first-page":"1266","DOI":"10.1038\/s41598-024-51813-1","article-title":"High depressive symptomatology reduces emotional reactions to pictures of social interaction","volume":"14","author":"Lacerda","year":"2024","journal-title":"Scientific Reports"},{"key":"10.1016\/j.eswa.2026.132655_bib0021","article-title":"A facial depression recognition method based on hybrid multi-head cross attention network","volume":"17","author":"Li","year":"2023","journal-title":"Frontiers in Neuroscience"},{"key":"10.1016\/j.eswa.2026.132655_bib0022","series-title":"2020 5th International conference on computational intelligence and applications (ICCIA)","first-page":"113","article-title":"Facial complexion recognition of traditional chinese medicine based on computer vision","author":"Lin","year":"2020"},{"issue":"4","key":"10.1016\/j.eswa.2026.132655_bib0023","doi-asserted-by":"crossref","first-page":"3095","DOI":"10.1109\/TAFFC.2025.3571697","article-title":"Catching the blackdog easily: A convenient depression diagnosis method based on audio-visual deep learning","volume":"16","author":"Liu","year":"2025","journal-title":"IEEE Transactions on Affective Computing"},{"issue":"13","key":"10.1016\/j.eswa.2026.132655_bib0024","doi-asserted-by":"crossref","first-page":"1854","DOI":"10.3748\/wjg.v10.i13.1854","article-title":"Theory of traditional chinese medicine and therapeutic method of diseases","volume":"10","author":"Lu","year":"2004","journal-title":"World Journal of Gastroenterology: WJG"},{"issue":"3","key":"10.1016\/j.eswa.2026.132655_bib0025","doi-asserted-by":"crossref","first-page":"2251","DOI":"10.1109\/TAFFC.2022.3154332","article-title":"Prediction of depression severity based on the prosodic and semantic features with bidirectional LSTM and time distributed CNN","volume":"14","author":"Mao","year":"2022","journal-title":"IEEE Transactions on Affective Computing"},{"key":"10.1016\/j.eswa.2026.132655_bib0026","unstructured":"Metrics, I. (2021). Global health data exchange (GHDx). https:\/\/ghdx.healthdata.org."},{"issue":"7","key":"10.1016\/j.eswa.2026.132655_bib0027","doi-asserted-by":"crossref","first-page":"727","DOI":"10.1089\/acm.2008.0554","article-title":"Understanding the reliability of diagnostic variables in a chinese medicine examination","volume":"15","author":"O\u2019Brien","year":"2009","journal-title":"The Journal of Alternative and Complementary Medicine"},{"issue":"1","key":"10.1016\/j.eswa.2026.132655_bib0028","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1038\/nrdp.2016.65","article-title":"Major depressive disorder","volume":"2","author":"Otte","year":"2016","journal-title":"Nature Reviews Disease Primers"},{"issue":"1","key":"10.1016\/j.eswa.2026.132655_bib0029","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1038\/ki.2013.77","article-title":"Prevalence of depression in chronic kidney disease: systematic review and meta-analysis of observational studies","volume":"84","author":"Palmer","year":"2013","journal-title":"Kidney International"},{"key":"10.1016\/j.eswa.2026.132655_bib0030","series-title":"2017 39th Annual international conference of the IEEE engineering in medicine and biology society (EMBC)","first-page":"1433","article-title":"Facial geometry and speech analysis for depression detection","author":"Pampouchidou","year":"2017"},{"issue":"4","key":"10.1016\/j.eswa.2026.132655_bib0031","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3625287","article-title":"Explainable deep learning methods in medical image classification: A survey","volume":"56","author":"Patr\u00edcio","year":"2023","journal-title":"ACM Computing Surveys"},{"key":"10.1016\/j.eswa.2026.132655_bib0032","series-title":"Proceedings of the 9th International on audio\/visual emotion challenge and workshop","first-page":"81","article-title":"Multi-level attention network using text, audio and video for depression prediction","author":"Ray","year":"2019"},{"issue":"6","key":"10.1016\/j.eswa.2026.132655_bib0033","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster r-CNN: Towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2016","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.eswa.2026.132655_bib0034","doi-asserted-by":"crossref","unstructured":"Rezwanul Haque, M., Milon Islam, M., Raju, S. M., Altaheri, H., Nassar, L., & Karray, F. (2025). MMFformer: Multimodal fusion transformer network for depression detection. arXiv e-prints, (pp. arXiv\u20132508).","DOI":"10.1109\/SMC58881.2025.11342694"},{"key":"10.1016\/j.eswa.2026.132655_bib0035","doi-asserted-by":"crossref","DOI":"10.3389\/fpsyg.2021.648346","article-title":"The relationship between facial expression and cognitive function in patients with depression","volume":"12","author":"Ruihua","year":"2021","journal-title":"Frontiers in Psychology"},{"key":"10.1016\/j.eswa.2026.132655_bib0036","series-title":"ICASSP 2022-2022 IEEE International conference on acoustics, speech and signal processing (ICASSP)","first-page":"6247","article-title":"Automatic depression detection: An emotional audio-textual corpus and a GRU\/BILSTM-based model","author":"Shen","year":"2022"},{"key":"10.1016\/j.eswa.2026.132655_bib0037","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2023.105675","article-title":"A novel study for depression detecting using audio signals based on graph neural network","volume":"88","author":"Sun","year":"2024","journal-title":"Biomedical Signal Processing and Control"},{"key":"10.1016\/j.eswa.2026.132655_bib0038","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2024.108074","article-title":"A review of traditional chinese medicine diagnosis using machine learning: Inspection, auscultation-olfaction, inquiry, and palpation","volume":"170","author":"Tian","year":"2024","journal-title":"Computers in Biology and Medicine"},{"issue":"3","key":"10.1016\/j.eswa.2026.132655_bib0039","doi-asserted-by":"crossref","first-page":"220","DOI":"10.4103\/0019-5545.117131","article-title":"Diagnostic and statistical manual of mental disorders 5: A quick glance","volume":"55","author":"Vahia","year":"2013","journal-title":"Indian Journal of Psychiatry"},{"key":"10.1016\/j.eswa.2026.132655_bib0040","series-title":"Proceedings of the 4th International workshop on audio\/visual emotion challenge","first-page":"3","article-title":"AVEC 2014: 3D dimensional affect and depression recognition challenge","author":"Valstar","year":"2014"},{"key":"10.1016\/j.eswa.2026.132655_bib0041","series-title":"Proceedings of the 3rd ACM International workshop on audio\/visual emotion challenge","first-page":"3","article-title":"AVEC 2013: The continuous audio\/visual emotion and depression recognition challenge","author":"Valstar","year":"2013"},{"issue":"6","key":"10.1016\/j.eswa.2026.132655_bib0042","doi-asserted-by":"crossref","first-page":"688","DOI":"10.3390\/e22060688","article-title":"Automatic detection of depression in speech using ensemble convolutional neural networks","volume":"22","author":"V\u00e1zquez-Romero","year":"2020","journal-title":"Entropy"},{"issue":"40","key":"10.1016\/j.eswa.2026.132655_bib0043","first-page":"1022","article-title":"The association between depression and all-cause, cause-specific mortality in the Chinese population\u2013China, 2010\u20132022","volume":"6","author":"Wang","year":"2024","journal-title":"China CDC Weekly"},{"issue":"3","key":"10.1016\/j.eswa.2026.132655_bib0044","doi-asserted-by":"crossref","first-page":"1855","DOI":"10.1109\/TAFFC.2025.3543226","article-title":"Automatic depression recognition with an ensemble of multimodal spatio-temporal routing features","volume":"16","author":"Wang","year":"2025","journal-title":"IEEE Transactions on Affective Computing"},{"key":"10.1016\/j.eswa.2026.132655_bib0045","unstructured":"World Health Organization(2022). World mental health report: Transforming mental health for all. World Health Organization."},{"key":"10.1016\/j.eswa.2026.132655_bib0046","doi-asserted-by":"crossref","unstructured":"Wu, Y.-X., Huang, Z., Hu, B., & Guan, Z.-H. (2025). RBA-FE: A robust brain-inspired audio feature extractor for depression diagnosis. arXiv: 2506.07118.","DOI":"10.1109\/TCDS.2026.3684337"},{"issue":"1","key":"10.1016\/j.eswa.2026.132655_bib0047","article-title":"Depression detection methods based on multimodal fusion of voice and text","volume":"15","author":"Xu","year":"2025","journal-title":"Scientific Reports"},{"issue":"10","key":"10.1016\/j.eswa.2026.132655_bib0048","doi-asserted-by":"crossref","first-page":"1313","DOI":"10.3390\/life14101313","article-title":"TCEDN: A lightweight time-context enhanced depression detection network","volume":"14","author":"Yan","year":"2024","journal-title":"Life"},{"key":"10.1016\/j.eswa.2026.132655_bib0049","series-title":"Proceedings of the AAAI Conference on artificial intelligence","first-page":"12226","article-title":"D-vlog: Multimodal vlog dataset for depression detection","volume":"vol. 36","author":"Yoon","year":"2022"},{"key":"10.1016\/j.eswa.2026.132655_bib0050","series-title":"Proceedings of the European conference on computer vision (ECCV)","first-page":"325","article-title":"BiseNet: Bilateral segmentation network for real-time semantic segmentation","author":"Yu","year":"2018"},{"key":"10.1016\/j.eswa.2026.132655_bib0051","doi-asserted-by":"crossref","unstructured":"Zadeh, A., Chen, M., Poria, S., Cambria, E., & Morency, L.-P. (2017). Tensor fusion network for multimodal sentiment analysis. arXiv: 1707.07250.","DOI":"10.18653\/v1\/D17-1115"},{"key":"10.1016\/j.eswa.2026.132655_bib0052","series-title":"Proceedings of the AAAI Conference on artificial intelligence","first-page":"11121","article-title":"Are transformers effective for time series forecasting?","volume":"vol. 37","author":"Zeng","year":"2023"},{"issue":"1","key":"10.1016\/j.eswa.2026.132655_bib0053","first-page":"62","article-title":"Role of\u201c facial diagnosis\u201d objectification in tumor diagnosis and treatment","volume":"1","author":"Zhang","year":"2022","journal-title":"Cancer Insight"},{"key":"10.1016\/j.eswa.2026.132655_bib0054","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2021.104358","article-title":"Computational traditional chinese medicine diagnosis: a literature survey","volume":"133","author":"Zhang","year":"2021","journal-title":"Computers in Biology and Medicine"},{"issue":"12","key":"10.1016\/j.eswa.2026.132655_bib0055","doi-asserted-by":"crossref","first-page":"3714","DOI":"10.3390\/s24123714","article-title":"Multimodal sensing for depression risk detection: Integrating audio, video, and text data","volume":"24","author":"Zhang","year":"2024","journal-title":"Sensors"},{"key":"10.1016\/j.eswa.2026.132655_bib0056","unstructured":"Zogan, H., Razzak, I., Jameel, S., & Xu, G. (2021). Depressionnet: A novel summarization boosted deep framework for depression detection on social media. arXiv: 2105.10878."},{"issue":"4","key":"10.1016\/j.eswa.2026.132655_bib0057","doi-asserted-by":"crossref","first-page":"2823","DOI":"10.1109\/TAFFC.2022.3181210","article-title":"Semi-structural interview-based chinese multimodal depression corpus towards automatic preliminary screening of depressive disorders","volume":"14","author":"Zou","year":"2022","journal-title":"IEEE Transactions on Affective Computing"},{"issue":"6","key":"10.1016\/j.eswa.2026.132655_bib0058","doi-asserted-by":"crossref","first-page":"508","DOI":"10.1001\/archpsyc.1965.01730060026004","article-title":"Self-rating depression scale in an outpatient clinic: Further validation of the SDS","volume":"13","author":"Zung","year":"1965","journal-title":"Archives of General Psychiatry"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095741742601568X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S095741742601568X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:29:16Z","timestamp":1778758156000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S095741742601568X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":58,"alternative-id":["S095741742601568X"],"URL":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132655","relation":{},"ISSN":["0957-4174"],"issn-type":[{"value":"0957-4174","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"When audio-visual deep learning meets TCM facial inspection: A novel depression detection method for Chinese population","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.eswa.2026.132655","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"132655"}}