{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T22:40:55Z","timestamp":1760740855286,"version":"build-2065373602"},"publisher-location":"Singapore","reference-count":41,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819534555"},{"type":"electronic","value":"9789819534562"}],"license":[{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3456-2_13","type":"book-chapter","created":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T07:24:08Z","timestamp":1760599448000},"page":"176-190","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["MSTDD: A Multi-scale Transformer Framework for\u00a0Automatic Depression Detection"],"prefix":"10.1007","author":[{"given":"Dongfang","family":"Han","sequence":"first","affiliation":[]},{"given":"Yi","family":"Liang","sequence":"additional","affiliation":[]},{"given":"Xi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yuanyuan","family":"Liao","sequence":"additional","affiliation":[]},{"given":"Hamdulla","family":"Askar","sequence":"additional","affiliation":[]},{"given":"Turdi","family":"Tohti","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"issue":"1","key":"13_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/nrdp.2016.65","volume":"2","author":"C Otte","year":"2016","unstructured":"Otte, C., et al.: Major depressive disorder. Nat. Rev. Dis. Primers. 2(1), 1\u201320 (2016)","journal-title":"Nat. Rev. Dis. Primers."},{"issue":"2","key":"13_CR2","doi-asserted-by":"publisher","first-page":"83","DOI":"10.2174\/2666082219666230518105004","volume":"20","author":"R Balatif","year":"2024","unstructured":"Balatif, R., Mawadhani Sukma, A.A.: Depression and its impact on various aspects of life-a narrative review. Curr. Psychiatry Res. Rev. Formerly Curr. Psychiatry Rev. 20(2), 83\u201393 (2024)","journal-title":"Curr. Psychiatry Res. Rev. Formerly Curr. Psychiatry Rev."},{"issue":"2020","key":"13_CR3","first-page":"134","volume":"126","author":"Q Liu","year":"1990","unstructured":"Liu, Q., He, H., Yang, J., Feng, X., Zhao, F., Lyu, J.: Changes in the global burden of depression from 1990 to 2017: findings from the global burden of disease study. J. Psychiatry Res. 126(2020), 134\u2013140 (1990)","journal-title":"J. Psychiatry Res."},{"issue":"8","key":"13_CR4","doi-asserted-by":"publisher","first-page":"3214","DOI":"10.1038\/s41380-022-01638-z","volume":"27","author":"K Yuan","year":"2022","unstructured":"Yuan, K., et al.: A systematic review and meta-analysis on prevalence of and risk factors associated with depression, anxiety and insomnia in infectious diseases, including covid-19: a call to action. Mol. Psychiatry 27(8), 3214\u20133222 (2022)","journal-title":"Mol. Psychiatry"},{"key":"13_CR5","doi-asserted-by":"publisher","first-page":"4921","DOI":"10.1109\/TNSRE.2023.3339518","volume":"31","author":"M Yang","year":"2023","unstructured":"Yang, M., Weng, Z., Zhang, Y., Tao, Y., Hu, B.: Three-stream convolutional neural network for depression detection with ocular imaging. IEEE Trans. Neural Syst. Rehabil. Eng. 31, 4921\u20134930 (2023)","journal-title":"IEEE Trans. Neural Syst. Rehabil. Eng."},{"key":"13_CR6","doi-asserted-by":"publisher","first-page":"102017","DOI":"10.1016\/j.inffus.2023.102017","volume":"102","author":"J Chen","year":"2024","unstructured":"Chen, J., et al.: IIFDD: intra and inter-modal fusion for depression detection with multi-modal information from internet of medical things. Inf. Fusion 102, 102017 (2024)","journal-title":"Inf. Fusion"},{"key":"13_CR7","doi-asserted-by":"publisher","first-page":"107166","DOI":"10.1016\/j.bspc.2024.107166","volume":"100","author":"X Yuan","year":"2025","unstructured":"Yuan, X., et al.: Intermediary-guided windowed attention aggregation network for fine-grained characterization of major depressive disorder FMRI. Biomed. Signal Process. Control 100, 107166 (2025)","journal-title":"Biomed. Signal Process. Control"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Cohn, J.F., et al.: Detecting depression from facial actions and vocal prosody. In: 3rd International Conference on Affective Computing and Intelligent Interaction and Workshops, pp. 1\u20137. IEEE (2009)","DOI":"10.1109\/ACII.2009.5349358"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Scherer, S., et al.: Automatic behavior descriptors for psychological disorder analysis. In: 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), pp. 1\u20138. IEEE (2013)","DOI":"10.1109\/FG.2013.6553789"},{"key":"13_CR10","doi-asserted-by":"crossref","unstructured":"Valstar, M., et al.: Avec 2016: depression, mood, and emotion recognition workshop and challenge. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, pp. 3\u201310 (2016)","DOI":"10.1145\/2988257.2988258"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Huang, Z., Epps, J., Joachim, D.: Exploiting vocal tract coordination using dilated CNNs for depression detection in naturalistic environments. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6549\u20136553. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9054323"},{"key":"13_CR12","unstructured":"Haque, A., Guo, M., Miner, A.S., Fei-Fei, L.: Measuring depression symptom severity from spoken language and 3D facial expressions. arXiv preprint arXiv:1811.08592 (2018)"},{"issue":"5","key":"13_CR13","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1109\/MIS.2019.2925204","volume":"34","author":"SA Qureshi","year":"2019","unstructured":"Qureshi, S.A., Saha, S., Hasanuzzaman, M., Dias, G.: Multitask representation learning for multimodal estimation of depression level. IEEE Intell. Syst. 34(5), 45\u201352 (2019)","journal-title":"IEEE Intell. Syst."},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Wei, P.-C., Peng, K., Roitberg, A., Yang, K., Zhang, J., Stiefelhagen, R.: Multi-modal depression estimation based on sub-attentional fusion. In: European Conference on Computer Vision, Springer, pp. 623\u2013639 (2022)","DOI":"10.1007\/978-3-031-25075-0_42"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Jung, J., Kang, C., Yoon, J., Kim, S., Han, J.: Hique: hierarchical question embedding network for multimodal depression detection. In: Proceedings of the 33rd ACM International Conference on Information and Knowledge Management, pp. 1049\u20131059 (2024)","DOI":"10.1145\/3627673.3679797"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Williamson, J.R., Quatieri, T.F., Helfer, B.S., Horwitz, R., Yu, B., Mehta, D.D.: Vocal biomarkers of depression based on motor incoordination. In: Proceedings of the 3rd ACM International Workshop on Audio\/Visual Emotion Challenge, pp. 41\u201348 (2013)","DOI":"10.1145\/2512530.2512531"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Ma, X., Yang, H., Chen, Q., Huang, D., Wang, Y.: Depaudionet: an efficient deep model for audio based depression classification. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, pp. 35\u201342 (2016)","DOI":"10.1145\/2988257.2988267"},{"key":"13_CR18","doi-asserted-by":"publisher","first-page":"775","DOI":"10.1109\/TASLP.2023.3235194","volume":"31","author":"W Chen","year":"2023","unstructured":"Chen, W., Xing, X., Xu, X., Pang, J., Du, L.: Speechformer++: a hierarchical efficient framework for paralinguistic speech processing. IEEE\/ACM Trans. Audio Speech Lang. Process. 31, 775\u2013788 (2023)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Zhang, Y., He, Y., Rong, L., Ding, Y.: A hybrid model for depression detection with transformer and bi-directional long short-term memory. In: 2022 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 2727\u20132734. IEEE (2022)","DOI":"10.1109\/BIBM55620.2022.9995184"},{"issue":"5","key":"13_CR20","doi-asserted-by":"publisher","first-page":"2806","DOI":"10.1109\/TCSS.2022.3202316","volume":"10","author":"Y Guo","year":"2022","unstructured":"Guo, Y., Zhu, C., Hao, S., Hong, R.: Automatic depression detection via learning and fusing features from visual cues. IEEE Trans. Comput. Soc. Syst. 10(5), 2806\u20132813 (2022)","journal-title":"IEEE Trans. Comput. Soc. Syst."},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Toto, E., Tlachac, M., Rundensteiner, E.A.: AudiBERT: a deep transfer learning multimodal classification framework for depression screening. In: Proceedings of the 30th ACM International Conference on Information & Knowledge Management, pp. 4145\u20134154 (2021)","DOI":"10.1145\/3459637.3481895"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Al\u00a0Hanai, T., Ghassemi, M.M., Glass, J.R.: Detecting depression with audio\/text sequence modeling of interviews. In: Interspeech, pp. 1716\u20131720 (2018)","DOI":"10.21437\/Interspeech.2018-2522"},{"issue":"12","key":"13_CR23","doi-asserted-by":"publisher","first-page":"7749","DOI":"10.1109\/TCYB.2022.3197127","volume":"53","author":"T Chen","year":"2022","unstructured":"Chen, T., Hong, R., Guo, Y., Hao, S., Hu, B.: Ms$$^2$$-GNN: exploring GNN-based multimodal fusion network for depression detection. IEEE Trans. Cybern. 53(12), 7749\u20137759 (2022)","journal-title":"IEEE Trans. Cybern."},{"key":"13_CR24","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"13_CR25","doi-asserted-by":"crossref","unstructured":"Bridle, J.S.: Probabilistic interpretation of feedforward classification network outputs, with relationships to statistical pattern recognition. In: Neurocomputing: Algorithms, architectures and applications, pp. 227\u2013236. Springer (1990)","DOI":"10.1007\/978-3-642-76153-9_28"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Hofst\u00e4tter, S., Zamani, H., Mitra, B., Craswell, N., Hanbury, A.: Local self-attention over long text for efficient document retrieval. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2021\u20132024 (2020)","DOI":"10.1145\/3397271.3401224"},{"key":"13_CR27","unstructured":"Gratch, J., et al.: The distress analysis interview corpus of human and computer interviews. In: LREC, Reykjavik, pp. 3123\u20133128 (2014)"},{"key":"13_CR28","unstructured":"DeVault, D., et al.: Simsensei kiosk: a virtual human interviewer for healthcare decision support. In: Proceedings of the 2014 International Conference on Autonomous Agents and Multi-Agent Systems, pp. 1061\u20131068 (2014)"},{"issue":"1\u20133","key":"13_CR29","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/j.jad.2008.06.026","volume":"114","author":"K Kroenke","year":"2009","unstructured":"Kroenke, K., Strine, T.W., Spitzer, R.L., Williams, J.B., Berry, J.T., Mokdad, A.H.: The phq-8 as a measure of current depression in the general population. J. Affect. Disord. 114(1\u20133), 163\u2013173 (2009)","journal-title":"J. Affect. Disord."},{"key":"13_CR30","doi-asserted-by":"publisher","first-page":"116076","DOI":"10.1016\/j.eswa.2021.116076","volume":"189","author":"S Sardari","year":"2022","unstructured":"Sardari, S., Nakisa, B., Rastgoo, M.N., Eklund, P.: Audio based depression detection using convolutional autoencoder. Expert Syst. Appl. 189, 116076 (2022)","journal-title":"Expert Syst. Appl."},{"key":"13_CR31","unstructured":"Ioannides, G., Kieback, A., Chadha, A., Elkins, A.: Density adaptive attention-based speech network: enhancing feature understanding for mental health disorders. arXiv preprint arXiv:2409.00391 (2024)"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Sun, B., et al.: A random forest regression method with selected-text feature for depression assessment. In: Proceedings of the 7th Annual Workshop on Audio\/Visual Emotion Challenge, pp. 61\u201368 (2017)","DOI":"10.1145\/3133944.3133951"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Xezonaki, D., Paraskevopoulos, G., Potamianos, A., Narayanan, S.: Affective conditioning on hierarchical attention networks applied to depression detection from transcribed clinical interviews. In: Interspeech (INTERSPEECH), pp. 4556\u20134560 (2020)","DOI":"10.21437\/Interspeech.2020-2819"},{"key":"13_CR34","doi-asserted-by":"crossref","unstructured":"Tang, J., Shang, Y.: Advancing mental health pre-screening: a new custom GPT for psychological distress assessment. In: 2024 IEEE 6th International Conference on Cognitive Machine Intelligence (CogMI), pp. 162\u2013171. IEEE (2024)","DOI":"10.1109\/CogMI62246.2024.00030"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Villatoro-Tello, E., Ram\u00edrez-de-la Rosa, G., G\u00e1tica-P\u00e9rez, D., Magimai.-Doss, M., Jim\u00e9nez-Salazar, H.: Approximating the mental lexicon from clinical interviews as a support tool for depression detection. In: Proceedings of the 2021 International Conference on Multimodal Interaction, pp. 557\u2013566 (2021)","DOI":"10.1145\/3462244.3479896"},{"issue":"3","key":"13_CR36","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/s43657-023-00152-8","volume":"4","author":"W Zhang","year":"2024","unstructured":"Zhang, W., Mao, K., Chen, J.: A multimodal approach for detection and assessment of depression using text, audio and video. Phenomics 4(3), 234\u2013249 (2024)","journal-title":"Phenomics"},{"key":"13_CR37","unstructured":"Cheong, J., Kalkan, S., Gunes, H.: Fairrefuse: referee-guided fusion for multimodal causal fairness in depression detection. In: International Joint Conference on Artificial Intelligence (IJCAI) (2024)"},{"key":"13_CR38","doi-asserted-by":"crossref","unstructured":"Gimeno-G\u00f3mez, D., Bucur, A.-M., Cosma, A. Mart\u00ednez-Hinarejos, C.-D., Rosso, P.: Reading between the frames: multi-modal depression detection in videos from non-verbal cues. In: European Conference on Information Retrieval, pp. 191\u2013209. Springer (2024)","DOI":"10.1007\/978-3-031-56027-9_12"},{"key":"13_CR39","unstructured":"Tank, C., Pol, S., Katoch, V., Mehta, S., Anand, A., Shah, R.R.: Depression detection and analysis using large language models on textual and audio-visual modalities. arXiv preprint arXiv:2407.06125 (2024)"},{"key":"13_CR40","unstructured":"Ding, X., Zhou, X., Guo, Y., Han, J., Liu, J., et al.: Global sparse momentum SGD for pruning very deep neural networks. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"13_CR41","doi-asserted-by":"crossref","unstructured":"Qin, J., et al.: Mental-perceiver: audio-textual multi-modal learning for estimating mental disorders. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a039, pp. 25029\u201325037 (2025)","DOI":"10.1609\/aaai.v39i23.34687"}],"container-title":["Lecture Notes in Computer Science","Advanced Data Mining and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3456-2_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T22:05:23Z","timestamp":1760738723000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3456-2_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"ISBN":["9789819534555","9789819534562"],"references-count":41,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3456-2_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"17 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Data Mining and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adma2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/adma2025.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}