{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T04:38:32Z","timestamp":1743136712686,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031779602"},{"type":"electronic","value":"9783031779619"}],"license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-77961-9_23","type":"book-chapter","created":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T13:54:02Z","timestamp":1732197242000},"page":"309-323","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Cross-Cultural Automatic Depression Detection Based on\u00a0Audio Signals"],"prefix":"10.1007","author":[{"given":"Danila","family":"Mamontov","sequence":"first","affiliation":[]},{"given":"Sebastian","family":"Zepf","sequence":"additional","affiliation":[]},{"given":"Alexey","family":"Karpov","sequence":"additional","affiliation":[]},{"given":"Wolfgang","family":"Minker","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"key":"23_CR1","doi-asserted-by":"publisher","unstructured":"Aloshban, N., Esposito, A., Vinciarelli, A.: Detecting depression in less than 10 seconds: impact of speaking time on depression detection sensitivity. In: Proceedings of the 2020 International Conference on Multimodal Interaction, ICMI \u201920, pp. 79\u201387. Association for Computing Machinery, New York (2020). https:\/\/doi.org\/10.1145\/3382507.3418875","DOI":"10.1145\/3382507.3418875"},{"key":"23_CR2","doi-asserted-by":"publisher","unstructured":"Alpert, M., Pouget, E.R., Silva, R.R.: Reflections of depression in acoustic measures of the patient\u2019s speech. J. Affect. Disord. 66(1), 59\u201369 (2001). https:\/\/doi.org\/10.1016\/S0165-0327(00)00335-9. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0165032700003359","DOI":"10.1016\/S0165-0327(00)00335-9"},{"key":"23_CR3","doi-asserted-by":"publisher","unstructured":"Brohan, E., Gauci, D., Sartorius, N., Thornicroft, G.: Self-stigma, empowerment and perceived discrimination among people with bipolar disorder or depression in 13 European countries: the GAMIAN\u2013Europe study. J. Affect. Disord. 129(1), 56\u201363 (2011). https:\/\/doi.org\/10.1016\/j.jad.2010.09.001. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0165032710005690","DOI":"10.1016\/j.jad.2010.09.001"},{"key":"23_CR4","doi-asserted-by":"publisher","unstructured":"Cai, H., et al.: A multi-modal open dataset for mental-disorder analysis. Sci. Data 9(1), 178 (2022). https:\/\/doi.org\/10.1038\/s41597-022-01211-x. https:\/\/www.nature.com\/articles\/s41597-022-01211-x","DOI":"10.1038\/s41597-022-01211-x"},{"key":"23_CR5","doi-asserted-by":"publisher","unstructured":"Callejas\u00a0Carri\u00f3n, Z., Benghazi, K., Noguera, M., Torres\u00a0Bara\u00f1ano, M.I., Justo\u00a0Blanco, R.: MENHIR: mental health monitoring through interactive conversations (2019). https:\/\/doi.org\/10.26342\/2019-63-15. http:\/\/rua.ua.es\/dspace\/handle\/10045\/96617","DOI":"10.26342\/2019-63-15"},{"key":"23_CR6","unstructured":"Dumpala, S.H., Rodriguez, S., Rempel, S., Sajjadian, M., Uher, R., Oore, S.: Detecting depression with a temporal context of speaker embeddings (2022)"},{"key":"23_CR7","doi-asserted-by":"publisher","unstructured":"France, D., Shiavi, R., Silverman, S., Silverman, M., Wilkes, M.: Acoustical properties of speech as indicators of depression and suicidal risk. IEEE Trans. Biomed. Eng. 47(7), 829\u2013837 (2000). https:\/\/doi.org\/10.1109\/10.846676. https:\/\/ieeexplore.ieee.org\/abstract\/document\/846676","DOI":"10.1109\/10.846676"},{"key":"23_CR8","doi-asserted-by":"publisher","unstructured":"Gong, Y., Poellabauer, C.: Topic modeling based multi-modal depression detection. In: Proceedings of the 7th Annual Workshop on Audio\/Visual Emotion Challenge, AVEC 2017, pp. 69\u201376. Association for Computing Machinery, New York (2017). https:\/\/doi.org\/10.1145\/3133944.3133945","DOI":"10.1145\/3133944.3133945"},{"key":"23_CR9","unstructured":"Gotlib, I.H., Hammen, C.L.: Handbook of Depression, 2nd edn. Guilford Press (2008)"},{"key":"23_CR10","unstructured":"Gratch, J., et al.: The distress analysis interview corpus of human and computer interviews. In: Calzolari, N., et al. (eds.) Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014), pp. 3123\u20133128. European Language Resources Association (ELRA), Reykjavik (2014). http:\/\/www.lrec-conf.org\/proceedings\/lrec2014\/pdf\/508_Paper.pdf"},{"issue":"4 Suppl","key":"23_CR11","first-page":"S92","volume":"13","author":"A Halfin","year":"2007","unstructured":"Halfin, A.: Depression: the benefits of early and appropriate treatment. Am. J. Manag. Care 13(4 Suppl), S92-97 (2007)","journal-title":"Am. J. Manag. Care"},{"key":"23_CR12","doi-asserted-by":"publisher","unstructured":"Han, M.M., et al.: Automatic recognition of depression based on audio and video: a review. World J. Psychiatry 14(2), 225\u2013233 (2024). https:\/\/doi.org\/10.5498\/wjp.v14.i2.225. https:\/\/www.ncbi.nlm.nih.gov\/pmc\/articles\/PMC10921287\/","DOI":"10.5498\/wjp.v14.i2.225"},{"key":"23_CR13","doi-asserted-by":"publisher","unstructured":"He, L., Cao, C.: Automated depression analysis using convolutional neural networks from speech. J. Biomed. Inf. 83, 103\u2013111 (2018). https:\/\/doi.org\/10.1016\/j.jbi.2018.05.007. https:\/\/www.sciencedirect.com\/science\/article\/pii\/S153204641830090X","DOI":"10.1016\/j.jbi.2018.05.007"},{"key":"23_CR14","doi-asserted-by":"publisher","unstructured":"He, L., Jiang, D., Sahli, H.: Multimodal depression recognition with dynamic visual and audio cues. In: 2015 International Conference on Affective Computing and Intelligent Interaction (ACII), pp. 260\u2013266 (2015). https:\/\/doi.org\/10.1109\/ACII.2015.7344581. https:\/\/ieeexplore.ieee.org\/abstract\/document\/7344581, iSSN: 2156-8111","DOI":"10.1109\/ACII.2015.7344581"},{"key":"23_CR15","doi-asserted-by":"publisher","unstructured":"Kaya, H., et al.: Predicting depression and emotions in the cross-roads of cultures, para-linguistics, and non-linguistics. In: Proceedings of the 9th International on Audio\/Visual Emotion Challenge and Workshop, AVEC 2019, pp. 27\u201335. Association for Computing Machinery, New York (2019). https:\/\/doi.org\/10.1145\/3347320.3357691","DOI":"10.1145\/3347320.3357691"},{"key":"23_CR16","doi-asserted-by":"publisher","unstructured":"Lam, G., Dongyan, H., Lin, W.: Context-aware deep learning for multi-modal depression detection. In: ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3946\u20133950 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8683027. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8683027, iSSN: 2379-190X","DOI":"10.1109\/ICASSP.2019.8683027"},{"key":"23_CR17","doi-asserted-by":"publisher","unstructured":"Lehti, A., Hammarstr\u00f6m, A., Mattsson, B.: Recognition of depression in people of different cultures: a qualitative study. BMC Family Pract. 10(1), 53 (2009). https:\/\/doi.org\/10.1186\/1471-2296-10-53","DOI":"10.1186\/1471-2296-10-53"},{"key":"23_CR18","doi-asserted-by":"publisher","unstructured":"Meng, H., Huang, D., Wang, H., Yang, H., AI-Shuraifi, M., Wang, Y.: Depression recognition based on dynamic facial and vocal expression features using partial least square regression. In: Proceedings of the 3rd ACM International Workshop on Audio\/Visual Emotion Challenge, AVEC 2013, pp. 21\u201330. Association for Computing Machinery, New York (2013). https:\/\/doi.org\/10.1145\/2512530.2512532","DOI":"10.1145\/2512530.2512532"},{"key":"23_CR19","doi-asserted-by":"publisher","unstructured":"Niu, M., Tao, J., Liu, B., Huang, J., Lian, Z.: Multimodal spatiotemporal representation for automatic depression level detection. IEEE Trans. Affect. Comput. 14(1), 294\u2013307 (2023). https:\/\/doi.org\/10.1109\/TAFFC.2020.3031345. https:\/\/ieeexplore.ieee.org\/abstract\/document\/9226102","DOI":"10.1109\/TAFFC.2020.3031345"},{"key":"23_CR20","unstructured":"Pedregosa, F., et\u00a0al.: Scikit-learn: machine learning in python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)"},{"key":"23_CR21","doi-asserted-by":"publisher","unstructured":"Pratt, L.A., Druss, B.G., Manderscheid, R.W., Walker, E.R.: Excess mortality due to Depression and Anxiety in the United States: results from a nationally representative survey. General Hosp. Psychiat. 39, 39\u201345 (2016). https:\/\/doi.org\/10.1016\/j.genhosppsych.2015.12.003. https:\/\/www.ncbi.nlm.nih.gov\/pmc\/articles\/PMC5113020\/","DOI":"10.1016\/j.genhosppsych.2015.12.003"},{"key":"23_CR22","doi-asserted-by":"publisher","unstructured":"Ringeval, F., Schuller, B., Valstar, M., Cummins, N., Cowie, R., Pantic, M.: AVEC\u201919: audio\/visual emotion challenge and Workshop, pp. 2718\u20132719 (2019). https:\/\/doi.org\/10.1145\/3343031.3350550","DOI":"10.1145\/3343031.3350550"},{"key":"23_CR23","doi-asserted-by":"publisher","unstructured":"Ringeval, F., ET AL.: AVEC 2017: real-life depression, and affect recognition workshop and challenge. In: Proceedings of the 7th Annual Workshop on Audio\/Visual Emotion Challenge, AVEC \u201917, pp.\u00a03\u20139. Association for Computing Machinery, New York (2017). https:\/\/doi.org\/10.1145\/3133944.3133953","DOI":"10.1145\/3133944.3133953"},{"key":"23_CR24","doi-asserted-by":"publisher","unstructured":"Sidorov, M., Minker, W.: Emotion recognition and depression diagnosis by acoustic and visual features: a multimodal approach. In: Proceedings of the 4th International Workshop on Audio\/Visual Emotion Challenge, AVEC \u201914, pp. 81\u201386. Association for Computing Machinery, New York (2014). https:\/\/doi.org\/10.1145\/2661806.2661816","DOI":"10.1145\/2661806.2661816"},{"key":"23_CR25","doi-asserted-by":"publisher","unstructured":"Sun, H., et al.: Multi-modal adaptive fusion transformer network for the estimation of depression level. Sensors 21(14), 4764 (2021). https:\/\/doi.org\/10.3390\/s21144764. https:\/\/www.mdpi.com\/1424-8220\/21\/14\/4764","DOI":"10.3390\/s21144764"},{"key":"23_CR26","doi-asserted-by":"publisher","unstructured":"Thomas, K.C., Ellis, A.R., Konrad, T.R., Holzer, C.E., Morrissey, J.P.: County-level estimates of mental health professional shortage in the United States. Psychiat. Serv. 60(10), 1323\u20131328 (2009). https:\/\/doi.org\/10.1176\/ps.2009.60.10.1323. https:\/\/ps.psychiatryonline.org\/doi\/full\/10.1176\/ps.2009.60.10.1323","DOI":"10.1176\/ps.2009.60.10.1323"},{"key":"23_CR27","doi-asserted-by":"publisher","unstructured":"Valstar, M., et al.: AVEC 2016: depression, mood, and emotion recognition workshop and challenge. In: Proceedings of the 6th International Workshop on Audio\/Visual Emotion Challenge, AVEC \u201916, pp. 3\u201310. Association for Computing Machinery, New York (2016). https:\/\/doi.org\/10.1145\/2988257.2988258","DOI":"10.1145\/2988257.2988258"},{"key":"23_CR28","doi-asserted-by":"publisher","unstructured":"Valstar, M., et al.: AVEC 2013: the continuous audio\/visual emotion and depression recognition challenge. In: Proceedings of the 3rd ACM International Workshop on Audio\/Visual Emotion Challenge, AVEC 2013, pp. 3\u201310. Association for Computing Machinery, New York (2013). https:\/\/doi.org\/10.1145\/2512530.2512533","DOI":"10.1145\/2512530.2512533"},{"key":"23_CR29","unstructured":"WHO: Depressive disorder WHO (depression) (2023). https:\/\/www.who.int\/news-room\/fact-sheets\/detail\/depression"},{"key":"23_CR30","doi-asserted-by":"publisher","unstructured":"Wu, P., Wang, R., Lin, H., Zhang, F., Tu, J., Sun, M.: Automatic depression recognition by intelligent speech signal processing: a systematic survey. CAAI Trans. Intell. Technol. 8(3), 701\u2013711 (2023). https:\/\/doi.org\/10.1049\/cit2.12113. https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1049\/cit2.12113","DOI":"10.1049\/cit2.12113"},{"key":"23_CR31","doi-asserted-by":"publisher","unstructured":"Yang, L., Jiang, D., Han, W., Sahli, H.: DCNN and DNN based multi-modal depression recognition. In: 2017 Seventh International Conference on Affective Computing and Intelligent Interaction (ACII), pp. 484\u2013489 (2017). https:\/\/doi.org\/10.1109\/ACII.2017.8273643. https:\/\/ieeexplore.ieee.org\/abstract\/document\/8273643. iSSN: 2156-8111","DOI":"10.1109\/ACII.2017.8273643"},{"key":"23_CR32","doi-asserted-by":"publisher","unstructured":"Yin, F., Du, J., Xu, X., Zhao, L.: Depression detection in speech using transformer and parallel convolutional neural networks. Electronics 12(2), 328 (2023). https:\/\/doi.org\/10.3390\/electronics12020328. https:\/\/www.mdpi.com\/2079-9292\/12\/2\/328","DOI":"10.3390\/electronics12020328"},{"key":"23_CR33","doi-asserted-by":"publisher","unstructured":"Yu, Y., et al.: Recognition of depression, anxiety, and alcohol abuse in a Chinese rural sample: a cross-sectional study. BMC Psychiat. 16(1), 93 (2016). https:\/\/doi.org\/10.1186\/s12888-016-0802-0","DOI":"10.1186\/s12888-016-0802-0"},{"key":"23_CR34","doi-asserted-by":"publisher","unstructured":"Zou, Bet al.: Semi-structural interview-based Chinese multimodal depression corpus towards automatic preliminary screening of depressive disorders. IEEE Trans. Affect. Comput. 1\u201316 (2022). https:\/\/doi.org\/10.1109\/TAFFC.2022.3181210. https:\/\/ieeexplore.ieee.org\/document\/9793717\/algorithms","DOI":"10.1109\/TAFFC.2022.3181210"},{"key":"23_CR35","doi-asserted-by":"publisher","unstructured":"Zubiaga, I., Justo, R.: Multimodal feature evaluation and fusion for emotional well-being monitorization. In: Pinho, A.J., Georgieva, P., Teixeira, L.F., S\u00e1nchez, J.A. (eds.) Pattern Recognition and Image Analysis, pp. 242\u2013254. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-04881-4_20","DOI":"10.1007\/978-3-031-04881-4_20"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-77961-9_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,9]],"date-time":"2025-01-09T16:06:30Z","timestamp":1736438790000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-77961-9_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"ISBN":["9783031779602","9783031779619"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-77961-9_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"22 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgrade","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Serbia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom2024.ftn.uns.ac.rs\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}