{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T16:21:01Z","timestamp":1772554861170,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819984688","type":"print"},{"value":"9789819984695","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T00:00:00Z","timestamp":1703462400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T00:00:00Z","timestamp":1703462400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8469-5_20","type":"book-chapter","created":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T17:02:18Z","timestamp":1703437338000},"page":"252-264","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["An Automatic Depression Detection Method with\u00a0Cross-Modal Fusion Network and\u00a0Multi-head Attention Mechanism"],"prefix":"10.1007","author":[{"given":"Yutong","family":"Li","sequence":"first","affiliation":[]},{"given":"Juan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhenyu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Li","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Haibo","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Cheng","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Xiping","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Bin","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,25]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"American Psychiatric Association, A., Association, A.P., et al.: Diagnostic and statistical manual of mental disorders: DSM-5, vol. 10. Washington, DC: American psychiatric association (2013)","DOI":"10.1176\/appi.books.9780890425596"},{"key":"20_CR2","doi-asserted-by":"publisher","first-page":"103","DOI":"10.1016\/j.jbi.2018.05.007","volume":"83","author":"L He","year":"2018","unstructured":"He, L., Cao, C.: Automated depression analysis using convolutional neural networks from speech. J. Biomed. Inform. 83, 103\u2013111 (2018)","journal-title":"J. Biomed. Inform."},{"key":"20_CR3","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.neucom.2021.02.019","volume":"441","author":"Y Dong","year":"2021","unstructured":"Dong, Y., Yang, X.: A hierarchical depression detection model based on vocal and emotional cues. Neurocomputing 441, 279\u2013290 (2021)","journal-title":"Neurocomputing"},{"issue":"4","key":"20_CR4","doi-asserted-by":"publisher","first-page":"578","DOI":"10.1109\/TAFFC.2017.2650899","volume":"9","author":"Y Zhu","year":"2017","unstructured":"Zhu, Y., Shang, Y., Shao, Z., Guo, G.: Automated depression diagnosis based on deep networks to encode facial appearance and dynamics. IEEE Trans. Affect. Comput. 9(4), 578\u2013584 (2017)","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"1","key":"20_CR5","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1109\/TAFFC.2018.2870884","volume":"12","author":"M Al Jazaery","year":"2018","unstructured":"Al Jazaery, M., Guo, G.: Video-based depression level analysis by encoding deep spatiotemporal features. IEEE Trans. Affect. Comput. 12(1), 262\u2013268 (2018)","journal-title":"IEEE Trans. Affect. Comput."},{"issue":"1","key":"20_CR6","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1111\/j.1365-2850.2009.01469.x","volume":"17","author":"A McPherson","year":"2010","unstructured":"McPherson, A., Martin, C.: A narrative review of the beck depression inventory (BDI) and implications for its use in an alcohol-dependent population. J. Psychiatr. Ment. Health Nurs. 17(1), 19\u201330 (2010)","journal-title":"J. Psychiatr. Ment. Health Nurs."},{"issue":"7","key":"20_CR7","doi-asserted-by":"publisher","first-page":"1432","DOI":"10.1109\/TIFS.2015.2414392","volume":"10","author":"L Wen","year":"2015","unstructured":"Wen, L., Li, X., Guo, G., Zhu, Y.: Automated depression diagnosis based on facial dynamic analysis and sparse coding. IEEE Trans. Inf. Forensics Secur. 10(7), 1432\u20131441 (2015)","journal-title":"IEEE Trans. Inf. Forensics Secur."},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Stasak, B., Joachim, D., Epps, J.: Breaking age barriers with automatic voice-based depression detection. IEEE Pervasive Comput. (2022)","DOI":"10.1109\/MPRV.2022.3163656"},{"key":"20_CR9","doi-asserted-by":"publisher","first-page":"56","DOI":"10.1016\/j.inffus.2021.10.012","volume":"80","author":"L He","year":"2022","unstructured":"He, L., et al.: Deep learning for depression recognition with audiovisual cues: a review. Inf. Fusion 80, 56\u201386 (2022)","journal-title":"Inf. Fusion"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Dubagunta, S.P., Vlasenko, B., Doss, M.M.: Learning voice source related information for depression detection. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6525\u20136529. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8683498"},{"key":"20_CR11","unstructured":"Haque, A., Guo, M., Miner, A.S., Fei-Fei, L.: Measuring depression symptom severity from spoken language and 3d facial expressions. arXiv preprint arXiv:1811.08592 (2018)"},{"issue":"3","key":"20_CR12","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1109\/TCDS.2017.2721552","volume":"10","author":"A Jan","year":"2017","unstructured":"Jan, A., Meng, H., Gaus, Y.F.B.A., Zhang, F.: Artificial intelligent system for automatic depression level analysis through visual and vocal expressions. IEEE Trans. Cogn. Dev. Syst. 10(3), 668\u2013680 (2017)","journal-title":"IEEE Trans. Cogn. Dev. Syst."},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"He, L., Jiang, D., Sahli, H.: Multimodal depression recognition with dynamic visual and audio cues. In: 2015 International Conference on Affective Computing and Intelligent Interaction (ACII), pp. 260\u2013266. IEEE (2015)","DOI":"10.1109\/ACII.2015.7344581"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Meng, H., Huang, D., Wang, H., Yang, H., Ai-Shuraifi, M., Wang, Y.: Depression recognition based on dynamic facial and vocal expression features using partial least square regression. In: Proceedings of the 3rd ACM International Workshop on Audio\/Visual Emotion Challenge, pp. 21\u201330 (2013)","DOI":"10.1145\/2512530.2512532"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Cummins, N., Joshi, J., Dhall, A., Sethu, V., Goecke, R., Epps, J.: Diagnosis of depression by behavioural signals: a multimodal approach. In: Proceedings of the 3rd ACM International Workshop on Audio\/Visual Emotion Challenge, pp. 11\u201320 (2013)","DOI":"10.1145\/2512530.2512535"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Barsoum, E., Zhang, C., Ferrer, C.C., Zhang, Z.: Training deep networks for facial expression recognition with crowd-sourced label distribution. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction, pp. 279\u2013283 (2016)","DOI":"10.1145\/2993148.2993165"},{"issue":"2","key":"20_CR18","doi-asserted-by":"publisher","first-page":"190","DOI":"10.1109\/TAFFC.2015.2457417","volume":"7","author":"F Eyben","year":"2015","unstructured":"Eyben, F., et al.: The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing. IEEE Trans. Affect. Comput. 7(2), 190\u2013202 (2015)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"20_CR19","unstructured":"Bai, S., Kolter, J.Z., Koltun, V.: An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. arXiv preprint arXiv:1803.01271 (2018)"},{"key":"20_CR20","first-page":"1755","volume":"10","author":"DE King","year":"2009","unstructured":"King, D.E.: Dlib-ml: a machine learning toolkit. J. Mach. Learn. Res. 10, 1755\u20131758 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"20_CR21","unstructured":"Stevens, E., Antiga, L., Viehmann, T.: Deep Learning with PyTorch. Manning Publications (2020)"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Uddin, M.A., Joolee, J.B., Sohn, K.A.: Deep multi-modal network based automated depression severity estimation. IEEE Trans. Affect. Comput. (2022)","DOI":"10.1109\/TAFFC.2022.3179478"},{"issue":"2","key":"20_CR23","doi-asserted-by":"publisher","first-page":"272","DOI":"10.1109\/TAFFC.2017.2766145","volume":"11","author":"N Cummins","year":"2017","unstructured":"Cummins, N., Sethu, V., Epps, J., Williamson, J.R., Quatieri, T.F., Krajewski, J.: Generalized two-stage rank regression framework for depression score prediction from speech. IEEE Trans. Affect. Comput. 11(2), 272\u2013283 (2017)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Niu, M., Tao, J., Liu, B., Fan, C.: Automatic depression level detection via lp-Norm pooling. In: Proceedings of the INTERSPEECH, Graz, Austria, pp. 4559\u20134563 (2019)","DOI":"10.21437\/Interspeech.2019-1617"},{"key":"20_CR25","unstructured":"Niu, M., Tao, J., Liu, B., Huang, J., Lian, Z.: Multimodal spatiotemporal representation for automatic depression level detection. IEEE Trans. Affect. Comput. (2020)"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Li, Q., Cummins, N., Liu, B., Wang, H., Tao, J., Schuller, B.: Hybrid network feature extraction for depression assessment from speech. In: Proceeding of the INTERSPEECH, Shanghai, China, pp. 4956\u20134960 (2020)","DOI":"10.21437\/Interspeech.2020-2396"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"De Melo, W.C., Granger, E., Hadid, A.: Depression detection based on deep distribution learning. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 4544\u20134548. IEEE (2019)","DOI":"10.1109\/ICIP.2019.8803467"},{"issue":"3","key":"20_CR28","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1109\/TAFFC.2018.2828819","volume":"11","author":"X Zhou","year":"2018","unstructured":"Zhou, X., Jin, K., Shang, Y., Guo, G.: Visually interpretable representation learning for depression recognition from facial images. IEEE Trans. Affect. Comput. 11(3), 542\u2013552 (2018)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"20_CR29","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1016\/j.neucom.2020.10.015","volume":"422","author":"L He","year":"2021","unstructured":"He, L., Chan, J.C.W., Wang, Z.: Automatic depression recognition using CNN with attention mechanism from videos. Neurocomputing 422, 165\u2013175 (2021)","journal-title":"Neurocomputing"},{"issue":"2","key":"20_CR30","doi-asserted-by":"publisher","first-page":"864","DOI":"10.1109\/TAFFC.2020.2970418","volume":"13","author":"MA Uddin","year":"2020","unstructured":"Uddin, M.A., Joolee, J.B., Lee, Y.K.: Depression level prediction using deep spatiotemporal features and multilayer Bi-LTSM. IEEE Trans. Affect. Comput. 13(2), 864\u2013870 (2020)","journal-title":"IEEE Trans. Affect. Comput."},{"key":"20_CR31","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1016\/j.neunet.2022.05.025","volume":"153","author":"L He","year":"2022","unstructured":"He, L., Tiwari, P., Lv, C., Wu, W., Guo, L.: Reducing noisy annotations for depression estimation from facial images. Neural Netw. 153, 120\u2013129 (2022)","journal-title":"Neural Netw."},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Liu, Z., Yuan, X., Li, Y., Shangguan, Z., Zhou, L., Hu, B.: PRA-Net: part-and-relation attention network for depression recognition from facial expression. Comput. Biol. Med., 106589 (2023)","DOI":"10.1016\/j.compbiomed.2023.106589"},{"key":"20_CR33","doi-asserted-by":"publisher","first-page":"1188434","DOI":"10.3389\/fnins.2023.1188434","volume":"17","author":"Y Li","year":"2023","unstructured":"Li, Y., et al.: A facial depression recognition method based on hybrid multi-head cross attention network. Front. Neurosci. 17, 1188434 (2023)","journal-title":"Front. Neurosci."},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Kaya, H., \u00c7illi, F., Salah, A.A.: Ensemble CCA for continuous emotion prediction. In: Proceedings of the 4th International Workshop on Audio\/Visual Emotion Challenge, pp. 19\u201326 (2014)","DOI":"10.1145\/2661806.2661814"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8469-5_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T17:05:21Z","timestamp":1703437521000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8469-5_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,25]]},"ISBN":["9789819984688","9789819984695"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8469-5_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,25]]},"assertion":[{"value":"25 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}