{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T06:20:54Z","timestamp":1772605254967,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819985364","type":"print"},{"value":"9789819985371","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8537-1_19","type":"book-chapter","created":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:02:17Z","timestamp":1703530937000},"page":"233-244","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Going Beyond Closed Sets: A Multimodal Perspective for\u00a0Video Emotion Analysis"],"prefix":"10.1007","author":[{"given":"Hao","family":"Pu","sequence":"first","affiliation":[]},{"given":"Yuchong","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Ruihua","family":"Song","sequence":"additional","affiliation":[]},{"given":"Xu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Hao","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Yi","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Zhao","family":"Cao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,26]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Ali, A.R., et al.: High-level concepts for affective understanding of images. In: WACV, pp. 679\u2013687. IEEE (2017)","DOI":"10.1109\/WACV.2017.81"},{"issue":"1","key":"19_CR2","first-page":"43","volume":"6","author":"Y Baveye","year":"2015","unstructured":"Baveye, Y., et al.: LIRIS-ACCEDE: a video database for affective content analysis. TAC 6(1), 43\u201355 (2015)","journal-title":"TAC"},{"key":"19_CR3","unstructured":"Bertasius, G., et al.: Is space-time attention all you need for video understanding? In: ICML, vol. 2, p. 4 (2021)"},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Borth, D., et al.: Large-scale visual sentiment ontology and detectors using adjective noun pairs. In: ACM MM, pp. 223\u2013232 (2013)","DOI":"10.1145\/2502081.2502282"},{"issue":"38","key":"19_CR5","doi-asserted-by":"publisher","first-page":"E7900","DOI":"10.1073\/pnas.1702247114","volume":"114","author":"AS Cowen","year":"2017","unstructured":"Cowen, A.S., et al.: Self-report captures 27 distinct categories of emotion bridged by continuous gradients. PNAS 114(38), E7900\u2013E7909 (2017)","journal-title":"PNAS"},{"key":"19_CR6","unstructured":"Deng, S., et al.: Simple but powerful, a language-supervised method for image emotion classification. TAC (2022)"},{"issue":"3\u20134","key":"19_CR7","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1080\/02699939208411068","volume":"6","author":"P Ekman","year":"1992","unstructured":"Ekman, P.: An argument for basic emotions. Cogn. Emot. 6(3\u20134), 169\u2013200 (1992)","journal-title":"Cogn. Emot."},{"issue":"2","key":"19_CR8","first-page":"90","volume":"23","author":"A Hanjalic","year":"2006","unstructured":"Hanjalic, A.: Extracting moods from pictures and sounds: towards truly personalized tv. SPM 23(2), 90\u2013100 (2006)","journal-title":"SPM"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Jiang, Y.G., et al.: Predicting emotions in user-generated videos. In: AAAI, vol. 28 (2014)","DOI":"10.1609\/aaai.v28i1.8724"},{"key":"19_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1007\/978-3-031-19833-5_7","volume-title":"Computer Vision - ECCV 2022","author":"C Ju","year":"2022","unstructured":"Ju, C., et al.: Prompting visual-language models for efficient video understanding. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13695, pp. 105\u2013124. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_7"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Lee, J., et al.: Context-aware emotion recognition networks. In: ICCV, pp. 10143\u201310152 (2019)","DOI":"10.1109\/ICCV.2019.01024"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Decoupled multimodal distilling for emotion recognition. In: CVPR, pp. 6631\u20136640 (2023)","DOI":"10.1109\/CVPR52729.2023.00641"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., et al.: Focal loss for dense object detection. In: ICCV, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"19_CR14","unstructured":"Van der Maaten, L., et al.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(11) (2008)"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Machajdik, J., et al.: Affective image classification using features inspired by psychology and art theory. In: ACM MM, pp. 83\u201392 (2010)","DOI":"10.1145\/1873951.1873965"},{"key":"19_CR16","unstructured":"Mazeika, M., et al.: How would the viewer feel? Estimating wellbeing from video scenarios. arXiv preprint arXiv:2210.10039 (2022)"},{"key":"19_CR17","doi-asserted-by":"crossref","unstructured":"Pan, J., et al.: Representation learning through multimodal attention and time-sync comments for affective video content analysis. In: ACM MM, pp. 42\u201350 (2022)","DOI":"10.1145\/3503161.3548018"},{"issue":"197\u2013219","key":"19_CR18","first-page":"2","volume":"1984","author":"R Plutchik","year":"1984","unstructured":"Plutchik, R.: Emotions: a general psychoevolutionary theory. Approaches Emot. 1984(197\u2013219), 2\u20134 (1984)","journal-title":"Approaches Emot."},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Qiu, H., et al.: Dual focus attention network for video emotion recognition. In: ICME, pp. 1\u20136. IEEE (2020)","DOI":"10.1109\/ICME46284.2020.9102808"},{"key":"19_CR20","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML, pp. 8748\u20138763. PMLR (2021)"},{"key":"19_CR21","unstructured":"Sharir, G., et al.: An image is worth 16$$\\times $$16 words, what is a video worth? arXiv preprint arXiv:2103.13915 (2021)"},{"key":"19_CR22","unstructured":"Stray, J., et al.: What are you optimizing for? Aligning recommender systems with human values. arXiv preprint arXiv:2107.10939 (2021)"},{"key":"19_CR23","unstructured":"Tong, Z., et al.: VideoMAE: masked autoencoders are data-efficient learners for self-supervised video pre-training. arXiv preprint arXiv:2203.12602 (2022)"},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Tran, D., et al.: A closer look at spatiotemporal convolutions for action recognition. In: CVPR, pp. 6450\u20136459 (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"19_CR25","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, vol. 30 (2017)"},{"issue":"11","key":"19_CR26","doi-asserted-by":"publisher","first-page":"2740","DOI":"10.1109\/TPAMI.2018.2868668","volume":"41","author":"L Wang","year":"2018","unstructured":"Wang, L., et al.: Temporal segment networks for action recognition in videos. TPAMI 41(11), 2740\u20132755 (2018)","journal-title":"TPAMI"},{"key":"19_CR27","unstructured":"Wang, M., et al.: ActionCLIP: a new paradigm for video action recognition. arXiv preprint arXiv:2109.08472 (2021)"},{"issue":"2","key":"19_CR28","first-page":"255","volume":"9","author":"B Xu","year":"2016","unstructured":"Xu, B., et al.: Heterogeneous knowledge transfer in video emotion recognition, attribution and summarization. TAC 9(2), 255\u2013270 (2016)","journal-title":"TAC"},{"key":"19_CR29","doi-asserted-by":"crossref","unstructured":"Xu, B., et al.: Video emotion recognition with concept selection. In: ICME, pp. 406\u2013411. IEEE (2019)","DOI":"10.1109\/ICME.2019.00077"},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Yanulevskaya, V., et al.: Emotional valence categorization using holistic image features. In: ICIP, pp. 101\u2013104. IEEE (2008)","DOI":"10.1109\/ICIP.2008.4711701"},{"key":"19_CR31","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: CH-SIMS: a Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality. In: ACL, pp. 3718\u20133727 (2020)","DOI":"10.18653\/v1\/2020.acl-main.343"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: Learning modality-specific representations with self-supervised multi-task learning for multimodal sentiment analysis. In: AAAI, vol. 35, pp. 10790\u201310797 (2021)","DOI":"10.1609\/aaai.v35i12.17289"},{"key":"19_CR33","unstructured":"Zhang, H., et al.: Recognition of emotions in user-generated videos through frame-level adaptation and emotion intensity learning. TMM (2021)"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et al.: Temporal sentiment localization: listen and look in untrimmed videos. In: ACM MM, pp. 199\u2013208 (2022)","DOI":"10.1145\/3503161.3548007"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et al.: Weakly supervised video emotion detection and prediction via cross-modal temporal erasing network. In: CVPR, pp. 18888\u201318897 (2023)","DOI":"10.1109\/CVPR52729.2023.01811"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Zhao, S., et al.: An end-to-end visual-audio attention network for emotion recognition in user-generated videos. In: AAAI, vol. 34, pp. 303\u2013311 (2020)","DOI":"10.1609\/aaai.v34i01.5364"},{"issue":"10","key":"19_CR37","doi-asserted-by":"publisher","first-page":"6729","DOI":"10.1109\/TPAMI.2021.3094362","volume":"44","author":"S Zhao","year":"2021","unstructured":"Zhao, S., et al.: Affective image content analysis: two decades review and new perspectives. TPAMI 44(10), 6729\u20136751 (2021)","journal-title":"TPAMI"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8537-1_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:09:40Z","timestamp":1703531380000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8537-1_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,26]]},"ISBN":["9789819985364","9789819985371"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8537-1_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,26]]},"assertion":[{"value":"26 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}