{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T23:35:19Z","timestamp":1743032119416,"version":"3.40.3"},"publisher-location":"Cham","reference-count":22,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031207150"},{"type":"electronic","value":"9783031207167"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20716-7_23","type":"book-chapter","created":{"date-parts":[[2022,12,9]],"date-time":"2022-12-09T05:03:02Z","timestamp":1670562182000},"page":"295-306","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Emotion Recognition in Video Streams Using Intramodal and Intermodal Attention Mechanisms"],"prefix":"10.1007","author":[{"given":"Bogdan","family":"Mocanu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruxandra","family":"Tapu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,12,10]]},"reference":[{"key":"23_CR1","doi-asserted-by":"crossref","unstructured":"Cao, Q., Hou, M., Chen, B., Zhang, Z., Lu, G.: Hierarchical network based on the fusion of static and dynamic features for speech emotion recognition. In: ICASSP \u2013 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) pp., 6334\u20136338 (2021)","DOI":"10.1109\/ICASSP39728.2021.9414540"},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Hern\u00e1ndez-Luquin, F., Escalante, H.J.: Multi-branch deep radial basis function networks for facial emotion recognition. Neural Comput. Applic. (2021)","DOI":"10.1007\/s00521-021-06420-w"},{"key":"23_CR3","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1016\/j.future.2020.06.050","volume":"113","author":"U Naseem","year":"2020","unstructured":"Naseem, U., Razzak, I., Musial, K., Imran, M.: Transformer based deep intelligent contextual embedding for Twitter sentiment analysis. Futur. Gener. Comput. Syst. 113, 58\u201369 (2020)","journal-title":"Futur. Gener. Comput. Syst."},{"key":"23_CR4","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1037\/h0030377","volume":"17","author":"P Ekman","year":"1971","unstructured":"Ekman, P., Friesen, W.V.: Constants across cultures in the face and emotion. J. Pers. Soc. Psychol. 17, 124\u2013129 (1971)","journal-title":"J. Pers. Soc. Psychol."},{"key":"23_CR5","doi-asserted-by":"crossref","unstructured":"Hara, K., Kataoka H., Satoh, Y.: Can spatiotemporal 3D CNNs retrace the history of 2D CNNs and ImageNet? In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6546\u20136555 (2018)","DOI":"10.1109\/CVPR.2018.00685"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"5","key":"23_CR7","doi-asserted-by":"publisher","first-page":"e0196391","DOI":"10.1371\/journal.pone.0196391","volume":"13","author":"SR Livingstone","year":"2018","unstructured":"Livingstone, S.R., Russo, F.A.: The ryerson audio-visual database of emotional speech and song (RAVDESS): a dynamic, multimodal set of facial and vocal expressions in North American English. PLoS ONE 13(5), e0196391 (2018)","journal-title":"PLoS ONE"},{"issue":"8","key":"23_CR8","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1109\/JSTSP.2017.2764438","volume":"11","author":"P Tzirakis","year":"2017","unstructured":"Tzirakis, P., Trigeorgis, G., Nicolaou, M.A., Schuller, B.W., Zafeiriou, S.: End-to-end multimodal emotion recognition using deep neural networks. IEEE J. Sel. Top. Sign. Process. 11(8), 1301\u20131309 (2017)","journal-title":"IEEE J. Sel. Top. Sign. Process."},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Ortega, J.D.S., Cardinal, P., Koerich, A.L.: Emotion recognition using fusion of audio and video features. In: 2019 IEEE International Conference on Systems, Man and Cybernetics (SMC), pp. 3847\u20133852 (2019)","DOI":"10.1109\/SMC.2019.8914655"},{"key":"23_CR10","doi-asserted-by":"publisher","first-page":"1313","DOI":"10.1109\/TMM.2021.3063612","volume":"24","author":"D Nguyen","year":"2021","unstructured":"Nguyen, D., et al.: Deep auto-encoders with sequential learning for multimodal dimensional emotion recognition. IEEE Trans. Multimedia 24, 1313\u20131324 (2021)","journal-title":"IEEE Trans. Multimedia"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Zhao, S., et al.: An end-to-end visual-audio attention network for emotion recognition in user-generated videos. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 303\u2013311 (2020)","DOI":"10.1609\/aaai.v34i01.5364"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Ghaleb, E., Niehues, J., Asteriadis, S.: Multimodal attention-mechanism for temporal emotion recognition. In: 2020 IEEE International Conference on Image Processing (ICIP), pp. 251\u2013255 (2020)","DOI":"10.1109\/ICIP40778.2020.9191019"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wu, J., Heracleous, P., Wada, S., Kimura, R., Kurihara, S.: Implicit knowledge injectable cross attention audiovisual model for group emotion recognition. In: Proceedings of the 2020 International Conference on Multimodal Interaction, pp. 827\u2013834 (2020)","DOI":"10.1145\/3382507.3417960"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Parthasarathy, S., Sundaram, S.: Detecting expressions with multimodal transformers. In: 2021 IEEE Spoken Language Technology Workshop (SLT), pp. 636\u2013643 (2021)","DOI":"10.1109\/SLT48900.2021.9383573"},{"key":"23_CR15","doi-asserted-by":"publisher","first-page":"108580","DOI":"10.1016\/j.knosys.2022.108580","volume":"244","author":"AI Middya","year":"2022","unstructured":"Middya, A.I., Nag, B., Roy, S.: Deep learning based multimodal emotion recognition using model-level fusion of audio\u2013visual modalities. Knowl.-Based Syst. 244, 108580 (2022)","journal-title":"Knowl.-Based Syst."},{"key":"23_CR16","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 6000\u20136010 (2017)"},{"key":"23_CR17","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, L.-J., Li, K., Fei-Fei, L.: ImageNet: A large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4724\u20134733 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"23_CR19","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes, In: International Conference on Learning Representations, (2014)"},{"key":"23_CR20","unstructured":"Su, L., Hu, C., Li, G., Cao, D.: MSAF: Multimodal Split Attention Fusion. arXiv preprint arXiv: 2012.07175 (2020)"},{"key":"23_CR21","unstructured":"Fu, Z., Liu, F., Wang, H., Qi, J., Fu, X., Zhou, A., Li, Z.: A cross-modal fusion network based on self-attention and residual structure for multimodal emotion recognition. arXiv preprint arXiv: 2012.07175 (2021)"},{"key":"23_CR22","doi-asserted-by":"publisher","first-page":"7217","DOI":"10.3390\/app11167217","volume":"11","author":"C Luna-Jim\u00e9nez","year":"2021","unstructured":"Luna-Jim\u00e9nez, C., Crist\u00f3bal-Mart\u00edn, J., Kleinlein, R., Gil-Mart\u00edn, M., Moya, J.M., Fern\u00e1ndez-Mart\u00ednez, F.: Guided spatial transformers for facial expression recognition. Appl. Sci. 11, 7217 (2021)","journal-title":"Appl. Sci."}],"container-title":["Lecture Notes in Computer Science","Advances in Visual Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20716-7_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,9]],"date-time":"2022-12-09T05:07:24Z","timestamp":1670562444000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20716-7_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031207150","9783031207167"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20716-7_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"10 December 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISVC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Visual Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"San Diego, CA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"isvc2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.isvc.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"110","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"61","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"55% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2-3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}