{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:13:43Z","timestamp":1775229223505,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,2]],"date-time":"2018-10-02T00:00:00Z","timestamp":1538438400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1319598"],"award-info":[{"award-number":["1319598"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,2]]},"DOI":"10.1145\/3242969.3264990","type":"proceedings-article","created":{"date-parts":[[2018,10,2]],"date-time":"2018-10-02T08:09:29Z","timestamp":1538467769000},"page":"635-639","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":33,"title":["Group-Level Emotion Recognition Using Hybrid Deep Models Based on Faces, Scenes, Skeletons and Visual Attentions"],"prefix":"10.1145","author":[{"given":"Xin","family":"Guo","sequence":"first","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}]},{"given":"Bin","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}]},{"given":"Luisa F.","family":"Polan\u00eda","sequence":"additional","affiliation":[{"name":"American Family Mutual Insurance Company, Madison, WI, USA"}]},{"given":"Charles","family":"Boncelet","sequence":"additional","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}]},{"given":"Kenneth E.","family":"Barner","sequence":"additional","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}]}],"member":"320","published-online":{"date-parts":[[2018,10,2]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Bottom-Up and Top-Down Attention for Image Captioning and VQA. CoRR","author":"Anderson Peter","year":"2017","unstructured":"Peter Anderson , Xiaodong He , Chris Buehler , Damien Teney , Mark Johnson , Stephen Gould , and Lei Zhang . 2017. Bottom-Up and Top-Down Attention for Image Captioning and VQA. CoRR Vol. abs\/ 1707 .07998 ( 2017 ). {arxiv}1707.07998 http:\/\/arxiv.org\/abs\/1707.07998. Peter Anderson, Xiaodong He, Chris Buehler, Damien Teney, Mark Johnson, Stephen Gould, and Lei Zhang. 2017. Bottom-Up and Top-Down Attention for Image Captioning and VQA. CoRR Vol. abs\/1707.07998 (2017). {arxiv}1707.07998 http:\/\/arxiv.org\/abs\/1707.07998."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1107622.1107644"},{"key":"e_1_3_2_1_3_1","volume-title":"VGGFace2: A dataset for recognising faces across pose and age. CoRR","author":"Cao Qiong","year":"2017","unstructured":"Qiong Cao , Li Shen , Weidi Xie , Omkar M. Parkhi , and Andrew Zisserman . 2017. VGGFace2: A dataset for recognising faces across pose and age. CoRR Vol. abs\/ 1710 .08092 ( 2017 ). {arxiv}1710.08092 http:\/\/arxiv.org\/abs\/1710.08092. Qiong Cao, Li Shen, Weidi Xie, Omkar M. Parkhi, and Andrew Zisserman. 2017. VGGFace2: A dataset for recognising faces across pose and age. CoRR Vol. abs\/1710.08092 (2017). {arxiv}1710.08092 http:\/\/arxiv.org\/abs\/1710.08092."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Z. Cao T. Simon S. Wei and Y. Sheikh. 2016. Realtime multi-person 2D pose estimation using part affinity fields. arXiv preprint arXiv:1611.08050 (2016). Z. Cao T. Simon S. Wei and Y. Sheikh. 2016. Realtime multi-person 2D pose estimation using part affinity fields. arXiv preprint arXiv:1611.08050 (2016).","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei. 2009. ImageNet: A Large-Scale Hierarchical Image Database CVPR. J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei. 2009. ImageNet: A Large-Scale Hierarchical Image Database CVPR.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_6_1","volume-title":"International Conference on Neural Information Processing. Springer, 485--492","author":"Dhall A.","unstructured":"A. Dhall , A. Asthana , and R. Goecke . 2010. Facial expression based automatic album creation . In International Conference on Neural Information Processing. Springer, 485--492 . A. Dhall, A. Asthana, and R. Goecke. 2010. Facial expression based automatic album creation. In International Conference on Neural Information Processing. Springer, 485--492."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2397456"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"A. Dhall J. Joshi K. Sikka R. Goecke and N. Sebe. 2015. The more the merrier: Analysing the affect of a group of people in images IEEE International Conference and Workshops on Automatic Face and Gesture Recognition Vol. Vol. 1. IEEE 1--8. A. Dhall J. Joshi K. Sikka R. Goecke and N. Sebe. 2015. The more the merrier: Analysing the affect of a group of people in images IEEE International Conference and Workshops on Automatic Face and Gesture Recognition Vol. Vol. 1. IEEE 1--8.","DOI":"10.1109\/FG.2015.7163151"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3264993"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"I. J. Goodfellow etal. 2013. Challenges in representation learning: A report on three machine learning contests International Conference on Neural Information Processing. Springer 117--124. I. J. Goodfellow et al.. 2013. Challenges in representation learning: A report on three machine learning contests International Conference on Neural Information Processing. Springer 117--124.","DOI":"10.1007\/978-3-642-42051-1_16"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3143017"},{"key":"e_1_3_2_1_12_1","volume-title":"Barner","author":"Guo Xin","year":"2018","unstructured":"Xin Guo , Luisa F. Polania , and Kenneth E . Barner . 2018 . Smile de tection in the wild based on transfer learning. (2018). Xin Guo, Luisa F. Polania, and Kenneth E. Barner. 2018. Smile detection in the wild based on transfer learning. (2018)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Y. Guo L. Zhang Y. Hu X. He and J. Gao. 2016. MS-Celeb-1M: A Dataset and Benchmark for Large Scale Face Recognition European Conference on Computer Vision. Springer. Y. Guo L. Zhang Y. Hu X. He and J. Gao. 2016. MS-Celeb-1M: A Dataset and Benchmark for Large Scale Face Recognition European Conference on Computer Vision. Springer.","DOI":"10.1007\/978-3-319-46487-9_6"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"K. He X. Zhang S. Ren and J. Sun. 2016. Deep residual learning for image recognition. In CVPR. 770--778. K. He X. Zhang S. Ren and J. Sun. 2016. Deep residual learning for image recognition. In CVPR. 770--778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_16_1","volume-title":"Squeeze-and-Excitation Networks. CoRR","author":"Hu Jie","year":"2017","unstructured":"Jie Hu , Li Shen , and Gang Sun . 2017. Squeeze-and-Excitation Networks. CoRR Vol. abs\/ 1709 .01507 ( 2017 ). {arxiv}1709.01507 http:\/\/arxiv.org\/abs\/1709.01507. Jie Hu, Li Shen, and Gang Sun. 2017. Squeeze-and-Excitation Networks. CoRR Vol. abs\/1709.01507 (2017). {arxiv}1709.01507 http:\/\/arxiv.org\/abs\/1709.01507."},{"key":"e_1_3_2_1_17_1","volume-title":"Riesz-based","author":"Huang Xiaohua","unstructured":"Xiaohua Huang , Abhinav Dhall , Guoying Zhao , Roland Goecke , and Matti Pietik\u00e4inen . 2015. Riesz-based Volume Local Binary Pattern and A Novel Group Expression Model for Group Happiness Intensity Analysis. In BMVC. 1-- 9 . Xiaohua Huang, Abhinav Dhall, Guoying Zhao, Roland Goecke, and Matti Pietik\u00e4inen. 2015. Riesz-based Volume Local Binary Pattern and A Novel Group Expression Model for Group Happiness Intensity Analysis. In BMVC. 1--9."},{"key":"e_1_3_2_1_18_1","unstructured":"A. Krizhevsky I. Sutskever and G. E. Hinton. 2012. Imagenet classification with deep convolutional neural networks Advances in Neural Information Processing Systems. 1097--1105. A. Krizhevsky I. Sutskever and G. E. Hinton. 2012. Imagenet classification with deep convolutional neural networks Advances in Neural Information Processing Systems. 1097--1105."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2993148.2997636"},{"key":"e_1_3_2_1_20_1","volume-title":"Large-Margin Softmax Loss for Convolutional Neural Networks Proceedings of The 33rd International Conference on Machine Learning. 507--516","author":"Liu Weiyang","year":"2016","unstructured":"Weiyang Liu , Yandong Wen , Zhiding Yu , and Meng Yang . 2016 . Large-Margin Softmax Loss for Convolutional Neural Networks Proceedings of The 33rd International Conference on Machine Learning. 507--516 . Weiyang Liu, Yandong Wen, Zhiding Yu, and Meng Yang. 2016. Large-Margin Softmax Loss for Convolutional Neural Networks Proceedings of The 33rd International Conference on Machine Learning. 507--516."},{"key":"e_1_3_2_1_21_1","unstructured":"Jiasen Lu Caiming Xiong Devi Parikh and Richard Socher. 2017. Knowing When to Look: Adaptive Attention via A Visual Sentinel for Image Captioning. Jiasen Lu Caiming Xiong Devi Parikh and Richard Socher. 2017. Knowing When to Look: Adaptive Attention via A Visual Sentinel for Image Captioning."},{"key":"e_1_3_2_1_22_1","volume-title":"Advances in Neural Information Processing Systems 27","author":"Mnih Volodymyr","unstructured":"Volodymyr Mnih , Nicolas Heess , Alex Graves , and koray kavukcuoglu. 2014. Recurrent Models of Visual Attention . In Advances in Neural Information Processing Systems 27 , Z. Ghahramani, M. Welling, C. Cortes, N. D. Lawrence, and K. Q. Weinberger (Eds.). Curran Associates, Inc. , 2204--2212. http:\/\/papers.nips.cc\/paper\/5542-recurrent-models-of-visual-attention.pdf. Volodymyr Mnih, Nicolas Heess, Alex Graves, and koray kavukcuoglu. 2014. Recurrent Models of Visual Attention. In Advances in Neural Information Processing Systems 27, Z. Ghahramani, M. Welling, C. Cortes, N. D. Lawrence, and K. Q. Weinberger (Eds.). Curran Associates, Inc., 2204--2212. http:\/\/papers.nips.cc\/paper\/5542-recurrent-models-of-visual-attention.pdf."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"W. Mou O. Celiktutan and H. Gunes. 2015. Group-level arousal and valence recognition in static images: Face body and context IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG) Vol. Vol. 5. IEEE 1--6. W. Mou O. Celiktutan and H. Gunes. 2015. Group-level arousal and valence recognition in static images: Face body and context IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG) Vol. Vol. 5. IEEE 1--6.","DOI":"10.1109\/FG.2015.7284862"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"P. M. Niedenthal and M. Brauer. 2012. Social functionality of human emotion. Annual review of psychology Vol. 63 (2012) 259--285. P. M. Niedenthal and M. Brauer. 2012. Social functionality of human emotion. Annual review of psychology Vol. 63 (2012) 259--285.","DOI":"10.1146\/annurev.psych.121208.131605"},{"key":"e_1_3_2_1_25_1","volume-title":"Deep Face Recognition. In British Machine Vision Conference.","author":"Parkhi O. M.","unstructured":"O. M. Parkhi , A. Vedaldi , and A. Zisserman . 2015 . Deep Face Recognition. In British Machine Vision Conference. O. M. Parkhi, A. Vedaldi, and A. Zisserman. 2015. Deep Face Recognition. In British Machine Vision Conference."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0010-0277(01)00147-0"},{"key":"e_1_3_2_1_27_1","volume-title":"Self-critical Sequence Training for Image Captioning. CoRR","author":"Rennie Steven J.","year":"2016","unstructured":"Steven J. Rennie , Etienne Marcheret , Youssef Mroueh , Jarret Ross , and Vaibhava Goel . 2016. Self-critical Sequence Training for Image Captioning. CoRR Vol. abs\/ 1612 .00563 ( 2016 ). {arxiv}1612.00563 http:\/\/arxiv.org\/abs\/1612.00563. Steven J. Rennie, Etienne Marcheret, Youssef Mroueh, Jarret Ross, and Vaibhava Goel. 2016. Self-critical Sequence Training for Image Captioning. CoRR Vol. abs\/1612.00563 (2016). {arxiv}1612.00563 http:\/\/arxiv.org\/abs\/1612.00563."},{"key":"e_1_3_2_1_28_1","unstructured":"K. Simonyan and A. Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014). K. Simonyan and A. Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"C. Szegedy V. Vanhoucke S. Ioffe J. Shlens and Z. Wojna. 2016. Rethinking the inception architecture for computer vision Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2818--2826. C. Szegedy V. Vanhoucke S. Ioffe J. Shlens and Z. Wojna. 2016. Rethinking the inception architecture for computer vision Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2818--2826.","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3143008"},{"key":"e_1_3_2_1_31_1","unstructured":"S. Tomas J. Hanbyul M. Iain and S. Yaser. 2017. Hand Keypoint Detection in Single Images using Multiview Bootstrapping CVPR. S. Tomas J. Hanbyul M. Iain and S. Yaser. 2017. Hand Keypoint Detection in Single Images using Multiview Bootstrapping CVPR."},{"key":"e_1_3_2_1_32_1","volume-title":"Event detection: Ultra large-scale clustering of facial expressions IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG)","author":"Vandal T.","unstructured":"T. Vandal , D. McDuff , and R. El Kaliouby . 2015. Event detection: Ultra large-scale clustering of facial expressions IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG) , Vol. Vol. 1 . IEEE , 1--8. T. Vandal, D. McDuff, and R. El Kaliouby. 2015. Event detection: Ultra large-scale clustering of facial expressions IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), Vol. Vol. 1. IEEE, 1--8."},{"key":"e_1_3_2_1_33_1","volume-title":"Residual Attention Network for Image Classification. CoRR","author":"Wang Fei","year":"2017","unstructured":"Fei Wang , Mengqing Jiang , Chen Qian , Shuo Yang , Cheng Li , Honggang Zhang , Xiaogang Wang , and Xiaoou Tang . 2017. Residual Attention Network for Image Classification. CoRR Vol. abs\/ 1704 .06904 ( 2017 ). {arxiv}1704.06904 http:\/\/arxiv.org\/abs\/1704.06904. Fei Wang, Mengqing Jiang, Chen Qian, Shuo Yang, Cheng Li, Honggang Zhang, Xiaogang Wang, and Xiaoou Tang. 2017. Residual Attention Network for Image Classification. CoRR Vol. abs\/1704.06904 (2017). {arxiv}1704.06904 http:\/\/arxiv.org\/abs\/1704.06904."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"S. Wei V. Ramakrishna T. Kanade and Y. Sheikh. 2016. Convolutional pose machines. In CVPR. S. Wei V. Ramakrishna T. Kanade and Y. Sheikh. 2016. Convolutional pose machines. In CVPR.","DOI":"10.1109\/CVPR.2016.511"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.42"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.224"},{"key":"e_1_3_2_1_37_1","volume-title":"Amsterdam, The Netherlands","author":"Xu Huijuan","year":"2016","unstructured":"Huijuan Xu and Kate Saenko . 2016 . Ask, Attend and Answer: Exploring Question-Guided Spatial Attention for Visual Question Answering. In Computer Vision - ECCV 2016 - 14th European Conference , Amsterdam, The Netherlands , October 11-14, 2016, Proceedings, Part VII. 451--466. Huijuan Xu and Kate Saenko. 2016. Ask, Attend and Answer: Exploring Question-Guided Spatial Attention for Visual Question Answering. In Computer Vision - ECCV 2016 - 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part VII. 451--466."},{"key":"e_1_3_2_1_38_1","volume-title":"Smola","author":"Yang Zichao","year":"2015","unstructured":"Zichao Yang , Xiaodong He , Jianfeng Gao , Li Deng , and Alexander J . Smola . 2015 . Stacked Attention Networks for Image Question Answering. CoRR Vol . abs\/1511.02274 (2015). http:\/\/arxiv.org\/abs\/1511.02274. Zichao Yang, Xiaodong He, Jianfeng Gao, Li Deng, and Alexander J. Smola. 2015. Stacked Attention Networks for Image Question Answering. CoRR Vol. abs\/1511.02274 (2015). http:\/\/arxiv.org\/abs\/1511.02274."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2016.2603342"}],"event":{"name":"ICMI '18: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","location":"Boulder CO USA","acronym":"ICMI '18","sponsor":["SIGCHI Specialist Interest Group in Computer-Human Interaction of the ACM"]},"container-title":["Proceedings of the 20th ACM International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3242969.3264990","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3242969.3264990","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3242969.3264990","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:56:20Z","timestamp":1761094580000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3242969.3264990"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,2]]},"references-count":39,"alternative-id":["10.1145\/3242969.3264990","10.1145\/3242969"],"URL":"https:\/\/doi.org\/10.1145\/3242969.3264990","relation":{},"subject":[],"published":{"date-parts":[[2018,10,2]]},"assertion":[{"value":"2018-10-02","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}