{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T21:58:40Z","timestamp":1772661520149,"version":"3.50.1"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,28]],"date-time":"2021-09-28T00:00:00Z","timestamp":1632787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,28]],"date-time":"2021-09-28T00:00:00Z","timestamp":1632787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,28]],"date-time":"2021-09-28T00:00:00Z","timestamp":1632787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,28]]},"DOI":"10.1109\/aciiw52867.2021.9666356","type":"proceedings-article","created":{"date-parts":[[2022,1,10]],"date-time":"2022-01-10T21:14:00Z","timestamp":1641849240000},"page":"01-08","source":"Crossref","is-referenced-by-count":1,"title":["Temporal based Emotion Recognition inspired by Activity Recognition models"],"prefix":"10.1109","author":[{"given":"Balaganesh","family":"Mohan","sequence":"first","affiliation":[]},{"given":"Mirela","family":"Popa","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00126"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.29284\/IJASIS.7.1.2021.11-20"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP40778.2020.9191019"},{"key":"ref31","article-title":"An audio-video deep and transfer learning framework for multimodal emotion recognition in the wild","volume":"abs 2010 3692","author":"dresvyanskiy","year":"2020","journal-title":"CoRR"},{"key":"ref30","article-title":"Attention is all you need","volume":"abs 1706 3762","author":"vaswani","year":"2017","journal-title":"CoRR"},{"key":"ref37","article-title":"Inverted residuals and linear bottlenecks: Mobile networks for classification, detection and segmentation","volume":"abs 1801 4381","author":"sandler","year":"2018","journal-title":"CoRR"},{"key":"ref36","article-title":"Grad-cam: Why did you say that? visual explanations from deep networks via gradient-based localization","volume":"abs 1610 2391","author":"selvaraju","year":"2016","journal-title":"CoRR"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K18-1025"},{"key":"ref34","article-title":"Emotion recognition system from speech and visual information based on convolutional neural networks","volume":"abs 2003 351","author":"ristea","year":"2020","journal-title":"CoRR"},{"key":"ref10","author":"ekman","year":"1994","journal-title":"The Nature of Emotion Fundamental Questions"},{"key":"ref40","article-title":"Whenet: Real-time fine-grained estimation for wide range head pose","volume":"abs 2005 10353","author":"zhou","year":"2020","journal-title":"CoRR"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7532431"},{"key":"ref12","author":"tran","year":"2015","journal-title":"Learning spatiotemporal features with 3d convolutional networks"},{"key":"ref13","author":"carreira","year":"2018","journal-title":"Quo Vadis Action Recognition? A New Model and the Kinetics Dataset"},{"key":"ref14","article-title":"Temporal shift module for efficient video understanding","volume":"abs 1811 8383","author":"lin","year":"2018","journal-title":"CoRR"},{"key":"ref15","article-title":"Is space-time attention all you need for video understanding?","volume":"abs 2102 5095","author":"bertasius","year":"2021","journal-title":"CoRR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2014.2336244"},{"key":"ref17","volume":"1","author":"michel","year":"0","journal-title":"Real Time Facial Expression Recognition in Video Using Support Vector Machines"},{"key":"ref18","article-title":"Very deep convolutional networks for large-scale image recognition","volume":"abs 1409 1556","author":"simonyan","year":"2014","journal-title":"CoRR"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.25046\/aj050638"},{"key":"ref28","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3388790"},{"key":"ref27","article-title":"Learning face representation from scratch","volume":"abs 1411 7923","author":"yi","year":"2014","journal-title":"CoRR"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.compedu.2019.103649"},{"key":"ref6","article-title":"Deep-emotion: Facial expression recognition using attentional convolutional network","volume":"abs 1902 1019","author":"minaee","year":"2019","journal-title":"CoRR"},{"key":"ref29","article-title":"Imagenet classification with deep convolutional neural networks","volume":"25","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICIEA.2016.7603570"},{"key":"ref8","article-title":"Facial emotion recognition with noisy multi-task annotations","volume":"abs 2010 9849","author":"zhang","year":"2020","journal-title":"CoRR"},{"key":"ref7","article-title":"Facial emotion recognition: State of the art performance on FER2013","volume":"abs 2105 3588","author":"khaireddin","year":"2021","journal-title":"CoRR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2018.2890471"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1702247114"},{"key":"ref1","volume":"abs 1807 8169","author":"minar","year":"2018","journal-title":"Recent Advances in Deep Learning An Overview"},{"key":"ref20","article-title":"Going deeper with convolutions","volume":"abs 1409 4842","author":"szegedy","year":"2014","journal-title":"CoRR"},{"key":"ref45","article-title":"Close to human quality TTS with transformer","volume":"abs 1809 8895","author":"li","year":"2018","journal-title":"CoRR"},{"key":"ref22","article-title":"Temporal segment networks for action recognition in videos","volume":"abs 1705 2953","author":"wang","year":"2017","journal-title":"CoRR"},{"key":"ref21","author":"livingstone","year":"2018","journal-title":"The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS)"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/FG47880.2020.00056"},{"key":"ref24","article-title":"TDN: temporal difference networks for efficient action recognition","volume":"abs 2012 10071","author":"wang","year":"2020","journal-title":"CoRR"},{"key":"ref41","first-page":"1","article-title":"Multi-task head pose estimation in-the-wild","volume":"pp","author":"valle","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref23","article-title":"PAN: towards fast action recognition via learning persistence of appearance","volume":"abs 2008 3462","author":"zhang","year":"2020","journal-title":"CoRR"},{"key":"ref44","article-title":"ALBERT: A lite BERT for self-supervised learning of language representations","volume":"abs 1909 11942","author":"lan","year":"2019","journal-title":"CoRR"},{"key":"ref26","article-title":"Vg-gface 2: A dataset for recognising faces across pose and age","volume":"abs 1710 8092","author":"cao","year":"2017","journal-title":"CoRR"},{"key":"ref43","author":"xiao","year":"2020","journal-title":"Audiovisual slowfast networks for video recognition"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"}],"event":{"name":"2021 9th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)","location":"Nara, Japan","start":{"date-parts":[[2021,9,28]]},"end":{"date-parts":[[2021,10,1]]}},"container-title":["2021 9th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9666213\/9666196\/09666356.pdf?arnumber=9666356","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:56:53Z","timestamp":1652201813000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9666356\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,28]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/aciiw52867.2021.9666356","relation":{},"subject":[],"published":{"date-parts":[[2021,9,28]]}}}