{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T10:53:46Z","timestamp":1768906426482,"version":"3.49.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,6,28]],"date-time":"2023-06-28T00:00:00Z","timestamp":1687910400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,28]],"date-time":"2023-06-28T00:00:00Z","timestamp":1687910400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1007\/s11042-023-16066-6","type":"journal-article","created":{"date-parts":[[2023,6,28]],"date-time":"2023-06-28T07:02:21Z","timestamp":1687935741000},"page":"9911-9928","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Enhanced spatio-temporal 3D CNN for facial expression classification in videos"],"prefix":"10.1007","volume":"83","author":[{"given":"Deepanshu","family":"Khanna","sequence":"first","affiliation":[]},{"given":"Neeru","family":"Jindal","sequence":"additional","affiliation":[]},{"given":"Prashant Singh","family":"Rana","sequence":"additional","affiliation":[]},{"given":"Harpreet","family":"Singh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,28]]},"reference":[{"issue":"3","key":"16066_CR1","doi-asserted-by":"publisher","first-page":"959","DOI":"10.1109\/TITS.2019.2900426","volume":"21","author":"T Akilan","year":"2020","unstructured":"Akilan T, Wu QJ, Safaei A, Huo J, Yang Y (2020) A 3D CNN-LSTM-Based Image-to-Image Foreground Segmentation. IEEE Trans Intell Transp Syst 21(3):959\u2013971. https:\/\/doi.org\/10.1109\/TITS.2019.2900426","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"16066_CR2","doi-asserted-by":"publisher","unstructured":"Aly S, Abbott A L, Torki M (2016) A multimodal feature fusion framework for Kinect-based facial expression recognition using Dual Kernel Discriminant Analysis (DKDA). In: 2016 IEEE Winter Conference on Applications of Computer Vision (WACV), Lake Placid, NY, USA, pp. 1\u201310. https:\/\/doi.org\/10.1109\/WACV.2016.7477577","DOI":"10.1109\/WACV.2016.7477577"},{"key":"16066_CR3","doi-asserted-by":"publisher","unstructured":"Bartlett MS, Littlewood G, Fasel I, Movellan JR (2003) Real-Time Face Detection and Facial Expression Recognition: Development and Applications to Human-Computer Interaction. In: 2003 Conference on Computer Vision and Pattern Recognition Workshop, Madison, WI, USA, pp. 53\u201353. https:\/\/doi.org\/10.1109\/CVPRW.2003.10057","DOI":"10.1109\/CVPRW.2003.10057"},{"key":"16066_CR4","doi-asserted-by":"publisher","unstructured":"Carreira J, Zisserman A (2017) Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, USA, pp. 4724\u20134733. https:\/\/doi.org\/10.1109\/CVPR.2017.502","DOI":"10.1109\/CVPR.2017.502"},{"key":"16066_CR5","doi-asserted-by":"publisher","first-page":"823","DOI":"10.3390\/s23020823","volume":"23","author":"L Chang","year":"2023","unstructured":"Chang L, Chenglin W, Yiting Q (2023) A Video Sequence Face Expression Recognition Method Based on Squeeze-and-Excitation and 3DPCA Network. Sensors 23:823. https:\/\/doi.org\/10.3390\/s23020823","journal-title":"Sensors"},{"issue":"12","key":"16066_CR6","doi-asserted-by":"publisher","first-page":"1598","DOI":"10.1016\/j.patrec.2011.01.004","volume":"32","author":"O Deniz","year":"2011","unstructured":"Deniz O, Bueno G, Salido J et al (2011) Face recognition using histograms of oriented gradients. Pattern Recogn Lett 32(12):1598\u20131603. https:\/\/doi.org\/10.1016\/j.patrec.2011.01.004","journal-title":"Pattern Recogn Lett"},{"key":"16066_CR7","doi-asserted-by":"publisher","unstructured":"Dhankhar P (2019) ResNet-50 and VGG-16 for recognizing Facial Emotions, 13(4):1-5.\u00a0https:\/\/doi.org\/10.21172\/ijiet.134.18","DOI":"10.21172\/ijiet.134.18"},{"key":"16066_CR8","doi-asserted-by":"publisher","unstructured":"Fan Y, Lu X, Li D, Liu Y (2016) Video-based emotion recognition using CNN-RNN and C3D hybrid networks. In: Proceedings of the 18th ACM International Conference on Multimodal Interaction (ICMI\u2019 16). Association for Computing Machinery, New York, NY, USA, pp. 445\u2013450. https:\/\/doi.org\/10.1145\/2993148.2997632","DOI":"10.1145\/2993148.2997632"},{"key":"16066_CR9","doi-asserted-by":"publisher","unstructured":"Ghaleb E, Popa M, Asteriadis S (2019) Multimodal and Temporal Perception of Audio-visual Cues for Emotion Recognition. In: 2019 8th International Conference on Affective Computing and Intelligent Interaction (ACII), Cambridge, United Kingdom, pp. 552\u2013558. https:\/\/doi.org\/10.1109\/ACII.2019.8925444","DOI":"10.1109\/ACII.2019.8925444"},{"key":"16066_CR10","doi-asserted-by":"publisher","unstructured":"Haddad J, Lezoray O, Hamel P (2020) 3D-CNN for Facial Emotion Recognition in Videos. In: International Symposium on Visual Computing, pp. 298\u2013309 Springer. https:\/\/doi.org\/10.1007\/978-3-030-64559-5_23","DOI":"10.1007\/978-3-030-64559-5_23"},{"key":"16066_CR11","doi-asserted-by":"publisher","unstructured":"Hara K, Kataoka H, Satoh Y (2018) Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet? https:\/\/doi.org\/10.1109\/ACCESS.2019.2901521","DOI":"10.1109\/ACCESS.2019.2901521"},{"key":"16066_CR12","doi-asserted-by":"publisher","unstructured":"He Z, Jin T, Basu A, Soraghan J, Caterina G D, Petropoulakis L (2019) Human Emotion Recognition in Video Using Subtraction Pre-Processing. In: Proceedings of the 2019 11th International Conference on Machine Learning and Computing (ICMLC\u2019 19), Association for Computing Machinery, New York, NY, USA, pp. 374\u2013379. https:\/\/doi.org\/10.1145\/3318299.3318321","DOI":"10.1145\/3318299.3318321"},{"key":"16066_CR13","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep Residual Learning for Image Recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Las Vegas, NV, USA, pp. 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"16066_CR14","doi-asserted-by":"publisher","unstructured":"Ho TT, Kim T, Kim WJ et al (2021) A 3D-CNN model with CT-based parametric response mapping for classifying COPD subjects. https:\/\/doi.org\/10.1038\/s41598-020-79336-5","DOI":"10.1038\/s41598-020-79336-5"},{"key":"16066_CR15","doi-asserted-by":"publisher","unstructured":"Huang G, Liu Z, Maaten LVD, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, US, pp. 4700\u20134708. https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"16066_CR16","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1016\/j.future.2020.10.025","volume":"116","author":"F Ji","year":"2021","unstructured":"Ji F, Zhang H, Zhu Z, Dai W (2021) Blog text quality assessment using a 3D CNN-based statistical framework. Futur Gener Comput Syst 116:365\u2013370. https:\/\/doi.org\/10.1016\/j.future.2020.10.025","journal-title":"Futur Gener Comput Syst"},{"key":"16066_CR17","doi-asserted-by":"publisher","unstructured":"Kanade T, Cohn J F, Tian Y (2000) Comprehensive database for facial expression analysis. In: Proceedings of the Fourth IEEE International Conference on Automatic Face and Gesture Recognition (FG\u201900), Grenoble, France, pp. 46\u201353. https:\/\/doi.org\/10.1109\/AFGR.2000.840611","DOI":"10.1109\/AFGR.2000.840611"},{"key":"16066_CR18","doi-asserted-by":"publisher","unstructured":"Khorrami P, Paine TL, Brady K, Dagli C, Huang TS (2016) How deep neural networks can improve emotion recognition on video data. In: 2016 IEEE International Conference on Image Processing (ICIP), Phoenix, AZ, USA, pp. 619\u2013623. https:\/\/doi.org\/10.1109\/ICIP.2016.7532431","DOI":"10.1109\/ICIP.2016.7532431"},{"key":"16066_CR19","doi-asserted-by":"publisher","unstructured":"Klaeser A, Marszalek M, Schmid C (2008) A Spatio-Temporal Descriptor Based on 3D-Gradients. In: Proceedings of the British Machine Vision Conference, pp. 99.1\u201399.10. https:\/\/doi.org\/10.5244\/C.22.99","DOI":"10.5244\/C.22.99"},{"key":"16066_CR20","doi-asserted-by":"publisher","unstructured":"Li S, Deng W (2020) Deep Facial Expression Recognition: A Survey. In: IEEE Transactions on Affective Computing. https:\/\/doi.org\/10.1109\/TAFFC.2020.2981446","DOI":"10.1109\/TAFFC.2020.2981446"},{"key":"16066_CR21","doi-asserted-by":"publisher","unstructured":"Li B, Lima D (2021) Facial expression recognition via ResNet-50. Int J Cogn Comput Eng. 57\u201364. https:\/\/doi.org\/10.1016\/j.ijcce.2021.02.002","DOI":"10.1016\/j.ijcce.2021.02.002"},{"key":"16066_CR22","doi-asserted-by":"publisher","unstructured":"Liu M, Shan S, Wang R, Chen X (2014) Learning Expressionless on Spatio-temporal Manifold for Dynamic Facial Expression Recognition. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition, Columbus, OH, USA, pp. 1749\u20131756. https:\/\/doi.org\/10.1109\/CVPR.2014.226","DOI":"10.1109\/CVPR.2014.226"},{"key":"16066_CR23","doi-asserted-by":"publisher","unstructured":"Livingstone SR, Russo FA (2018) The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS): A dynamic, multimodal set of facial and vocal expressions in North American English (2018). https:\/\/doi.org\/10.1371\/journal.pone.0196391","DOI":"10.1371\/journal.pone.0196391"},{"key":"16066_CR24","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1016\/j.patcog.2016.07.026","volume":"61","author":"AT Lopes","year":"2017","unstructured":"Lopes AT, Aguiar E, Souza AFD, Oliveira-Santos T (2017) Facial expression recognition with Convolutional Neural Networks: Coping with few data and the training sample order. Pattern Recogn 61:610\u2013628. https:\/\/doi.org\/10.1016\/j.patcog.2016.07.026","journal-title":"Pattern Recogn"},{"key":"16066_CR25","doi-asserted-by":"publisher","unstructured":"Lucey P, Cohn JF, Kanade T, Saragih J, Ambadar Z, Matthews I. The Extended Cohn-Kanade Dataset (CK+): A complete dataset for action unit and emotion-specified expression. In: 2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition - Workshops, San Francisco, CA, USA, pp. 94\u2013101. https:\/\/doi.org\/10.1109\/CVPRW.2010.5543262","DOI":"10.1109\/CVPRW.2010.5543262"},{"key":"16066_CR26","doi-asserted-by":"publisher","unstructured":"Miao Y, Dong H, Jaam J M A, Saddik A E (2019) A Deep Learning System for Recognizing Facial Expression in Real-Time. In: ACM Transactions on Multimedia Computing, Communications, and Applications. https:\/\/doi.org\/10.1145\/3311747","DOI":"10.1145\/3311747"},{"issue":"5","key":"16066_CR27","doi-asserted-by":"publisher","first-page":"1082","DOI":"10.1016\/j.jvcir.2014.03.006","volume":"25","author":"MR Mohammadi","year":"2014","unstructured":"Mohammadi MR, Fatemizadeh E, Mahoor MH (2014) PCA-based dictionary building for accurate facial expression recognition via sparse representation. J Vis Commun Image Represent 25(5):1082\u20131092. https:\/\/doi.org\/10.1016\/j.jvcir.2014.03.006","journal-title":"J Vis Commun Image Represent"},{"key":"16066_CR28","doi-asserted-by":"publisher","unstructured":"Pe\u00f1a D, Tanaka F (2020) Human Perception of Social Robot\u2019s Emotional States via Facial and Thermal Expressions. In: Association for Computing Machinery. https:\/\/doi.org\/10.1145\/3388469","DOI":"10.1145\/3388469"},{"issue":"5","key":"16066_CR29","doi-asserted-by":"publisher","first-page":"1740","DOI":"10.1109\/TIP.2012.2235848","volume":"22","author":"AR Rivera","year":"2013","unstructured":"Rivera AR, Castillo JR, Chae OO (2013) Local Directional Number Pattern for Face Analysis: Face and Expression Recognition. IEEE Trans Image Process 22(5):1740\u20131752. https:\/\/doi.org\/10.1109\/TIP.2012.2235848","journal-title":"IEEE Trans Image Process"},{"key":"16066_CR30","doi-asserted-by":"publisher","unstructured":"Scovanner P, Ali S, Shah M (2007) A 3-dimensional sift descriptor and its application to action recognition. In: Proceedings of the 15th ACM international conference on Multimedia (MM\u2019 07). Association for Computing Machinery, New York, NY, USA, pp. 357\u2013360. https:\/\/doi.org\/10.1145\/1291233.1291311","DOI":"10.1145\/1291233.1291311"},{"key":"16066_CR31","doi-asserted-by":"publisher","unstructured":"Sharma G, Singh L, Gautam S (2019) Automatic Facial Expression Recognition Using Combined Geometric Features. In: 3D Research 10, Article 224. https:\/\/doi.org\/10.1007\/s13319-019-0224-0","DOI":"10.1007\/s13319-019-0224-0"},{"key":"16066_CR32","doi-asserted-by":"publisher","unstructured":"Singh R, Saurav S, Kumar T et al (2023) Facial expression recognition in videos using hybrid CNN & ConvLSTM. Int J Inf Tecnol (2023). https:\/\/doi.org\/10.1007\/s41870-023-01183-0","DOI":"10.1007\/s41870-023-01183-0"},{"key":"16066_CR33","doi-asserted-by":"publisher","unstructured":"Tariq U et al (2011) Emotion recognition from an ensemble of features. In: 2011 IEEE International Conference on Automatic Face & Gesture Recognition (FG), Santa Barbara, CA, USA, pp. 872\u2013877. https:\/\/doi.org\/10.1109\/FG.2011.5771365","DOI":"10.1109\/FG.2011.5771365"},{"issue":"07","key":"16066_CR34","doi-asserted-by":"publisher","first-page":"1311","DOI":"10.1109\/TLA.2020.9099774","volume":"18","author":"MG Villanueva","year":"2020","unstructured":"Villanueva MG, Zavala SR (2020) Deep Neural Network Architecture: Application for Facial Expression Recognition. IEEE Lat Am Trans 18(07):1311\u20131319. https:\/\/doi.org\/10.1109\/TLA.2020.9099774","journal-title":"IEEE Lat Am Trans"},{"key":"16066_CR35","doi-asserted-by":"publisher","first-page":"4630","DOI":"10.1109\/ACCESS.2017.2784096","volume":"6","author":"B Yang","year":"2018","unstructured":"Yang B, Cao J, Ni R, Zhang Y (2018) Facial Expression Recognition Using Weighted Mixture Deep Neural Network Based on Double-Channel Facial Images. IEEE Access 6:4630\u20134640. https:\/\/doi.org\/10.1109\/ACCESS.2017.2784096","journal-title":"IEEE Access"},{"issue":"3","key":"16066_CR36","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TAFFC.2016.2553038","volume":"8","author":"S Zhalehpour","year":"2017","unstructured":"Zhalehpour S, Onder O, Akhtar Z, Erdem CE (2017) BAUM-1: A Spontaneous Audio-Visual Face Database of Affective and Mental States. IEEE Trans Affect Comput 8(3):300\u2013313. https:\/\/doi.org\/10.1109\/TAFFC.2016.2553038","journal-title":"IEEE Trans Affect Comput"},{"issue":"10","key":"16066_CR37","doi-asserted-by":"publisher","first-page":"3030","DOI":"10.1109\/TCSVT.2017.2719043","volume":"28","author":"S Zhang","year":"2018","unstructured":"Zhang S, Huang T, Gao W, Tian Q (2018) Learning Affective Features with a Hybrid Deep Model for Audio-Visual Emotion Recognition. IEEE Trans Circ Syst Video Technol 28(10):3030\u20133043. https:\/\/doi.org\/10.1109\/TCSVT.2017.2719043","journal-title":"IEEE Trans Circ Syst Video Technol"},{"key":"16066_CR38","doi-asserted-by":"publisher","first-page":"32297","DOI":"10.1109\/ACCESS.2019.2901521","volume":"7","author":"S Zhang","year":"2019","unstructured":"Zhang S, Pan X, Cui Y, Zhao X, Liu L (2019) Learning Affective Video Features for Facial Expression Recognition via Hybrid Deep Learning. IEEE Access 7:32297\u201332304. https:\/\/doi.org\/10.1109\/ACCESS.2019.2901521","journal-title":"IEEE Access"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16066-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16066-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16066-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T09:46:56Z","timestamp":1704880016000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16066-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,28]]},"references-count":38,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2024,1]]}},"alternative-id":["16066"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16066-6","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,6,28]]},"assertion":[{"value":"13 September 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 May 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 June 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 June 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There is no conflict of interest between authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}