{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:31:38Z","timestamp":1750221098433,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,12,18]],"date-time":"2018-12-18T00:00:00Z","timestamp":1545091200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,12,18]]},"DOI":"10.1145\/3293353.3293398","type":"proceedings-article","created":{"date-parts":[[2020,5,4]],"date-time":"2020-05-04T22:07:32Z","timestamp":1588630052000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Dynamic Gesture Recognition with Pose-based CNN Features derived from videos using LSTM"],"prefix":"10.1145","author":[{"given":"Kankana","family":"Roy","sequence":"first","affiliation":[{"name":"Computational Vision Laboratory, Department of Computer Science and Engineering, Indian Institute of Technology Kharagpur"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rajiv R.","family":"Sahay","sequence":"additional","affiliation":[{"name":"Computational Vision Laboratory, Department of Electrical Engineering, Indian Institute of Technology Kharagpur"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2016.10.006"},{"volume-title":"Proceedings of the International Conference on Multimodal Interaction. 461--466","author":"Bayer I.","key":"e_1_3_2_1_2_1","unstructured":"I. Bayer and T. Silbermann . 2013. A multi modal approach to gesture recognition from audio and video data . In Proceedings of the International Conference on Multimodal Interaction. 461--466 . I. Bayer and T. Silbermann. 2013. A multi modal approach to gesture recognition from audio and video data. In Proceedings of the International Conference on Multimodal Interaction. 461--466."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 3034--3042","author":"Bilen H.","key":"e_1_3_2_1_3_1","unstructured":"H. Bilen , B. Fernando , E. Gavves , A. Vedaldi , and S. Gould . 2016. Dynamic image networks for action recognition . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 3034--3042 . H. Bilen, B. Fernando, E. Gavves, A. Vedaldi, and S. Gould. 2016. Dynamic image networks for action recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 3034--3042."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.143"},{"volume-title":"Proceedings of the IEEE International Conference on Pattern Recognition. 49--54","author":"Camgoz N. C","key":"e_1_3_2_1_5_1","unstructured":"N. C Camgoz , S. Hadfield , O. Koller , and R. Bowden . 2016. Using convolutional 3D neural networks for user-independent continuous gesture recognition . In Proceedings of the IEEE International Conference on Pattern Recognition. 49--54 . N. C Camgoz, S. Hadfield, O. Koller, and R. Bowden. 2016. Using convolutional 3D neural networks for user-independent continuous gesture recognition. In Proceedings of the IEEE International Conference on Pattern Recognition. 49--54."},{"volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 3763--3771","author":"Cao C.","key":"e_1_3_2_1_6_1","unstructured":"C. Cao , Y. Zhang , Y. Wu , H. Lu , and J. Cheng . 2017. Egocentric gesture recognition using recurrent 3D convolutional neural networks with spatiotemporal transformer modules . In Proceedings of the IEEE International Conference on Computer Vision. 3763--3771 . C. Cao, Y. Zhang, Y. Wu, H. Lu, and J. Cheng. 2017. Egocentric gesture recognition using recurrent 3D convolutional neural networks with spatiotemporal transformer modules. In Proceedings of the IEEE International Conference on Computer Vision. 3763--3771."},{"key":"e_1_3_2_1_7_1","volume-title":"Workshop at the European Conference on Computer Vision. 503--517","author":"Chang J.","year":"2014","unstructured":"J. Chang . 2014 . Nonparametric gesture labeling from multi-modal data . In Workshop at the European Conference on Computer Vision. 503--517 . J. Chang. 2014. Nonparametric gesture labeling from multi-modal data. In Workshop at the European Conference on Computer Vision. 503--517."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"volume-title":"Proceedings of the IEEE International Conference on Image Processing. 2881--2885","author":"Chen X.","key":"e_1_3_2_1_9_1","unstructured":"X. Chen , H. Guo , G. Wang , and L. Zhang . 2017. Motion feature augmented recurrent neural network for skeleton-based dynamic hand gesture recognition . In Proceedings of the IEEE International Conference on Image Processing. 2881--2885 . X. Chen, H. Guo, G. Wang, and L. Zhang. 2017. Motion feature augmented recurrent neural network for skeleton-based dynamic hand gesture recognition. In Proceedings of the IEEE International Conference on Image Processing. 2881--2885."},{"volume-title":"Proceedings of the IEEE International Conference on Pattern Recognition. IEEE, 411--416","author":"Chen X.","key":"e_1_3_2_1_10_1","unstructured":"X. Chen and M. Koskela . 2014. Using appearance-based hand features for dynamic RGB-D gesture recognition . In Proceedings of the IEEE International Conference on Pattern Recognition. IEEE, 411--416 . X. Chen and M. Koskela. 2014. Using appearance-based hand features for dynamic RGB-D gesture recognition. In Proceedings of the IEEE International Conference on Pattern Recognition. IEEE, 411--416."},{"volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 3218--3226","author":"Ch\u00e9ron G.","key":"e_1_3_2_1_11_1","unstructured":"G. Ch\u00e9ron , I. Laptev , and C. Schmid . 2015. P-CNN: Pose-based cnn features for action recognition . In Proceedings of the IEEE International Conference on Computer Vision. 3218--3226 . G. Ch\u00e9ron, I. Laptev, and C. Schmid. 2015. P-CNN: Pose-based cnn features for action recognition. In Proceedings of the IEEE International Conference on Computer Vision. 3218--3226."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 1--9.","author":"Smedt Q. De","key":"e_1_3_2_1_12_1","unstructured":"Q. De Smedt , H. Wannous , and J. Vandeborre . 2016. Skeleton-based dynamic hand gesture recognition . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 1--9. Q. De Smedt, H. Wannous, and J. Vandeborre. 2016. Skeleton-based dynamic hand gesture recognition. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 1--9."},{"volume-title":"Workshop at the European Conference on Computer Vision. 459--473","author":"Escalera S.","key":"e_1_3_2_1_13_1","unstructured":"S. Escalera , X. Bar\u00f3 , J. Gonzalez , M. A Bautista , M. Madadi , M. Reyes , V. Ponce-L\u00f3pez , H. J Escalante , J. Shotton , and I. Guyon . 2014. Chalearn looking at people challenge 2014: Dataset and results . In Workshop at the European Conference on Computer Vision. 459--473 . S. Escalera, X. Bar\u00f3, J. Gonzalez, M. A Bautista, M. Madadi, M. Reyes, V. Ponce-L\u00f3pez, H. J Escalante, J. Shotton, and I. Guyon. 2014. Chalearn looking at people challenge 2014: Dataset and results. In Workshop at the European Conference on Computer Vision. 459--473."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 580--587","author":"Girshick R.","key":"e_1_3_2_1_14_1","unstructured":"R. Girshick , J. Donahue , T. Darrell , and J. Malik . 2014. Rich feature hierarchies for accurate object detection and semantic segmentation . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 580--587 . R. Girshick, J. Donahue, T. Darrell, and J. Malik. 2014. Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 580--587."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 759--768","author":"Gkioxari G.","key":"e_1_3_2_1_15_1","unstructured":"G. Gkioxari and J. Malik . 2015. Finding action tubes . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 759--768 . G. Gkioxari and J. Malik. 2015. Finding action tubes. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 759--768."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1646--1654","author":"Kim J.","key":"e_1_3_2_1_17_1","unstructured":"J. Kim , J. Kwon Lee , and K. Mu Lee . 2016. Accurate image super-resolution using very deep convolutional networks . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1646--1654 . J. Kim, J. Kwon Lee, and K. Mu Lee. 2016. Accurate image super-resolution using very deep convolutional networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1646--1654."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1--8.","author":"Kim T.","key":"e_1_3_2_1_18_1","unstructured":"T. Kim , S. Wong , and R. Cipolla . 2007. Tensor canonical correlation analysis for action classification . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1--8. T. Kim, S. Wong, and R. Cipolla. 2007. Tensor canonical correlation analysis for action classification. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1--8."},{"key":"e_1_3_2_1_19_1","unstructured":"A. Krizhevsky I. Sutskever and G. E Hinton. 2012. Imagenet classification with deep Convolutional Neural Networks. In Advances in Neural Information Processing Systems. 1097--1105.  A. Krizhevsky I. Sutskever and G. E Hinton. 2012. Imagenet classification with deep Convolutional Neural Networks. In Advances in Neural Information Processing Systems. 1097--1105."},{"volume-title":"Proceedings of the European Conference on Computer Vision. 740--755","author":"Lin T.","key":"e_1_3_2_1_20_1","unstructured":"T. Lin , M. Maire , S. Belongie , J. Hays , P. Perona , D. Ramanan , P. Doll\u00e1r , and C L. Zitnick . 2014. Microsoft coco: Common objects in context . In Proceedings of the European Conference on Computer Vision. 740--755 . T. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan, P. Doll\u00e1r, and C L. Zitnick. 2014. Microsoft coco: Common objects in context. In Proceedings of the European Conference on Computer Vision. 740--755."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2011.2181452"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-015-2451-6"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.360"},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 1--7.","author":"Molchanov P.","key":"e_1_3_2_1_24_1","unstructured":"P. Molchanov , S. Gupta , K. Kim , and J. Kautz . 2015. Hand gesture recognition with 3D convolutional neural networks . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 1--7. P. Molchanov, S. Gupta, K. Kim, and J. Kautz. 2015. Hand gesture recognition with 3D convolutional neural networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. 1--7."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4207--4215","author":"Molchanov P.","key":"e_1_3_2_1_25_1","unstructured":"P. Molchanov , X. Yang , S. Gupta , K. Kim , S. Tyree , and J. Kautz . 2016. Online detection and classification of dynamic hand gestures with recurrent 3D convolutional neural network . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4207--4215 . P. Molchanov, X. Yang, S. Gupta, K. Kim, S. Tyree, and J. Kautz. 2016. Online detection and classification of dynamic hand gestures with recurrent 3D convolutional neural network. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4207--4215."},{"volume-title":"Workshop at the European Conference on Computer Vision. 491--502","author":"Monnier C.","key":"e_1_3_2_1_26_1","unstructured":"C. Monnier , S. German , and A. Ost . 2014. A multi-scale boosted detector for efficient and robust gesture recognition . In Workshop at the European Conference on Computer Vision. 491--502 . C. Monnier, S. German, and A. Ost. 2014. A multi-scale boosted detector for efficient and robust gesture recognition. In Workshop at the European Conference on Computer Vision. 491--502."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00549"},{"volume-title":"Proceedings of the IEEE International Conference on Computer Vision Workshops. 484--491","author":"Neverova N.","key":"e_1_3_2_1_28_1","unstructured":"N. Neverova , C. Wolf , G. Paci , G. Sommavilla , G. Taylor , and F. Nebout . 2013. A multi-scale approach to gesture detection and recognition . In Proceedings of the IEEE International Conference on Computer Vision Workshops. 484--491 . N. Neverova, C. Wolf, G. Paci, G. Sommavilla, G. Taylor, and F. Nebout. 2013. A multi-scale approach to gesture detection and recognition. In Proceedings of the IEEE International Conference on Computer Vision Workshops. 484--491."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2461544"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2014.2337331"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.29.41"},{"volume-title":"Proceedings of the IEEE International Conference on Image Processing. 1495--1499","author":"Pavlakos G.","key":"e_1_3_2_1_32_1","unstructured":"G. Pavlakos , S. Theodorakis , V. Pitsikalis , A. Katsamanis , and P. Maragos . 2014. Kinect-based multimodal gesture recognition using a two-pass fusion scheme . In Proceedings of the IEEE International Conference on Image Processing. 1495--1499 . G. Pavlakos, S. Theodorakis, V. Pitsikalis, A. Katsamanis, and P. Maragos. 2014. Kinect-based multimodal gesture recognition using a two-pass fusion scheme. In Proceedings of the IEEE International Conference on Image Processing. 1495--1499."},{"volume-title":"Workshop at the European Conference on Computer Vision. 518--527","author":"Peng X.","key":"e_1_3_2_1_33_1","unstructured":"X. Peng , L. Wang , Z. Cai , and Y. Qiao . 2014. Action and gesture temporal spotting with super vector representation . In Workshop at the European Conference on Computer Vision. 518--527 . X. Peng, L. Wang, Z. Cai, and Y. Qiao. 2014. Action and gesture temporal spotting with super vector representation. In Workshop at the European Conference on Computer Vision. 518--527."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0957-7"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.5555\/2789272.2789281"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2013.6475006"},{"key":"e_1_3_2_1_38_1","unstructured":"K. Soomro A. R Zamir and M. Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012).  K. Soomro A. R Zamir and M. Shah. 2012. UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)."},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1--9.","author":"Szegedy C.","key":"e_1_3_2_1_39_1","unstructured":"C. Szegedy , W. Liu , Y. Jia , P. Sermanet , S. Reed , D. Anguelov , D. Erhan , V. Vanhoucke , and A. Rabinovich . 2015. Going deeper with convolutions . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1--9. C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich. 2015. Going deeper with convolutions. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 1--9."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2712608"},{"volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 3129--3137","author":"Wang H.","key":"e_1_3_2_1_41_1","unstructured":"H. Wang , P. Wang , Z. Song , and W. Li . 2017. Large-scale multimodal gesture recognition using heterogeneous networks . In Proceedings of the IEEE International Conference on Computer Vision. 3129--3137 . H. Wang, P. Wang, Z. Song, and W. Li. 2017. Large-scale multimodal gesture recognition using heterogeneous networks. In Proceedings of the IEEE International Conference on Computer Vision. 3129--3137."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2537340"},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 5525--5533","author":"Yang S.","key":"e_1_3_2_1_43_1","unstructured":"S. Yang , P. Luo , C. Loy , and X. Tang . 2016. Wider face: A face detection benchmark . In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 5525--5533 . S. Yang, P. Luo, C. Loy, and X. Tang. 2016. Wider face: A face detection benchmark. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 5525--5533."},{"volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 3120--3128","author":"Zhang L.","key":"e_1_3_2_1_44_1","unstructured":"L. Zhang , G. Zhu , P. Shen , J. Song , S. A Shah , and M. Bennamoun . 2017. Learning spatiotemporal features using 3D CNN and convolutional lstm for gesture recognition . In Proceedings of the IEEE International Conference on Computer Vision. 3120--3128 . L. Zhang, G. Zhu, P. Shen, J. Song, S. A Shah, and M. Bennamoun. 2017. Learning spatiotemporal features using 3D CNN and convolutional lstm for gesture recognition. In Proceedings of the IEEE International Conference on Computer Vision. 3120--3128."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2017.2684186"}],"event":{"name":"ICVGIP 2018: 11th Indian Conference on Computer Vision, Graphics and Image Processing","acronym":"ICVGIP 2018","location":"Hyderabad India"},"container-title":["Proceedings of the 11th Indian Conference on Computer Vision, Graphics and Image Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3293353.3293398","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3293353.3293398","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T00:58:08Z","timestamp":1750208288000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3293353.3293398"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12,18]]},"references-count":45,"alternative-id":["10.1145\/3293353.3293398","10.1145\/3293353"],"URL":"https:\/\/doi.org\/10.1145\/3293353.3293398","relation":{},"subject":[],"published":{"date-parts":[[2018,12,18]]},"assertion":[{"value":"2020-05-03","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}