{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T14:04:46Z","timestamp":1762005886311,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030007638"},{"type":"electronic","value":"9783030007645"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-00764-5_17","type":"book-chapter","created":{"date-parts":[[2018,9,17]],"date-time":"2018-09-17T16:21:31Z","timestamp":1537201291000},"page":"178-187","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Rethinking Fusion Baselines for Multi-modal Human Action Recognition"],"prefix":"10.1007","author":[{"given":"Hongda","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Yanghao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Sijie","family":"Song","sequence":"additional","affiliation":[]},{"given":"Jiaying","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,18]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Ch\u00e9ron, G., Laptev, I., Schmid, C.: P-CNN: pose-based CNN features for action recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3218\u20133226 (2015)","DOI":"10.1109\/ICCV.2015.368"},{"key":"17_CR2","doi-asserted-by":"publisher","first-page":"428","DOI":"10.1007\/11744047_33","volume-title":"Computer Vision \u2013 ECCV 2006","author":"Navneet Dalal","year":"2006","unstructured":"Dalal, N., Triggs, B., Schmid, C.: Human detection using oriented histograms of flow and appearance. In: Proceedings of European Conference on Computer Vision, pp. 428\u2013441 (2006)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.: Spatiotemporal residual networks for video action recognition. In: Proceedings of Advances in Neural Information Processing Systems, pp. 3468\u20133476 (2016)","DOI":"10.1109\/CVPR.2017.787"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.P.: Spatiotemporal multiplier networks for video action recognition. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 7445\u20137454 (2017)","DOI":"10.1109\/CVPR.2017.787"},{"issue":"10","key":"17_CR6","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.: Learning to forget: continual prediction with LSTM. Neural Comput. 12(10), 2451\u20132471 (2000)","journal-title":"Neural Comput."},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"1","key":"17_CR8","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Jia, Y., et al.: Caffe: convolutional architecture for fast feature embedding. In: Proceedings of ACM International Conference on Multimedia, pp. 675\u2013678 (2014)","DOI":"10.1145\/2647868.2654889"},{"key":"17_CR10","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1145\/882262.882270","volume":"22","author":"SB Kang","year":"2003","unstructured":"Kang, S.B., Uyttendaele, M., Winder, S., Szeliski, R.: High dynamic range video. ACM Trans. Graph. 22, 319\u2013325 (2003)","journal-title":"ACM Trans. Graph."},{"key":"17_CR11","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Proceedings of Advances in Neural Information Processing Systems, pp. 1097\u20131105 (2012)"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., Rozenfeld, B.: Learning realistic human actions from movies. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 1\u20138 (2008)","DOI":"10.1109\/CVPR.2008.4587756"},{"issue":"7553","key":"17_CR13","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436 (2015)","journal-title":"Nature"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Liu, J., Li, Y., Song, S., Xing, J., Lan, C., Zeng, W.: Multi-modality multi-task recurrent neural network for online action detection. IEEE Trans. Circ. Syst. Video Technol. (2018)","DOI":"10.1109\/TCSVT.2018.2799968"},{"key":"17_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"816","DOI":"10.1007\/978-3-319-46487-9_50","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Liu","year":"2016","unstructured":"Liu, J., Shahroudy, A., Xu, D., Wang, G.: Spatio-temporal LSTM with trust gates for 3D human action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 816\u2013833. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46487-9_50"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Shahroudy, A., Liu, J., Ng, T.T., Wang, G.: NTU RGB+D: a large scale dataset for 3D human activity analysis. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 1010\u20131019 (2016)","DOI":"10.1109\/CVPR.2016.115"},{"key":"17_CR17","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Proceedings of Advances in Neural Information Processing Systems, pp. 568\u2013576 (2014)"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: An end-to-end spatio-temporal attention model for human action recognition from skeleton data. In: AAAI, vol. 1, p. 7 (2017)","DOI":"10.1609\/aaai.v31i1.11212"},{"issue":"7","key":"17_CR19","doi-asserted-by":"publisher","first-page":"3459","DOI":"10.1109\/TIP.2018.2818328","volume":"27","author":"S Song","year":"2018","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: Spatio-temporal attention-based LSTM networks for 3D action recognition and detection. IEEE Trans. Image Process. 27(7), 3459\u20133471 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"17_CR20","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMs. In: Proceedings of International Conference on Machine Learning, pp. 843\u2013852 (2015)"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"17_CR22","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.L.: Action recognition by dense trajectories. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 3169\u20133176 (2011)"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: Proceedings of IEEE International Conference on Computer Vision, pp. 3551\u20133558 (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"17_CR24","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Limin Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Proceedings of European Conference on Computer Vision, pp. 20\u201336 (2016)"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, P., Lan, C., Xing, J., Zeng, W., Xue, J., Zheng, N.: View adaptive recurrent neural networks for high performance human action recognition from skeleton data. In: Proceedings of IEEE International Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132126 (2017)","DOI":"10.1109\/ICCV.2017.233"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Zolfaghari, M., Oliveira, G.L., Sedaghat, N., Brox, T.: Chained multi-stream networks exploiting pose, motion, and appearance for action classification and detection. In: Proceedings of IEEE International Conference on Computer Vision, pp. 2923\u20132932 (2017)","DOI":"10.1109\/ICCV.2017.316"}],"container-title":["Lecture Notes in Computer Science","Advances in Multimedia Information Processing \u2013 PCM 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-00764-5_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T12:09:11Z","timestamp":1710331751000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-00764-5_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030007638","9783030007645"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-00764-5_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"18 September 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PCM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Pacific Rim Conference on Multimedia","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hefei","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pcm2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/pcm2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}