{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T04:53:15Z","timestamp":1726030395783},"publisher-location":"Cham","reference-count":48,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030208899"},{"type":"electronic","value":"9783030208905"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20890-5_12","type":"book-chapter","created":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T11:18:34Z","timestamp":1559387914000},"page":"179-195","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SMC: Single-Stage Multi-location Convolutional Network for Temporal Action Detection"],"prefix":"10.1007","author":[{"given":"Zhikang","family":"Liu","sequence":"first","affiliation":[]},{"given":"Zilei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yan","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Ye","family":"Tian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,6,2]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Carlos Niebles, J., Ghanem, B.: Fast temporal activity proposals for efficient detection of human actions in untrimmed videos. In: CVPR (2016)","key":"12_CR1","DOI":"10.1109\/CVPR.2016.211"},{"doi-asserted-by":"crossref","unstructured":"Shou, Z., Wang, D., Chang, S.F.: Temporal action localization in untrimmed videos via multi-stage CNNs. In: CVPR (2016)","key":"12_CR2","DOI":"10.1109\/CVPR.2016.119"},{"key":"12_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1007\/978-3-319-46487-9_47","volume-title":"Computer Vision \u2013 ECCV 2016","author":"V Escorcia","year":"2016","unstructured":"Escorcia, V., Caba Heilbron, F., Niebles, J.C., Ghanem, B.: DAPs: deep action proposals for action understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9907, pp. 768\u2013784. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46487-9_47"},{"unstructured":"Oneata, D., Verbeek, J., Schmid, C.: The LEAR submission at Thumos 2014 (2014)","key":"12_CR4"},{"doi-asserted-by":"crossref","unstructured":"Yuan, J., Ni, B., Yang, X., Kassim, A.A.: Temporal action localization with pyramid of score distribution features. In: CVPR (2016)","key":"12_CR5","DOI":"10.1109\/CVPR.2016.337"},{"doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection. In: CVPR (2016)","key":"12_CR6","DOI":"10.1109\/CVPR.2016.91"},{"doi-asserted-by":"crossref","unstructured":"Redmon, J., Farhadi, A.: Yolo9000: better, faster, stronger. In: CVPR (2017)","key":"12_CR7","DOI":"10.1109\/CVPR.2017.690"},{"key":"12_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot MultiBox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"doi-asserted-by":"crossref","unstructured":"Herath, S., Harandi, M., Porikli, F.: Going deeper into action recognition: a survey. Image Vis. Comput. (2017)","key":"12_CR9","DOI":"10.1016\/j.imavis.2017.01.010"},{"doi-asserted-by":"crossref","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. PAMI (2013)","key":"12_CR10","DOI":"10.1109\/TPAMI.2012.59"},{"unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: NIPS (2014)","key":"12_CR11"},{"doi-asserted-by":"crossref","unstructured":"Yue-Hei Ng, J., Hausknecht, M., Vijayanarasimhan, S., Vinyals, O., Monga, R., Toderici, G.: Beyond short snippets: deep networks for video classification. In: CVPR (2015)","key":"12_CR12","DOI":"10.1109\/CVPR.2015.7299101"},{"doi-asserted-by":"crossref","unstructured":"Zhu, Y., Long, Y., Guan, Y., Newsam, S., Shao, L.: Towards universal representation for unseen action recognition. In: CVPR (2018)","key":"12_CR13","DOI":"10.1109\/CVPR.2018.00983"},{"doi-asserted-by":"crossref","unstructured":"Laptev, I.: On space-time interest points. IJCV (2005)","key":"12_CR14","DOI":"10.1007\/s11263-005-1838-7"},{"doi-asserted-by":"crossref","unstructured":"Klaser, A., Marsza\u0142ek, M., Schmid, C.: A spatio-temporal descriptor based on 3D-gradients. In: BMVC(2008)","key":"12_CR15","DOI":"10.5244\/C.22.99"},{"doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: CVPR (2015)","key":"12_CR16","DOI":"10.1109\/ICCV.2015.510"},{"doi-asserted-by":"crossref","unstructured":"Karpathy, A., Toderici, G., Shetty, S., Leung, T., Sukthankar, R., Fei-Fei, L.: Large-scale video classification with convolutional neural networks. In: CVPR (2014)","key":"12_CR17","DOI":"10.1109\/CVPR.2014.223"},{"doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Wildes, R.: Spatiotemporal residual networks for video action recognition. In: NIPS (2016)","key":"12_CR18","DOI":"10.1109\/CVPR.2017.787"},{"unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2014)","key":"12_CR19"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","key":"12_CR20","DOI":"10.1109\/CVPR.2016.90"},{"key":"12_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/978-3-642-25446-8_4","volume-title":"Human Behavior Understanding","author":"M Baccouche","year":"2011","unstructured":"Baccouche, M., Mamalet, F., Wolf, C., Garcia, C., Baskurt, A.: Sequential deep learning for human action recognition. In: Salah, A.A., Lepri, B. (eds.) HBU 2011. LNCS, vol. 7065, pp. 29\u201339. Springer, Heidelberg (2011). \n                      https:\/\/doi.org\/10.1007\/978-3-642-25446-8_4"},{"doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: CVPR (2015)","key":"12_CR22","DOI":"10.21236\/ADA623249"},{"doi-asserted-by":"crossref","unstructured":"Gaidon, A., Harchaoui, Z., Schmid, C.: Actom sequence models for efficient action detection. In: CVPR (2011)","key":"12_CR23","DOI":"10.1109\/CVPR.2011.5995646"},{"doi-asserted-by":"crossref","unstructured":"Jain, M., van Gemert, J.C., Snoek, C.G.: What do 15,000 object categories tell us about classifying and localizing actions? In: CVPR (2015)","key":"12_CR24","DOI":"10.1109\/CVPR.2015.7298599"},{"doi-asserted-by":"crossref","unstructured":"Oneata, D., Verbeek, J., Schmid, C.: Efficient action localization with approximately normalized fisher vectors. In: CVPR (2014)","key":"12_CR25","DOI":"10.1109\/CVPR.2014.326"},{"doi-asserted-by":"crossref","unstructured":"Shou, Z., Chan, J., Zareian, A., Miyazawa, K., Chang, S.F.: CDC: convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: CVPR (2017)","key":"12_CR26","DOI":"10.1109\/CVPR.2017.155"},{"doi-asserted-by":"crossref","unstructured":"Dai, X., Singh, B., Zhang, G., Davis, L.S., Chen, Y.Q.: Temporal context network for activity localization in videos. In: ICCV (2017)","key":"12_CR27","DOI":"10.1109\/ICCV.2017.610"},{"doi-asserted-by":"crossref","unstructured":"Gao, J., Yang, Z., Nevatia, R.: Cascaded boundary regression for temporal action detection. In: BMVC (2017)","key":"12_CR28","DOI":"10.5244\/C.31.52"},{"doi-asserted-by":"crossref","unstructured":"Gao, J., Yang, Z., Sun, C., Chen, K., Nevatia, R.: TURN TAP: temporal unit regression network for temporal action proposals. In: ICCV (2017)","key":"12_CR29","DOI":"10.1109\/ICCV.2017.392"},{"unstructured":"Yeung, S., Russakovsky, O., Jin, N., Andriluka, M., Mori, G., Fei-Fei, L.: Every moment counts: dense detailed labeling of actions in complex videos. IJCV (2015)","key":"12_CR30"},{"doi-asserted-by":"crossref","unstructured":"Yeung, S., Russakovsky, O., Mori, G., Fei-Fei, L.: End-to-end learning of action detection from frame glimpses in videos. In: CVPR (2016)","key":"12_CR31","DOI":"10.1109\/CVPR.2016.293"},{"doi-asserted-by":"crossref","unstructured":"Singh, B., Marks, T.K., Jones, M., Tuzel, O., Shao, M.: A multi-stream bi-directional recurrent neural network for fine-grained action detection. In: CVPR (2016)","key":"12_CR32","DOI":"10.1109\/CVPR.2016.216"},{"doi-asserted-by":"crossref","unstructured":"Xu, H., Das, A., Saenko, K.: R-C3D: region convolutional 3D network for temporal activity detection. In: ICCV (2017)","key":"12_CR33","DOI":"10.1109\/ICCV.2017.617"},{"doi-asserted-by":"crossref","unstructured":"Zhu, Y., Newsam, S.: Efficient action detection in untrimmed videos via multi-task learning. In: WACV (2017)","key":"12_CR34","DOI":"10.1109\/WACV.2017.29"},{"doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Shou, Z.: Single shot temporal action detection. In: ACMMM (2017)","key":"12_CR35","DOI":"10.1145\/3123266.3123343"},{"doi-asserted-by":"crossref","unstructured":"Buch, S., Escorcia, V., Ghanem, B., Fei-Fei, L., Niebles, J.C.: End-to-end, single-stream temporal action detection in untrimmed videos. In: BMVC (2017)","key":"12_CR36","DOI":"10.5244\/C.31.93"},{"doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: CVPR (2014)","key":"12_CR37","DOI":"10.1109\/CVPR.2014.81"},{"doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: ICCV (2015)","key":"12_CR38","DOI":"10.1109\/ICCV.2015.169"},{"unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NIPS (2015)","key":"12_CR39"},{"unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: ICAIS (2011)","key":"12_CR40"},{"unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Object detectors emerge in deep scene CNNs. In: ICLR (2015)","key":"12_CR41"},{"unstructured":"Jiang, Y., et al.: THUMOS challenge: action recognition with a large number of classes (2014). \n                      http:\/\/crcv.ucf.edu\/THUMOS14\/","key":"12_CR42"},{"unstructured":"MEXaction2 (2015). \n                      http:\/\/mexculture.cnam.fr\/xwiki\/bin\/view\/Datasets\/Mex+action+dataset","key":"12_CR43"},{"key":"12_CR44","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/978-3-7908-2604-3_16","volume-title":"COMPSTAT 2010","author":"L Bottou","year":"2010","unstructured":"Bottou, L.: Large-scale machine learning with stochastic gradient descent. In: Lechevallier, Y., Saporta, G. (eds.) COMPSTAT 2010, pp. 177\u2013186. Springer, Heidelberg (2010). \n                      https:\/\/doi.org\/10.1007\/978-3-7908-2604-3_16"},{"unstructured":"Soomro, K., Zamir, A.R., Shah, M.: UCF101: a dataset of 101 human actions classes from videos in the wild. CRCV-TR-12-01 (2012)","key":"12_CR45"},{"doi-asserted-by":"crossref","unstructured":"Jia, Y., et al.: Caffe: convolutional architecture for fast feature embedding. In: ACMMM (2014)","key":"12_CR46","DOI":"10.1145\/2647868.2654889"},{"unstructured":"Wang, L., Qiao, Y., Tang, X.: Action recognition and detection by combining motion and appearance features. In: THUMOS14 Action Recognition Challenge (2014)","key":"12_CR47"},{"doi-asserted-by":"crossref","unstructured":"Buch, S., Escorcia, V., Shen, C., Ghanem, B., Niebles, J.C.: SST: single-stream temporal action proposals. In: CVPR (2017)","key":"12_CR48","DOI":"10.1109\/CVPR.2017.675"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2018"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20890-5_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,1]],"date-time":"2019-06-01T11:23:07Z","timestamp":1559388187000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-20890-5_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030208899","9783030208905"],"references-count":48,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20890-5_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"2 June 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Perth, WA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2018.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"979","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"274","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"2.7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}