{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T04:17:28Z","timestamp":1743135448632,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030208752"},{"type":"electronic","value":"9783030208769"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-20876-9_27","type":"book-chapter","created":{"date-parts":[[2019,5,25]],"date-time":"2019-05-25T10:03:53Z","timestamp":1558778633000},"page":"420-437","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["TraMNet - Transition Matrix Network for Efficient Action Tube Proposals"],"prefix":"10.1007","author":[{"given":"Gurkirt","family":"Singh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Suman","family":"Saha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fabio","family":"Cuzzolin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,5,26]]},"reference":[{"key":"27_CR1","doi-asserted-by":"crossref","unstructured":"Saha, S., Singh, G., Cuzzolin, F.: AMTnet: action-micro-tube regression by end-to-end trainable deep architecture. In: IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.473"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Kalogeiton, V., Weinzaepfel, P., Ferrari, V., Schmid, C.: Action tubelet detector for spatio-temporal action localization. In: IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.472"},{"key":"27_CR3","doi-asserted-by":"crossref","unstructured":"Hou, R., Chen, C., Shah, M.: Tube convolutional neural network (T-CNN) for action detection in videos. In: IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.620"},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Malik, J.: Finding action tubes. In: IEEE International Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Weinzaepfel, P., Harchaoui, Z., Schmid, C.: Learning to track for spatio-temporal action localization. In: IEEE International Conference on Computer Vision and Pattern Recognition (2015)","DOI":"10.1109\/ICCV.2015.362"},{"key":"27_CR6","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrel, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: IEEE International Conference on Computer Vision and Pattern Recognition (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"27_CR7","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Liu, W., et al.: SSD: Single shot multibox detector. arXiv preprint \n                      arXiv:1512.02325\n                      \n                     (2015)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Saha, S., Singh, G., Sapienza, M., Torr, P.H.S., Cuzzolin, F.: Deep learning for detecting multiple space-time action tubes in videos. In: British Machine Vision Conference (2016)","DOI":"10.5244\/C.30.58"},{"key":"27_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"744","DOI":"10.1007\/978-3-319-46493-0_45","volume-title":"Computer Vision \u2013 ECCV 2016","author":"X Peng","year":"2016","unstructured":"Peng, X., Schmid, C.: Multi-region two-stream R-CNN for action detection. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 744\u2013759. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46493-0_45"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Singh, G., Saha, S., Sapienza, M., Torr, P., Cuzzolin, F.: Online real-time multiple spatiotemporal action localisation and prediction. In: IEEE International Conference on Computer Vision (2017)","DOI":"10.1109\/ICCV.2017.393"},{"key":"27_CR12","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: UCF101: a dataset of 101 human action classes from videos in the wild. Technical report, CRCV-TR-12-01 (2012)"},{"key":"27_CR13","unstructured":"Weinzaepfel, P., Martin, X., Schmid, C.: Human action localization with sparse spatial supervision. arXiv preprint \n                      arXiv:1605.05197\n                      \n                     (2016)"},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Gu, C., et al.: AVA: a video dataset of spatio-temporally localized atomic visual actions. arXiv preprint \n                      arXiv:1705.08421\n                      \n                     (2017)","DOI":"10.1109\/CVPR.2018.00633"},{"key":"27_CR15","unstructured":"Kay, W., et al.: The kinetics human action video dataset. arXiv preprint \n                      arXiv:1705.06950\n                      \n                     (2017)"},{"key":"27_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/978-0-387-84854-9","volume-title":"Hidden Markov Models: Estimation and Control","author":"RJ Elliott","year":"2008","unstructured":"Elliott, R.J., Aggoun, L., Moore, J.B.: Hidden Markov Models: Estimation and Control, vol. 29. Springer, Heidelberg (2008). \n                      https:\/\/doi.org\/10.1007\/978-0-387-84854-9"},{"key":"27_CR17","doi-asserted-by":"crossref","unstructured":"van Gemert, J.C., Jain, M., Gati, E., Snoek, C.G.: APT: action localization proposals from dense trajectories. In: BMVC, vol. 2, p. 4 (2015)","DOI":"10.5244\/C.29.177"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"Oneata, D., Verbeek, J., Schmid, C.: Efficient action localization with approximately normalized fisher vectors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2545\u20132552 (2014)","DOI":"10.1109\/CVPR.2014.326"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Jain, M., Van Gemert, J., J\u00e9gou, H., Bouthemy, P., Snoek, C.G.: Action localization with tubelets from motion. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 740\u2013747. IEEE (2014)","DOI":"10.1109\/CVPR.2014.100"},{"key":"27_CR20","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1007\/s11263-013-0662-8","volume":"110","author":"M Sapienza","year":"2014","unstructured":"Sapienza, M., Cuzzolin, F., Torr, P.H.: Learning discriminative space-time action parts from weakly labelled videos. Int. J. Comput. Vis. 110, 30\u201347 (2014)","journal-title":"Int. J. Comput. Vis."},{"key":"27_CR21","doi-asserted-by":"crossref","unstructured":"Sultani, W., Shah, M.: What if we do not have multiple videos of the same action? - video action localization using web images. In: IEEE International Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.122"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.: Action recognition by dense trajectories. In: IEEE International Conference on Computer Vision and Pattern Recognition (2011)","DOI":"10.1109\/CVPR.2011.5995407"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Redmon, J., Farhadi, A.: Yolo9000: better, faster, stronger. arXiv preprint \n                      arXiv:1612.08242\n                      \n                     (2016)","DOI":"10.1109\/CVPR.2017.690"},{"key":"27_CR24","unstructured":"Weinzaepfel, P., Martin, X., Schmid, C.: Towards weakly-supervised action localization. arXiv preprint \n                      arXiv:1605.05197\n                      \n                     (2016)"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Zolfaghari, M., Oliveira, G.L., Sedaghat, N., Brox, T.: Chained multi-stream networks exploiting pose, motion, and appearance for action classification and detection. In: IEEE International Conference on Computer Vision, pp. 2923\u20132932. IEEE (2017)","DOI":"10.1109\/ICCV.2017.316"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Yang, Z., Gao, J., Nevatia, R.: Spatio-temporal action detection with cascade proposal and location anticipation. In: BMVC (2017)","DOI":"10.5244\/C.31.95"},{"key":"27_CR27","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4724\u20134733. IEEE (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"27_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"595","DOI":"10.1007\/978-3-319-16178-5_42","volume-title":"Computer Vision - ECCV 2014 Workshops","author":"GD Evangelidis","year":"2015","unstructured":"Evangelidis, G.D., Singh, G., Horaud, R.: Continuous gesture recognition from articulated poses. In: Agapito, L., Bronstein, M.M., Rother, C. (eds.) ECCV 2014. LNCS, vol. 8925, pp. 595\u2013607. Springer, Cham (2015). \n                      https:\/\/doi.org\/10.1007\/978-3-319-16178-5_42"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. arXiv preprint \n                      arXiv:1708.02002\n                      \n                     (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"27_CR30","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"27_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","volume":"155","author":"H Idrees","year":"2017","unstructured":"Idrees, H., et al.: The THUMOS challenge on action recognition for videos \u201cin the wild\u201d. Comput. Vis. Image Underst. 155, 1\u201323 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"27_CR32","unstructured":"Gorban, A., et al.: Thumos challenge: action recognition with a large number of classes (2015)"},{"key":"27_CR33","doi-asserted-by":"crossref","unstructured":"Caba Heilbron, F., Escorcia, V., Ghanem, B., Carlos Niebles, J.: ActivityNet: a large-scale video benchmark for human activity understanding. In: IEEE International Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"27_CR34","doi-asserted-by":"crossref","unstructured":"Yu, G., Yuan, J.: Fast action proposals for human action detection and search. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1302\u20131311 (2015)","DOI":"10.1109\/CVPR.2015.7298735"},{"key":"27_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/978-3-540-24673-2_3","volume-title":"Computer Vision - ECCV 2004","author":"T Brox","year":"2004","unstructured":"Brox, T., Bruhn, A., Papenberg, N., Weickert, J.: High accuracy optical flow estimation based on a theory for warping. In: Pajdla, T., Matas, J. (eds.) ECCV 2004. LNCS, vol. 3024, pp. 25\u201336. Springer, Heidelberg (2004). \n                      https:\/\/doi.org\/10.1007\/978-3-540-24673-2_3"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2018"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-20876-9_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,25]],"date-time":"2019-05-25T19:06:29Z","timestamp":1558811189000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-20876-9_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030208752","9783030208769"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-20876-9_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"26 May 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Perth, WA","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Australia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 December 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 December 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/accv2018.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"979","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"274","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"2.7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}