{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,27]],"date-time":"2025-12-27T21:15:06Z","timestamp":1766870106068,"version":"3.40.3"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030585679"},{"type":"electronic","value":"9783030585686"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58568-6_10","type":"book-chapter","created":{"date-parts":[[2020,11,12]],"date-time":"2020-11-12T14:04:57Z","timestamp":1605189897000},"page":"160-177","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":27,"title":["Video Object Detection via Object-Level Temporal Aggregation"],"prefix":"10.1007","author":[{"given":"Chun-Han","family":"Yao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Fang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohui","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yangyue","family":"Wan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming-Hsuan","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,13]]},"reference":[{"key":"10_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1007\/978-3-319-48881-3_56","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"L Bertinetto","year":"2016","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.S.: Fully-convolutional siamese networks for object tracking. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9914, pp. 850\u2013865. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48881-3_56"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Chen, K., et al.: Optimizing video object detection via a scale-time lattice. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7814\u20137823 (2018)","DOI":"10.1109\/CVPR.2018.00815"},{"key":"10_CR3","doi-asserted-by":"crossref","unstructured":"Dosovitskiy, A., et al.: Flownet: learning optical flow with convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2758\u20132766 (2015)","DOI":"10.1109\/ICCV.2015.316"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Duan, K., Bai, S., Xie, L., Qi, H., Huang, Q., Tian, Q.: Centernet: keypoint triplets for object detection. arXiv preprint arXiv:1904.08189 (2019)","DOI":"10.1109\/ICCV.2019.00667"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Detect to track and track to detect. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3038\u20133046 (2017)","DOI":"10.1109\/ICCV.2017.330"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"10_CR8","unstructured":"Han, W., et al.: Seq-NMS for video object detection. arXiv preprint arXiv:1602.08465 (2016)"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Henriques, J.F., Caseiro, R., Martins, P., Batista, J.: High-speed tracking with kernelized correlation filters. IEEE Trans. Pattern Anal. Mach. Intell. 37(3), 583\u2013596 (2014)","DOI":"10.1109\/TPAMI.2014.2345390"},{"key":"10_CR12","unstructured":"Howard, A.G., et al.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"10_CR13","doi-asserted-by":"crossref","unstructured":"Huang, C., Lucey, S., Ramanan, D.: Learning policies for adaptive tracking with deep feature cascades. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 105\u2013114 (2017)","DOI":"10.1109\/ICCV.2017.21"},{"key":"10_CR14","unstructured":"Huang, L., Zhao, X., Huang, K.: Got-10k: a large high-diversity benchmark for generic object tracking in the wild. arXiv preprint arXiv:1810.11981 (2018)"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Kang, K., Ouyang, W., Li, H., Wang, X.: Object detection from video tubelets with convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 817\u2013825 (2016)","DOI":"10.1109\/CVPR.2016.95"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Lao, D., Sundaramoorthi, G.: Minimum delay object detection from video. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5097\u20135106 (2019)","DOI":"10.1109\/ICCV.2019.00520"},{"key":"10_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"765","DOI":"10.1007\/978-3-030-01264-9_45","volume-title":"Computer Vision \u2013 ECCV 2018","author":"H Law","year":"2018","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 765\u2013781. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_45"},{"key":"10_CR18","unstructured":"Li, Z., Peng, C., Yu, G., Zhang, X., Deng, Y., Sun, J.: Light-head R-CNN: in defense of two-stage object detector. arXiv preprint arXiv:1711.07264 (2017)"},{"key":"10_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Liu, M., Zhu, M.: Mobile video object detection with temporally-aware feature maps. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5686\u20135695 (2018)","DOI":"10.1109\/CVPR.2018.00596"},{"key":"10_CR21","unstructured":"Liu, M., Zhu, M., White, M., Li, Y., Kalenichenko, D.: Looking fast and slow: memory-guided mobile video object detection. arXiv preprint arXiv:1903.10172 (2019)"},{"key":"10_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Liu","year":"2016","unstructured":"Liu, W., et al.: SSD: single shot MultiBox detector. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 21\u201337. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Luo, H., Xie, W., Wang, X., Zeng, W.: Detect or track: towards cost-effective video object detection\/tracking. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 8803\u20138810 (2019)","DOI":"10.1609\/aaai.v33i01.33018803"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Mahasseni, B., Todorovic, S., Fern, A.: Budget-aware deep semantic video segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1029\u20131038 (2017)","DOI":"10.1109\/CVPR.2017.224"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Mao, H., Yang, X., Dally, W.J.: A delay metric for video object detection: what average precision fails to tell. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 573\u2013582 (2019)","DOI":"10.1109\/ICCV.2019.00066"},{"key":"10_CR26","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: Proceedings of the International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"key":"10_CR27","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: Pointnet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"10_CR28","unstructured":"Redmon, J.: Darknet: open source neural networks in C (2013\u20132016). http:\/\/pjreddie.com\/darknet\/"},{"key":"10_CR29","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"10_CR30","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"10_CR31","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"10_CR32","doi-asserted-by":"publisher","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015). https:\/\/doi.org\/10.1007\/s11263-015-0816-y","DOI":"10.1007\/s11263-015-0816-y"},{"key":"10_CR33","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: Mobilenetv 2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"10_CR34","doi-asserted-by":"crossref","unstructured":"Supancic III, J., Ramanan, D.: Tracking as online decision-making: learning a policy from streaming videos with reinforcement learning. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 322\u2013331 (2017)","DOI":"10.1109\/ICCV.2017.43"},{"key":"10_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1007\/978-3-030-01261-8_33","volume-title":"Computer Vision \u2013 ECCV 2018","author":"S Wang","year":"2018","unstructured":"Wang, S., Zhou, Y., Yan, J., Deng, Z.: Fully motion-aware network for video object detection. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 557\u2013573. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_33"},{"key":"10_CR36","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points. In: arXiv preprint arXiv:1904.07850 (2019)"},{"key":"10_CR37","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhuo, J., Krahenbuhl, P.: Bottom-up object detection by grouping extreme and center points. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 850\u2013859 (2019)","DOI":"10.1109\/CVPR.2019.00094"},{"key":"10_CR38","doi-asserted-by":"crossref","unstructured":"Zhu, X., Dai, J., Zhu, X., Wei, Y., Yuan, L.: Towards high performance video object detection for mobiles. arXiv preprint arXiv:1804.05830 (2018)","DOI":"10.1109\/CVPR.2018.00753"},{"key":"10_CR39","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., Wei, Y.: Flow-guided feature aggregation for video object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 408\u2013417 (2017)","DOI":"10.1109\/ICCV.2017.52"},{"key":"10_CR40","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xiong, Y., Dai, J., Yuan, L., Wei, Y.: Deep feature flow for video recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2349\u20132358 (2017)","DOI":"10.1109\/CVPR.2017.441"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58568-6_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:17:26Z","timestamp":1731370646000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58568-6_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585679","9783030585686"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58568-6_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"13 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}