{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,7,2]],"date-time":"2022-07-02T04:42:23Z","timestamp":1656736943248},"reference-count":26,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"7","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Fundamentals"],"published-print":{"date-parts":[[2022,7,1]]},"DOI":"10.1587\/transfun.2021eap1068","type":"journal-article","created":{"date-parts":[[2022,1,17]],"date-time":"2022-01-17T22:08:55Z","timestamp":1642457335000},"page":"1082-1090","source":"Crossref","is-referenced-by-count":0,"title":["Temporal Ensemble SSDLite: Exploiting Temporal Correlation in Video for Accurate Object Detection"],"prefix":"10.1587","volume":"E105.A","author":[{"given":"Lukas","family":"NAKAMURA","sequence":"first","affiliation":[{"name":"Osaka University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiromitsu","family":"AWANO","sequence":"additional","affiliation":[{"name":"Kyoto University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"532","reference":[{"key":"1","unstructured":"[1] A. Krizhevsky, I. Sutskever, and G.E. Hinton, \u201cImagenet classification with deep convolutional neural networks,\u201d Advances in Neural Inf. Process. Syst., pp.1097-1105, 2012."},{"key":"2","unstructured":"[2] Z. Zou, Z. Shi, Y. Guo, and J. Ye, \u201cObject detection in 20 years: A survey,\u201d arXiv:1905.05055, 2019. 10.48550\/arXiv.1905.05055"},{"key":"3","unstructured":"[3] K. Simonyan and A. Zisserman, \u201cVery deep convolutional networks for large-scale image recognition,\u201d arXiv:1409.1556, 2014. 10.48550\/arXiv.1409.1556"},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich, \u201cGoing deeper with convolutions,\u201d Proc. Conf. on Comput. 
Vision and Pattern Recognit., pp.1-9, 2015. 10.1109\/cvpr.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] K. He, X. Zhang, S. Ren, and J. Sun, \u201cDeep residual learning for image recognition,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.770-778, 2016. 10.1109\/cvpr.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] R. Girshick, J. Donahue, T. Darrell, and J. Malik, \u201cRich feature hierarchies for accurate object detection and semantic segmentation,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.580-587, 2014. 10.1109\/cvpr.2014.81","DOI":"10.1109\/CVPR.2014.81"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] R. Girshick, \u201cFast r-CNN,\u201d Proc. Int. Conf. on Comput. Vision, pp.1440-1448, 2015. 10.1109\/iccv.2015.169","DOI":"10.1109\/ICCV.2015.169"},{"key":"8","unstructured":"[8] S. Ren, K. He, R. Girshick, and J. Sun, \u201cFaster R-CNN: Towards real-time object detection with region proposal networks,\u201d Advances in Neural Inf. Process. Syst., pp.3-5, Curran Associates, 2015."},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] J. Redmon, S. Divvala, R. Girshick, and A. Farhadi, \u201cYou only look once: Unified, real-time object detection,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.779-788, 2016. 10.1109\/cvpr.2016.91","DOI":"10.1109\/CVPR.2016.91"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] J. Redmon and A. Farhadi, \u201cYOLO9000: Better, faster, stronger,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.7263-7271, 2017. 10.1109\/cvpr.2017.690","DOI":"10.1109\/CVPR.2017.690"},{"key":"11","unstructured":"[11] J. Redmon and A. Farhadi, \u201cYolov3: An incremental improvement,\u201d arXiv:1804.02767, 2018. 10.48550\/arXiv.1804.02767"},{"key":"12","unstructured":"[12] A. Bochkovskiy, C.Y. Wang, and H.Y.M. 
Liao, \u201cYOLOV4: Optimal speed and accuracy of object detection,\u201d arXiv:2004.10934, 2020."},{"key":"13","doi-asserted-by":"crossref","unstructured":"[13] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, S. Reed, C.Y. Fu, and A.C. Berg, \u201cSSD: Single shot MultiBox detector,\u201d Proc. Eur. Conf. on Comput. Vision, pp.21-37, 2016. 10.1007\/978-3-319-46448-0_2","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] M. Sandler, A. Howard, M. Zhu, A. Zhmoginov, and L.C. Chen, \u201cMobileNetV2: Inverted residuals and linear bottlenecks,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.4510-4520, 2018. 10.1109\/cvpr.2018.00474","DOI":"10.1109\/CVPR.2018.00474"},{"key":"15","unstructured":"[15] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and Y. Bengio, \u201cQuantized neural networks: Training neural networks with low precision weights and activations,\u201d J. Machine Learning Research, vol.18, no.1, pp.6869-6898, 2017."},{"key":"16","doi-asserted-by":"crossref","unstructured":"[16] X. Zhu, Y. Wang, J. Dai, L. Yuan, and Y. Wei, \u201cFlow-guided feature aggregation for video object detection,\u201d Proc. Int. Conf. on Comput. Vision, pp.408-417, 2017. 10.1109\/iccv.2017.52","DOI":"10.1109\/ICCV.2017.52"},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] X. Zhu, Y. Xiong, J. Dai, L. Yuan, and Y. Wei, \u201cDeep feature flow for video recognition,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.2349-2358, 2017. 10.1109\/cvpr.2017.441","DOI":"10.1109\/CVPR.2017.441"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] X. Zhu, J. Dai, L. Yuan, and Y. Wei, \u201cTowards high performance video object detection,\u201d Proc. Conf. on Comput. Vision and Pattern Recognit., pp.7210-7218, 2018. 10.1109\/cvpr.2018.00753","DOI":"10.1109\/CVPR.2018.00753"},{"key":"19","unstructured":"[19] Y. Freund, R. Schapire, and N. Abe, \u201cA short introduction to boosting,\u201d J. 
JSAI, vol.14, no.771-780, p.1612, 1999."},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] T. Malisiewicz, A. Gupta, and A.A. Efros, \u201cEnsemble of exemplar-svms for object detection and beyond,\u201d Proc. Int. Conf. on Comput. Vision, pp.89-96, 2011. 10.1109\/iccv.2011.6126229","DOI":"10.1109\/ICCV.2011.6126229"},{"key":"21","unstructured":"[21] J. Guo and S. Gould, \u201cDeep cnn ensemble with data augmentation for object detection,\u201d arXiv:1506.07224, 2015. 10.48550\/arXiv.1506.07224"},{"key":"22","unstructured":"[22] A. Casado-Garc\u00eda and J. Heras, \u201cEnsemble methods for object detection,\u201d ECAI, pp.2688-2695, 2020."},{"key":"23","doi-asserted-by":"publisher","unstructured":"[23] M. Everingham, L. Van Gool, C.K. Williams, J. Winn, and A. Zisserman, \u201cThe pascal visual object classes (VOC) challenge,\u201d Int. J. Comput. Vision, vol.88, no.2, pp.303-338, 2010. 10.1007\/s11263-009-0275-4","DOI":"10.1007\/s11263-009-0275-4"},{"key":"24","doi-asserted-by":"crossref","unstructured":"[24] T.Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan, P. Doll\u00e1r, and C.L. Zitnick, \u201cMicrosoft COCO: Common objects in context,\u201d Proc. Eur. Conf. on Comput. Vision, pp.740-755, 2014. 10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"25","unstructured":"[25] A. Broad, M. Jones, and T.Y. Lee, \u201cRecurrent multi-frame single shot detector for video object detection,\u201d BMVC, p.94, 2018."},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh, S. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein, A.C. Berg, and L. Fei-Fei, \u201cImagenet large scale visual recognition challenge,\u201d Int. J. Comput. Vision, vol.115, no.3, pp.211-252, 2015. 
10.1007\/s11263-015-0816-y","DOI":"10.1007\/s11263-015-0816-y"}],"container-title":["IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E105.A\/7\/E105.A_2021EAP1068\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,2]],"date-time":"2022-07-02T04:18:56Z","timestamp":1656735536000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E105.A\/7\/E105.A_2021EAP1068\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,1]]},"references-count":26,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2022]]}},"URL":"https:\/\/doi.org\/10.1587\/transfun.2021eap1068","relation":{},"ISSN":["0916-8508","1745-1337"],"issn-type":[{"value":"0916-8508","type":"print"},{"value":"1745-1337","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,7,1]]},"article-number":"2021EAP1068"}}