{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:20:27Z","timestamp":1775816427617,"version":"3.50.1"},"reference-count":78,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2019,12,3]],"date-time":"2019-12-03T00:00:00Z","timestamp":1575331200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,12,3]],"date-time":"2019-12-03T00:00:00Z","timestamp":1575331200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100010896","name":"International Cooperation and Exchange Programme","doi-asserted-by":"publisher","award":["61620106009"],"award-info":[{"award-number":["61620106009"]}],"id":[{"id":"10.13039\/501100010896","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100010903","name":"Key Programme","doi-asserted-by":"publisher","award":["61332016"],"award-info":[{"award-number":["61332016"]}],"id":[{"id":"10.13039\/501100010903","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1636214"],"award-info":[{"award-number":["U1636214"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61650202"],"award-info":[{"award-number":["61650202"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61772494"],"award-info":[{"award-number":["61772494"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61429201"],"award-info":[{"award-number":["61429201"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Research Program of Frontier Sciences","award":["CAS: QYZDJ-SSW-SYS013"],"award-info":[{"award-number":["CAS: QYZDJ-SSW-SYS013"]}]},{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association CAS","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"crossref"}]},{"name":"ARO grants","award":["W911NF-15-1-0290"],"award-info":[{"award-number":["W911NF-15-1-0290"]}]},{"name":"Faculty Research Gift Awards by NEC Laboratories of America and Blippar"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s11263-019-01266-1","type":"journal-article","created":{"date-parts":[[2019,12,3]],"date-time":"2019-12-03T17:03:25Z","timestamp":1575392605000},"page":"1141-1159","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":200,"title":["The Unmanned Aerial Vehicle Benchmark: Object Detection, Tracking and Baseline"],"prefix":"10.1007","volume":"128","author":[{"given":"Hongyang","family":"Yu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3954-2387","authenticated-orcid":false,"given":"Guorong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Weigang","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Qingming","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Dawei","family":"Du","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Nicu","family":"Sebe","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,12,3]]},"reference":[{"key":"1266_CR1","doi-asserted-by":"crossref","unstructured":"Bae, S. H., & Yoon, K. (2014). Robust online multi-object tracking based on tracklet confidence and online discriminative appearance learning. In CVPR (pp. 1218\u20131225).","DOI":"10.1109\/CVPR.2014.159"},{"key":"1266_CR2","doi-asserted-by":"publisher","first-page":"246309","DOI":"10.1155\/2008\/246309","volume":"2008","author":"K Bernardin","year":"2008","unstructured":"Bernardin, K., & Stiefelhagen, R. (2008). Evaluating multiple object tracking performance: The CLEAR MOT metrics. EURASIP Journal on Image and Video Processing, 2008, 246309.","journal-title":"EURASIP Journal on Image and Video Processing"},{"key":"1266_CR3","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J. F., Vedaldi, A., & Torr, P. H. S. (2016). Fully-convolutional siamese networks for object tracking. In ECCV (pp. 850\u2013865).","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"1266_CR4","doi-asserted-by":"crossref","unstructured":"Bewley, A., Ge, Z., Ott, L., Ramos, F. T., & Upcroft, B. (2016). Simple online and realtime tracking. In ICIP (pp. 3464\u20133468).","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"1266_CR5","doi-asserted-by":"crossref","unstructured":"Bochinski, E., Eiselein, V., & Sikora, T. (2017). High-speed tracking-by-detection without using image information. In AVSS (pp. 1\u20136).","DOI":"10.1109\/AVSS.2017.8078516"},{"key":"1266_CR6","doi-asserted-by":"crossref","unstructured":"Choi, W. (2015). Near-online multi-target tracking with aggregated local flow descriptor. In ICCV (pp. 3029\u20133037).","DOI":"10.1109\/ICCV.2015.347"},{"key":"1266_CR7","doi-asserted-by":"crossref","unstructured":"Chopra, S., Hadsell, R., & LeCun, Y. (2005). Learning a similarity metric discriminatively, with application to face verification. In CVPR (pp. 539\u2013546).","DOI":"10.1109\/CVPR.2005.202"},{"key":"1266_CR8","unstructured":"Dai, J., Li, Y., He, K., & Sun, J. (2016). R-FCN: Object detection via region-based fully convolutional networks. In NIPS (pp. 379\u2013387)."},{"key":"1266_CR9","unstructured":"Danelljan, M., Bhat, G., Khan, F. S., & Felsberg, M. (2016). ECO: Efficient convolution operators for tracking. arXiv:1611.09224."},{"key":"1266_CR10","doi-asserted-by":"crossref","unstructured":"Danelljan, M., H\u00e4ger, G., Khan, F. S., & Felsberg, M. (2015). Learning spatially regularized correlation filters for visual tracking. In ICCV (pp. 4310\u20134318).","DOI":"10.1109\/ICCV.2015.490"},{"key":"1266_CR11","doi-asserted-by":"crossref","unstructured":"Danelljan, M., H\u00e4ger, G., Khan, F. S., & Felsberg, M. (2016). Adaptive decontamination of the training set: A unified formulation for discriminative visual tracking. In CVPR (pp. 1430\u20131438).","DOI":"10.1109\/CVPR.2016.159"},{"key":"1266_CR12","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Robinson, A., Khan, F. S., & Felsberg, M. (2016). Beyond correlation filters: Learning continuous convolution operators for visual tracking. In ECCV (pp. 472\u2013488).","DOI":"10.1007\/978-3-319-46454-1_29"},{"key":"1266_CR13","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L., Li, K., & Li, F. (2009). Imagenet: A large-scale hierarchical image database. In CVPR (pp. 248\u2013255).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1266_CR14","doi-asserted-by":"crossref","unstructured":"Dicle, C., Camps, O. I., & Sznaier, M. (2013). The way they move: Tracking multiple targets with similar appearance. In ICCV (pp. 2304\u20132311).","DOI":"10.1109\/ICCV.2013.286"},{"issue":"4","key":"1266_CR15","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1109\/TPAMI.2011.155","volume":"34","author":"P Doll\u00e1r","year":"2012","unstructured":"Doll\u00e1r, P., Wojek, C., Schiele, B., & Perona, P. (2012). Pedestrian detection: An evaluation of the state of the art. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(4), 743\u2013761.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1266_CR16","doi-asserted-by":"crossref","unstructured":"Du, D., Qi, Y., Yu, H., Yang, Y., Duan, K., Li, G., Zhang, W., Huang, Q., & Tian, Q. (2018). The unmanned aerial vehicle benchmark: Object detection and tracking. In ECCV (pp. 375\u2013391).","DOI":"10.1007\/978-3-030-01249-6_23"},{"issue":"1","key":"1266_CR17","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S. A., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2015). The pascal visual object classes challenge: A retrospective. International Journal of Computer Vision, 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision"},{"key":"1266_CR18","doi-asserted-by":"crossref","unstructured":"Fan, H., & Ling, H. (2017). Parallel tracking and verifying: A framework for real-time and high accuracy visual tracking. In ICCV.","DOI":"10.1109\/ICCV.2017.585"},{"key":"1266_CR19","doi-asserted-by":"crossref","unstructured":"Ferryman, J., & Shahrokni, A. (2009). Pets2009: Dataset and challenge. In AVSS (pp. 1\u20136).","DOI":"10.1109\/PETS-WINTER.2009.5399556"},{"key":"1266_CR20","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., & Urtasun, R. (2012). Are we ready for autonomous driving? The KITTI vision benchmark suite. In CVPR (pp. 3354\u20133361).","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1266_CR21","doi-asserted-by":"crossref","unstructured":"Girshick, R. B. (2015). Fast R-CNN. In ICCV (pp. 1440\u20131448).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1266_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In CVPR (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1266_CR23","doi-asserted-by":"crossref","unstructured":"Held, D., Thrun, S., & Savarese, S. (2016). Learning to track at 100 FPS with deep regression networks. In ECCV (pp. 749\u2013765).","DOI":"10.1007\/978-3-319-46448-0_45"},{"issue":"3","key":"1266_CR24","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1109\/TPAMI.2014.2345390","volume":"37","author":"JF Henriques","year":"2015","unstructured":"Henriques, J. F., Caseiro, R., Martins, P., & Batista, J. (2015). High-speed tracking with kernelized correlation filters. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(3), 583\u2013596.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1266_CR25","doi-asserted-by":"crossref","unstructured":"Hsieh, M., Lin, Y., & Hsu, W. H. (2017). Drone-based object counting by spatially regularized regional proposal network. In ICCV.","DOI":"10.1109\/ICCV.2017.446"},{"key":"1266_CR26","doi-asserted-by":"crossref","unstructured":"Hwang, S., Park, J., Kim, N., Choi, Y., & Kweon, I. S. (2015). Multispectral pedestrian detection: Benchmark dataset and baseline. In CVPR (pp. 1037\u20131045).","DOI":"10.1109\/CVPR.2015.7298706"},{"key":"1266_CR27","doi-asserted-by":"crossref","unstructured":"Izadinia, H., Saleemi, I., Li, W., & Shah, M. (2012). (MP)2T: Multiple people multiple parts tracker. In ECCV (pp. 100\u2013114).","DOI":"10.1007\/978-3-642-33783-3_8"},{"key":"1266_CR28","doi-asserted-by":"crossref","unstructured":"Kalra, I., Singh, M., Nagpal, S., Singh, R., Vatsa, M., & Sujit, P. (2019). Dronesurf: Benchmark dataset for drone-based face recognition. In IEEE FG 2019 (pp. 1\u20137).","DOI":"10.1109\/FG.2019.8756593"},{"key":"1266_CR29","unstructured":"Kiani\u00a0Galoogahi, H., Fagg, A., Huang, C., Ramanan, D., & Lucey, S. (2017). Need for speed: A benchmark for higher frame rate object tracking. In ICCV (pp. 1125\u20131134)."},{"key":"1266_CR30","doi-asserted-by":"crossref","unstructured":"Kim, C., Li, F., Ciptadi, A., & Rehg, J. M. (2015). Multiple hypothesis tracking revisited. In ICCV (pp. 4696\u20134704).","DOI":"10.1109\/ICCV.2015.533"},{"key":"1266_CR31","doi-asserted-by":"crossref","unstructured":"Kong, T., Sun, F., Yao, A., Liu, H., Lu, M., & Chen, Y. (2017). RON: Reverse connection with objectness prior networks for object detection. In CVPR.","DOI":"10.1109\/CVPR.2017.557"},{"key":"1266_CR32","doi-asserted-by":"crossref","unstructured":"Kristan, M., Leonardis, A., Matas, J., et\u00a0al. (2016). The visual object tracking VOT2016 challenge results. In ECCV workshop (pp. 777\u2013823).","DOI":"10.1007\/978-3-319-48881-3_54"},{"key":"1266_CR33","doi-asserted-by":"crossref","unstructured":"Kristan, M., Leonardis, A., Matas, J., Felsberg, M., & He, Z. (2017). The visual object tracking VOT2017 challenge results. In ICCV workshop.","DOI":"10.1109\/ICCVW.2017.230"},{"key":"1266_CR34","unstructured":"Leal-Taix\u00e9, L., Milan, A., Reid, I. D., Roth, S., & Schindler, K. (2015). Motchallenge 2015: Towards a benchmark for multi-target tracking. arXiv:1504.01942."},{"key":"1266_CR35","doi-asserted-by":"crossref","unstructured":"Li, F., Tian, C., Zuo, W., Zhang, L., & Yang, M. (2018). Learning spatial-temporal regularized correlation filters for visual tracking. In CVPR.","DOI":"10.1109\/CVPR.2018.00515"},{"key":"1266_CR36","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S. E., Fu, C., & Berg, A. C. (2016). SSD: Single shot multibox detector. In ECCV (pp. 21\u201337).","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1266_CR37","doi-asserted-by":"crossref","unstructured":"Ma, C., Huang, J., Yang, X., & Yang, M. (2015). Hierarchical convolutional features for visual tracking. In ICCV (pp. 3074\u20133082).","DOI":"10.1109\/ICCV.2015.352"},{"key":"1266_CR38","unstructured":"Milan, A., Leal-Taix\u00e9, L., Reid, I. D., Roth, S., & Schindler, K. (2016). Mot16: A benchmark for multi-object tracking. arXiv:1603.00831."},{"key":"1266_CR39","doi-asserted-by":"crossref","unstructured":"Milan, A., Rezatofighi, S. H., Dick, A. R., Reid, I. D., & Schindler, K. (2017). Online multi-target tracking using recurrent neural networks. In AAAI (pp. 4225\u20134232).","DOI":"10.1609\/aaai.v31i1.11194"},{"issue":"1","key":"1266_CR40","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1109\/TPAMI.2013.103","volume":"36","author":"A Milan","year":"2014","unstructured":"Milan, A., Roth, S., & Schindler, K. (2014). Continuous energy minimization for multitarget tracking. IEEE Transactions on Pattern Analysis and Machine Intelligence, 36(1), 58\u201372.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1266_CR41","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., & Ghanem, B. (2018). Trackingnet: A large-scale dataset and benchmark for object tracking in the wild. In ECCV.","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"1266_CR42","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., & Ghanem, B. (2016). A benchmark and simulator for UAV tracking. In ECCV (pp. 445\u2013461).","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"1266_CR43","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., & Ghanem, B. (2017). Context-aware correlation filter tracking. In CVPR.","DOI":"10.1109\/CVPR.2017.152"},{"issue":"1","key":"1266_CR44","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1137\/0105003","volume":"5","author":"J Munkres","year":"1957","unstructured":"Munkres, J. (1957). Algorithms for the assignment and transportation problems. Journal of the Society for Industrial and Applied Mathematics, 5(1), 32\u201338.","journal-title":"Journal of the Society for Industrial and Applied Mathematics"},{"key":"1266_CR45","doi-asserted-by":"crossref","unstructured":"Nam, H., & Han, B. (2016). Learning multi-domain convolutional neural networks for visual tracking. In CVPR (pp. 4293\u20134302).","DOI":"10.1109\/CVPR.2016.465"},{"key":"1266_CR46","unstructured":"Ning, W., Wengang, Z., Qi, T., Richang, H., Meng, W., & Houqiang, L. (2018). Multi-cue correlation filters for robust visual tracking. In CVPR (pp. 4844\u20134853)."},{"issue":"1","key":"1266_CR47","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1023\/A:1008162616689","volume":"38","author":"C Papageorgiou","year":"2000","unstructured":"Papageorgiou, C., & Poggio, T. (2000). A trainable system for object detection. International Journal of Computer Vision, 38(1), 15\u201333.","journal-title":"International Journal of Computer Vision"},{"key":"1266_CR48","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., Ramanan, D., & Fowlkes, C. C. (2011). Globally-optimal greedy algorithms for tracking a variable number of objects. In CVPR (pp. 1201\u20131208).","DOI":"10.1109\/CVPR.2011.5995604"},{"key":"1266_CR49","doi-asserted-by":"crossref","unstructured":"Qi, Y., Zhang, S., Qin, L., Yao, H., Huang, Q., Lim, J., & Yang, M. (2016). Hedged deep tracking. In CVPR (pp. 4303\u20134311).","DOI":"10.1109\/CVPR.2016.466"},{"key":"1266_CR50","unstructured":"Ren, S., He, K., Girshick, R. B., & Sun, J. (2015). Faster R-CNN: Towards real-time object detection with region proposal networks. In NIPS (pp. 91\u201399)."},{"key":"1266_CR51","doi-asserted-by":"crossref","unstructured":"Ristani, E., Solera, F., Zou, R. S., Cucchiara, R., & Tomasi, C. (2016). Performance measures and a data set for multi-target, multi-camera tracking. In ECCVW (pp. 17\u201335).","DOI":"10.1007\/978-3-319-48881-3_2"},{"key":"1266_CR52","doi-asserted-by":"crossref","unstructured":"Robicquet, A., Sadeghian, A., Alahi, A., & Savarese, S. (2016). Learning social etiquette: Human trajectory understanding in crowded scenes. In ECCV (pp. 549\u2013565).","DOI":"10.1007\/978-3-319-46484-8_33"},{"key":"1266_CR53","unstructured":"Shu, G., Dehghan, A., Oreifej, O., Hand, E., & Shah, M. (2012). Part-based multiple-person tracking with partial occlusion handling. In CVPR (pp. 1815\u20131821)."},{"issue":"7","key":"1266_CR54","doi-asserted-by":"publisher","first-page":"1442","DOI":"10.1109\/TPAMI.2013.230","volume":"36","author":"AWM Smeulders","year":"2014","unstructured":"Smeulders, A. W. M., Chu, D. M., Cucchiara, R., Calderara, S., Dehghan, A., & Shah, M. (2014). Visual tracking: An experimental survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, 36(7), 1442\u20131468.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1266_CR55","doi-asserted-by":"crossref","unstructured":"Solera, F., Calderara, S., & Cucchiara, R. (2015). Towards the evaluation of reproducible robustness in tracking-by-detection. In AVSS (pp. 1\u20136).","DOI":"10.1109\/AVSS.2015.7301755"},{"key":"1266_CR56","doi-asserted-by":"crossref","unstructured":"Son, J., Baek, M., Cho, M., & Han, B. (2017). Multi-object tracking with quadruplet convolutional neural networks. In CVPR.","DOI":"10.1109\/CVPR.2017.403"},{"key":"1266_CR57","unstructured":"Song, Y., Ma, C., Gong, L., Zhang, J., Lau, R. W. H., & Yang, M. (2017). CREST: Convolutional residual learning for visual tracking. arXiv:1708.00225."},{"key":"1266_CR58","unstructured":"Song, Y., Ma, C., Wu, X., Gong, L., Bao, L., Zuo, W., Shen, C., Lau, R. W. H., & Yang, M. (2018). VITAL: Visual tracking via adversarial learning. arXiv:1804.04273."},{"key":"1266_CR59","doi-asserted-by":"crossref","unstructured":"Tang, S., Andres, B., Andriluka, M., & Schiele, B. (2016). Multi-person tracking by multicut and deep matching. In ECCV workshops (pp. 100\u2013111).","DOI":"10.1007\/978-3-319-48881-3_8"},{"key":"1266_CR60","doi-asserted-by":"crossref","unstructured":"Tang, S., Andriluka, M., Andres, B., & Schiele, B. (2017). Multiple people tracking by lifted multicut and person re-identification. In CVPR.","DOI":"10.1109\/CVPR.2017.394"},{"key":"1266_CR61","doi-asserted-by":"crossref","unstructured":"Tao, R., Gavves, E., & Smeulders, A. W. M. (2016). Siamese instance search for tracking. In CVPR (pp. 1420\u20131429).","DOI":"10.1109\/CVPR.2016.158"},{"key":"1266_CR62","doi-asserted-by":"crossref","unstructured":"Valmadre, J., Bertinetto, L., Henriques, J. F., Vedaldi, A., & Torr, P. H. S. (2017). End-to-end representation learning for correlation filter based tracking. In CVPR.","DOI":"10.1109\/CVPR.2017.531"},{"key":"1266_CR63","doi-asserted-by":"crossref","unstructured":"Wang, L., Ouyang, W., Wang, X., & Lu, H. (2015). Visual tracking with fully convolutional networks. In ICCV (pp. 3119\u20133127).","DOI":"10.1109\/ICCV.2015.357"},{"key":"1266_CR64","doi-asserted-by":"crossref","unstructured":"Wang, L., Ouyang, W., Wang, X., Lu, H. (2016). STCT: Sequentially training convolutional networks for visual tracking. In CVPR (pp. 1373\u20131381).","DOI":"10.1109\/CVPR.2016.153"},{"key":"1266_CR65","unstructured":"Wen, L., Du, D., Cai, Z., Lei, Z., Chang, M., Qi, H., Lim, J., Yang, M., & Lyu, S. (2015). DETRAC: A new benchmark and protocol for multi-object tracking. arXiv:1511.04136."},{"key":"1266_CR66","unstructured":"Wojke, N., Bewley, A., & Paulus, D. (2017). Simple online and realtime tracking with a deep association metric. arXiv:1703.07402."},{"issue":"9","key":"1266_CR67","doi-asserted-by":"publisher","first-page":"1834","DOI":"10.1109\/TPAMI.2014.2388226","volume":"37","author":"Y Wu","year":"2015","unstructured":"Wu, Y., Lim, J., & Yang, M. (2015). Object tracking benchmark. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(9), 1834\u20131848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1266_CR68","doi-asserted-by":"crossref","unstructured":"Xia, G. S., Bai, X., Ding, J., Zhu, Z., Belongie, S., Luo, J., Datcu, M., Pelillo, M., & Zhang, L. (2018). DOTA: A large-scale dataset for object detection in aerial images. In CVPR (pp. 3974\u20133983).","DOI":"10.1109\/CVPR.2018.00418"},{"key":"1266_CR69","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Alahi, A., & Savarese, S. (2015). Learning to track: Online multi-object tracking by decision making. In ICCV (pp. 4705\u20134713).","DOI":"10.1109\/ICCV.2015.534"},{"key":"1266_CR70","doi-asserted-by":"crossref","unstructured":"Yoon, J. H., Lee, C., Yang, M., & Yoon, K. (2016). Online multi-object tracking via structural constraint event aggregation. In CVPR (pp. 1392\u20131400).","DOI":"10.1109\/CVPR.2016.155"},{"key":"1266_CR71","doi-asserted-by":"crossref","unstructured":"Yoon, J. H., Yang, M., Lim, J., & Yoon, K. (2015). Bayesian multi-object tracking using motion context from multiple objects. In WACV (pp. 33\u201340).","DOI":"10.1109\/WACV.2015.12"},{"key":"1266_CR72","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.neucom.2018.02.068","volume":"292","author":"H Yu","year":"2018","unstructured":"Yu, H., Qin, L., Huang, Q., & Yao, H. (2018). Online multiple object tracking via exchanging object context. Neurocomputing, 292, 28\u201337.","journal-title":"Neurocomputing"},{"key":"1266_CR73","doi-asserted-by":"crossref","unstructured":"Yun, S., Choi, J., Yoo, Y., Yun, K., & Choi, J. Y. (2017). Action-decision networks for visual tracking with deep reinforcement learning. In CVPR.","DOI":"10.1109\/CVPR.2017.148"},{"key":"1266_CR74","doi-asserted-by":"crossref","unstructured":"Zhang, K., Zhang, L., Liu, Q., Zhang, D., & Yang, M. (2014). Fast visual tracking via dense spatio-temporal context learning. In ECCV (pp. 127\u2013141).","DOI":"10.1007\/978-3-319-10602-1_9"},{"key":"1266_CR75","unstructured":"Zhang, T., Xu, C., & Yang, M. H. (2017). Multi-task correlation particle filter for robust visual tracking. In CVPR."},{"issue":"5","key":"1266_CR76","doi-asserted-by":"publisher","first-page":"2331","DOI":"10.1109\/TIP.2018.2885238","volume":"28","author":"B Zhong","year":"2018","unstructured":"Zhong, B., Bai, B., Li, J., Zhang, Y., & Fu, Y. (2018). Hierarchical tracking by reinforcement learning-based searching and coarse-to-fine verifying. IEEE Transactions on Image Processing, 28(5), 2331\u20132341.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"5","key":"1266_CR77","doi-asserted-by":"publisher","first-page":"1183","DOI":"10.1109\/TMM.2018.2875360","volume":"21","author":"Q Zhou","year":"2018","unstructured":"Zhou, Q., Zhong, B., Zhang, Y., Li, J., & Fu, Y. (2018). Deep alignment network based multi-person tracking with occlusion and motion reasoning. IEEE Transactions on Multimedia, 21(5), 1183\u20131194.","journal-title":"IEEE Transactions on Multimedia"},{"key":"1266_CR78","unstructured":"Zhu, P., Wen, L., Bian, X., Haibin, L., & Hu, Q. (2018). Vision meets drones: A challenge. arXiv preprint arXiv:1804.07437."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01266-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01266-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01266-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,7]],"date-time":"2022-10-07T15:25:39Z","timestamp":1665156339000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01266-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12,3]]},"references-count":78,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["1266"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01266-1","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,12,3]]},"assertion":[{"value":"27 December 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 November 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 December 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}