{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T09:56:33Z","timestamp":1773741393199,"version":"3.50.1"},"reference-count":74,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2024,2,12]],"date-time":"2024-02-12T00:00:00Z","timestamp":1707696000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,12]],"date-time":"2024-02-12T00:00:00Z","timestamp":1707696000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62020106012"],"award-info":[{"award-number":["62020106012"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U1836218"],"award-info":[{"award-number":["U1836218"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62106089"],"award-info":[{"award-number":["62106089"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2024,7]]},"DOI":"10.1007\/s11263-024-02008-8","type":"journal-article","created":{"date-parts":[[2024,2,12]],"date-time":"2024-02-12T18:02:36Z","timestamp":1707760956000},"page":"2698-2712","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Learning Adaptive Spatio-Temporal Inference Transformer for Coarse-to-Fine Animal Visual Tracking: Algorithm and Benchmark"],"prefix":"10.1007","volume":"132","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9015-3128","authenticated-orcid":false,"given":"Tianyang","family":"Xu","sequence":"first","affiliation":[]},{"given":"Ze","family":"Kang","sequence":"additional","affiliation":[]},{"given":"Xuefeng","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Xiao-Jun","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,12]]},"reference":[{"issue":"8","key":"2008_CR1","doi-asserted-by":"publisher","first-page":"1064","DOI":"10.1109\/TPAMI.2004.53","volume":"26","author":"S Avidan","year":"2004","unstructured":"Avidan, S. (2004). Support vector tracking. IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(8), 1064\u20131072.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"8","key":"2008_CR2","doi-asserted-by":"publisher","first-page":"1619","DOI":"10.1109\/TPAMI.2010.226","volume":"33","author":"B Babenko","year":"2011","unstructured":"Babenko, B., Yang, M. H., & Belongie, S. (2011). Robust object tracking with online multiple instance learning. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(8), 1619\u20131632.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"3","key":"2008_CR3","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1023\/B:VISI.0000011205.11775.fd","volume":"56","author":"S Baker","year":"2004","unstructured":"Baker, S., & Matthews, I. (2004). Lucas-kanade 20 years on: A unifying framework. International Journal of Computer Vision, 56(3), 221\u2013255.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR4","first-page":"1401","volume":"38","author":"L Bertinetto","year":"2016","unstructured":"Bertinetto, L., Valmadre, J., Golodetz, S., Miksik, O., & Torr, P. H. S. (2016). Staple: Complementary learners for real-time tracking. IEEE Conference on Computer Vision and Pattern Recognition, 38, 1401\u20131409.","journal-title":"IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"2008_CR5","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J. F., Vedaldi, A., & Torr, P. H. (2016b). Fully-convolutional siamese networks for object tracking. In European Conference on Computer Vision. Springer, pp. 850\u2013865.","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"2008_CR6","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L. V., & Timofte, R. (2019). Learning discriminative model prediction for tracking. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6182\u20136191.","DOI":"10.1109\/ICCV.2019.00628"},{"key":"2008_CR7","doi-asserted-by":"crossref","unstructured":"Bideau, P., & Learned-Miller, E. (2016). It\u2019s moving! a probabilistic model for causal motion segmentation in moving camera videos. In European Conference on Computer Vision. Springer, pp. 433\u2013449.","DOI":"10.1007\/978-3-319-46484-8_26"},{"key":"2008_CR8","doi-asserted-by":"crossref","unstructured":"Biggs, B., Boyne, O., Charles, J., Fitzgibbon, A., & Cipolla, R. (2020). Who left the dogs out? 3d animal reconstruction with expectation maximization in the loop. In European Conference on Computer Vision. Springer, pp. 195\u2013211.","DOI":"10.1007\/978-3-030-58621-8_12"},{"key":"2008_CR9","doi-asserted-by":"crossref","unstructured":"Bolme, D. S., Beveridge, J. R., Draper, B. A., & Lui, Y. M. (2010). Visual object tracking using adaptive correlation filters. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 2544\u20132550.","DOI":"10.1109\/CVPR.2010.5539960"},{"key":"2008_CR10","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1117\/12.421129","volume":"4387","author":"K Briechle","year":"2001","unstructured":"Briechle, K., & Hanebeck, U. D. (2001). Template matching using fast normalized cross correlation. Proceedings of SPIE, 4387, 95\u2013102.","journal-title":"Proceedings of SPIE"},{"key":"2008_CR11","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1109\/TAES.1979.308710","volume":"2","author":"Y Chan","year":"1979","unstructured":"Chan, Y., Hu, A., & Plant, J. (1979). A kalman filter based tracking scheme with input estimation. IEEE Transactions on Aerospace and Electronic Systems, 2, 237\u2013244.","journal-title":"IEEE Transactions on Aerospace and Electronic Systems"},{"key":"2008_CR12","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., & Lu, H. (2021). Transformer tracking. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8126\u20138135.","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"2008_CR13","doi-asserted-by":"crossref","unstructured":"Chen, Z., Zhong, B., Li, G., Zhang, S., & Ji, R. (2020). Siamese box adaptive network for visual tracking. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6668\u20136677.","DOI":"10.1109\/CVPR42600.2020.00670"},{"key":"2008_CR14","doi-asserted-by":"crossref","unstructured":"Comaniciu, D., Ramesh, V., & Meer, P. (2000). Real-time tracking of non-rigid objects using mean shift. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 142\u2013149.","DOI":"10.1109\/CVPR.2000.854761"},{"key":"2008_CR15","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Hager, G., Khan, F. S., & Felsberg, M. (2015). Learning spatially regularized correlation filters for visual tracking. In IEEE International Conference on Computer Vision, pp. 4310\u20134318.","DOI":"10.1109\/ICCV.2015.490"},{"key":"2008_CR16","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F. S., & Felsberg, M. (2017a). Eco: Efficient convolution operators for tracking. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 6931\u20136939.","DOI":"10.1109\/CVPR.2017.733"},{"issue":"8","key":"2008_CR17","doi-asserted-by":"publisher","first-page":"1561","DOI":"10.1109\/TPAMI.2016.2609928","volume":"39","author":"M Danelljan","year":"2017","unstructured":"Danelljan, M., H\u00e4ger, G., Khan, F. S., & Felsberg, M. (2017). Discriminative scale space tracking. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(8), 1561\u20131575.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2008_CR18","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F. S., & Felsberg, M. (2019). Atom: Accurate tracking by overlap maximization. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4660\u20134669.","DOI":"10.1109\/CVPR.2019.00479"},{"key":"2008_CR19","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Gool, L. V., & Timofte, R. (2020). Probabilistic regression for visual tracking. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7183\u20137192.","DOI":"10.1109\/CVPR42600.2020.00721"},{"key":"2008_CR20","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., & Gelly S et al. (2020). An image is worth 16x16 words: Transformers for image recognition at scale. arXiv:2010.11929"},{"key":"2008_CR21","doi-asserted-by":"crossref","unstructured":"Fan, H., Lin, L., Yang, F., Chu, P., Deng, G., Yu, S., Bai, H., Xu, Y., Liao, C., & Ling, H. (2019). Lasot: A high-quality benchmark for large-scale single object tracking. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5374\u20135383.","DOI":"10.1109\/CVPR.2019.00552"},{"issue":"1","key":"2008_CR22","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/s11263-007-0066-8","volume":"77","author":"M Fink","year":"2008","unstructured":"Fink, M., & Ullman, S. (2008). From aardvark to zorro: A benchmark for mammal image classification. International Journal of Computer Vision, 77(1), 143\u2013156.","journal-title":"International Journal of Computer Vision"},{"issue":"6","key":"2008_CR23","doi-asserted-by":"publisher","first-page":"1434","DOI":"10.2514\/3.21565","volume":"18","author":"N Gordon","year":"1995","unstructured":"Gordon, N., Salmond, D., & Ewing, C. (1995). Bayesian state estimation for tracking and guidance using the bootstrap filter. Journal of Guidance, Control, and Dynamics, 18(6), 1434\u20131443.","journal-title":"Journal of Guidance, Control, and Dynamics"},{"key":"2008_CR24","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, J., Cui, Y., Wang, Z., & Chen, S. (2020). Siamcar: Siamese fully convolutional classification and regression for visual tracking. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6269\u20136277.","DOI":"10.1109\/CVPR42600.2020.00630"},{"key":"2008_CR25","doi-asserted-by":"crossref","unstructured":"Guo, D., Shao, Y., Cui, Y., Wang, Z., Zhang, L., & Shen, C. (2021). Graph attention tracking. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9543\u20139552.","DOI":"10.1109\/CVPR46437.2021.00942"},{"key":"2008_CR26","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., & Girshick, R. (2022). Masked autoencoders are scalable vision learners. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16000\u201316009.","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2008_CR27","doi-asserted-by":"crossref","unstructured":"Held, D., Thrun, S., & Savarese, S. (2016). Learning to track at 100 fps with deep regression networks. In European Conference on Computer Vision. Springer, pp. 749\u2013765.","DOI":"10.1007\/978-3-319-46448-0_45"},{"key":"2008_CR28","doi-asserted-by":"crossref","unstructured":"Henriques, J., O.\u00a0F., Caseiro, R., Martins, P., & Batista, J. (2012). Exploiting the circulant structure of tracking-by-detection with kernels. In European Conference on Computer Vision, pp. 702\u2013715.","DOI":"10.1007\/978-3-642-33765-9_50"},{"issue":"3","key":"2008_CR29","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1109\/TPAMI.2014.2345390","volume":"37","author":"JF Henriques","year":"2015","unstructured":"Henriques, J. F., Rui, C., Martins, P., & Batista, J. (2015). High-speed tracking with kernelized correlation filters. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(3), 583\u2013596.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2008_CR30","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang, L., Zhao, X., & Huang, K. (2019). Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE Transactions on Pattern Analysis and Machine Intelligence, 43, 1562\u20131577.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"2008_CR31","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1008078328650","volume":"29","author":"M Isard","year":"1998","unstructured":"Isard, M., & Blake, A. (1998). Condensation-conditional density propagation for visual tracking. International Journal of Computer Vision, 29(1), 5\u201328.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR32","doi-asserted-by":"crossref","unstructured":"Kiani\u00a0Galoogahi, H., Fagg, A., & Lucey, S. (2017). Learning background-aware correlation filters for visual tracking. In IEEE International Conference on Computer Vision.","DOI":"10.1109\/ICCV.2017.129"},{"key":"2008_CR33","doi-asserted-by":"crossref","unstructured":"Kristan, M., Leonardis, A., & Matas, J., et al. (2016). The visual object tracking vot2016 challenge results. In European Conference on Computer Vision Workshops, 8926, 191\u2013217.","DOI":"10.1007\/978-3-319-48881-3_54"},{"key":"2008_CR34","unstructured":"Kristan, M., Leonardis, A., Matas, J., Felsberg, M., Pflugfelder, R., Cehovin\u00a0Zajc, L., Vojir, T., Bhat, G., Lukezic, A., & Eldesokey, A. et al. (2018). The sixth visual object tracking vot2018 challenge results. In Proceedings of the European Conference on Computer Vision (ECCV)."},{"key":"2008_CR35","doi-asserted-by":"crossref","unstructured":"Kristan, M., Matas, J., & Leonardis, A., et\u00a0al. (2019). The seventh visual object tracking vot2019 challenge results. In Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, pp. 0\u20130.","DOI":"10.1109\/ICCVW.2019.00276"},{"key":"2008_CR36","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. Advances in Neural Information Processing Systems, 25, 1097\u20131105.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2008_CR37","doi-asserted-by":"crossref","unstructured":"Lamdouar, H., Yang, C., Xie, W., & Zisserman, A. (2020). Betrayed by motion: Camouflaged object discovery via motion segmentation. In Proceedings of the Asian Conference on Computer Vision.","DOI":"10.1007\/978-3-030-69532-3_30"},{"issue":"2","key":"2008_CR38","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1109\/TPAMI.2015.2417577","volume":"38","author":"A Li","year":"2016","unstructured":"Li, A., Lin, M., Wu, Y., Yang, M. H., & Yan, S. (2016). Nus-pro: A new visual tracking challenge. IEEE Transactions on Pattern Analysis and Machine Intelligence, 38(2), 335\u2013349.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2008_CR39","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., Zhu, Z., & Hu, X. (2018). High performance visual tracking with siamese region proposal network. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8971\u20138980.","DOI":"10.1109\/CVPR.2018.00935"},{"key":"2008_CR40","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., & Yan, J. (2019). Siamrpn++: Evolution of siamese visual tracking with very deep networks. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4282\u20134291.","DOI":"10.1109\/CVPR.2019.00441"},{"issue":"12","key":"2008_CR41","doi-asserted-by":"publisher","first-page":"2936","DOI":"10.1007\/s11263-020-01349-4","volume":"128","author":"S Li","year":"2020","unstructured":"Li, S., Song, W., Fang, Z., Shi, J., Hao, A., Zhao, Q., & Qin, H. (2020). Long-short temporal-spatial clues excited network for robust person re-identification. International Journal of Computer Vision, 128(12), 2936\u20132961.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR42","first-page":"21002","volume":"33","author":"X Li","year":"2020","unstructured":"Li, X., Wang, W., Wu, L., Chen, S., Hu, X., Li, J., Tang, J., & Yang, J. (2020). Generalized focal loss: Learning qualified and distributed bounding boxes for dense object detection. Advances in Neural Information Processing Systems, 33, 21002\u201321012.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2008_CR43","doi-asserted-by":"crossref","unstructured":"Li, Y., & Zhu, J. (2014). A scale adaptive kernel correlation filter tracker with feature integration. In European Conference on Computer Vision Workshops. Springer, pp. 254\u2013265.","DOI":"10.1007\/978-3-319-16181-5_18"},{"issue":"10","key":"2008_CR44","doi-asserted-by":"publisher","first-page":"2408","DOI":"10.1007\/s11263-022-01655-z","volume":"130","author":"Y Li","year":"2022","unstructured":"Li, Y., Xu, N., Yang, W., See, J., & Lin, W. (2022). Exploring the semi-supervised video object segmentation problem from a cyclic perspective. International Journal of Computer Vision, 130(10), 2408\u20132424.","journal-title":"International Journal of Computer Vision"},{"issue":"12","key":"2008_CR45","doi-asserted-by":"publisher","first-page":"5630","DOI":"10.1109\/TIP.2015.2482905","volume":"24","author":"P Liang","year":"2015","unstructured":"Liang, P., Blasch, E., & Ling, H. (2015). Encoding color information for visual tracking: Algorithms and benchmark. IEEE Transactions on Image Processing, 24(12), 5630\u20135644.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2008_CR46","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Dollar, P., & Zitnick, C. L. (2014). Microsoft coco: Common objects in context. In European Conference on Computer Vision. Springer, pp. 740\u2013755.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2008_CR47","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhang, T., Cao, X., & Xu, C. (2016). Structural correlation filter for robust visual tracking. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 4312\u20134320.","DOI":"10.1109\/CVPR.2016.467"},{"key":"2008_CR48","doi-asserted-by":"crossref","unstructured":"Martin, D., Andreas, R., Fahad, K., & Michael, F. (2016). Beyond correlation filters: Learning continuous convolution operators for visual tracking. In European Conference on Computer Vision, pp. 472\u2013488.","DOI":"10.1007\/978-3-319-46454-1_29"},{"key":"2008_CR49","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., & Ghanem, B. (2016). A benchmark and simulator for uav tracking. In European Conference on Computer Vision. Springer, pp. 445\u2013461.","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"2008_CR50","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., & Ghanem, B. (2017). Context-aware correlation filter tracking. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 1396\u20131404.","DOI":"10.1109\/CVPR.2017.152"},{"key":"2008_CR51","doi-asserted-by":"crossref","unstructured":"Ng, X. L., Ong, K. E., Zheng, Q., Ni, Y., & Liu, S. Y. Y. J. (2022). Animal kingdom: A large and diverse dataset for animal behavior understanding. arXiv:2204.08129.","DOI":"10.1109\/CVPR52688.2022.01844"},{"issue":"2","key":"2008_CR52","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1007\/s11263-016-0957-7","volume":"126","author":"L Pigou","year":"2018","unstructured":"Pigou, L., Van Den Oord, A., Dieleman, S., Van Herreweghe, M., & Dambre, J. (2018). Beyond temporal pooling: Recurrence and temporal convolutions for gesture recognition in video. International Journal of Computer Vision, 126(2), 430\u2013439.","journal-title":"International Journal of Computer Vision"},{"issue":"8","key":"2008_CR53","doi-asserted-by":"publisher","first-page":"1084","DOI":"10.1007\/s11263-019-01156-6","volume":"127","author":"Y Sui","year":"2019","unstructured":"Sui, Y., Zhang, Z., Wang, G., Tang, Y., & Zhang, L. (2019). Exploiting the anisotropy of correlation filter learning for visual tracking. International Journal of Computer Vision, 127(8), 1084\u20131105.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR54","doi-asserted-by":"crossref","unstructured":"Tao, R., Gavves, E., & Smeulders, A. W. (2016). Siamese instance search for tracking. In IEEE Conference on Computer Vision and Pattern Recognition. IEEE, pp. 1420\u20131429.","DOI":"10.1109\/CVPR.2016.158"},{"key":"2008_CR55","doi-asserted-by":"crossref","unstructured":"Valmadre, J., Bertinetto, L., Henriques, J., Vedaldi, A., & Torr, P. H. (2017). End-to-end representation learning for correlation filter based tracking. In IEEE Conference on Computer Vision and Pattern Recognition. IEEE, pp. 5000\u20135008.","DOI":"10.1109\/CVPR.2017.531"},{"key":"2008_CR56","doi-asserted-by":"crossref","unstructured":"Wang, M., Liu, Y., & Huang, Z. (2017). Large margin object tracking with circulant feature maps. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4021\u20134029.","DOI":"10.1109\/CVPR.2017.510"},{"key":"2008_CR57","doi-asserted-by":"crossref","unstructured":"Wang, N., Shi, J., Yeung, D. Y., & Jia, J. (2015). Understanding and diagnosing visual tracking systems. In IEEE International Conference on Computer Vision. IEEE, pp. 3101\u20133109.","DOI":"10.1109\/ICCV.2015.355"},{"key":"2008_CR58","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhang, L., Bertinetto, L., Hu, W., & Torr, P. H. (2019). Fast online object tracking and segmentation: A unifying approach. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1328\u20131338.","DOI":"10.1109\/CVPR.2019.00142"},{"key":"2008_CR59","doi-asserted-by":"crossref","unstructured":"Wu, Y., Lim, J., & Yang, M. H. (2013). Online object tracking: A benchmark. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 2411\u20132418","DOI":"10.1109\/CVPR.2013.312"},{"issue":"9","key":"2008_CR60","doi-asserted-by":"publisher","first-page":"1834","DOI":"10.1109\/TPAMI.2014.2388226","volume":"37","author":"Y Wu","year":"2015","unstructured":"Wu, Y., Lim, J., & Yang, M. H. (2015). Object tracking benchmark. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(9), 1834\u20131848.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2008_CR61","doi-asserted-by":"crossref","unstructured":"Xing, D., Evangeliou, N., Tsoukalas, A., & Tzes, A. (2022). Siamese transformer pyramid networks for real-time uav tracking. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2139\u20132148.","DOI":"10.1109\/WACV51458.2022.00196"},{"key":"2008_CR62","doi-asserted-by":"crossref","unstructured":"Xu, T., Feng, Z. H., Wu, X. J., & Kittler, J. (2019a). Joint group feature selection and discriminative filter learning for robust visual object tracking. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7950\u20137960","DOI":"10.1109\/ICCV.2019.00804"},{"issue":"11","key":"2008_CR63","doi-asserted-by":"publisher","first-page":"5596","DOI":"10.1109\/TIP.2019.2919201","volume":"28","author":"T Xu","year":"2019","unstructured":"Xu, T., Feng, Z. H., Wu, X. J., & Kittler, J. (2019). Learning adaptive discriminative correlation filters via temporal consistency preserving spatial feature selection for robust visual object tracking. IEEE Transactions on Image Processing, 28(11), 5596\u20135609.","journal-title":"IEEE Transactions on Image Processing"},{"issue":"5","key":"2008_CR64","doi-asserted-by":"publisher","first-page":"1359","DOI":"10.1007\/s11263-021-01435-1","volume":"129","author":"T Xu","year":"2021","unstructured":"Xu, T., Feng, Z., Wu, X. J., & Kittler, J. (2021). Adaptive channel selection for robust visual object tracking with discriminative correlation filters. International Journal of Computer Vision, 129(5), 1359\u20131375.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR65","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., Yuan, Y., & Yu, G. (2020). Siamfc++: Towards robust and accurate visual tracking with target estimation guidelines. In The AAAI Conference on Artificial Intelligence, pp. 12549\u201312556.","DOI":"10.1609\/aaai.v34i07.6944"},{"key":"2008_CR66","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., & Lu, H. (2021). Learning spatio-temporal transformer for visual tracking. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10448\u201310457.","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"2008_CR67","unstructured":"Yu, H., Xu, Y., Zhang, J., Zhao, W., Guan, Z., & Tao, D. (2021). Ap-10k: A benchmark for animal pose estimation in the wild. arXiv:2108.12617."},{"key":"2008_CR68","doi-asserted-by":"crossref","unstructured":"Yu, Y., Yuan, J., Mittal, G., Fuxin, L., & Chen, M. (2022). Batman: Bilateral attention transformer in motion-appearance neighboring space for video object segmentation. In European Conference on Computer Vision. Springer, pp. 612\u2013629.","DOI":"10.1007\/978-3-031-19818-2_35"},{"key":"2008_CR69","doi-asserted-by":"crossref","unstructured":"Zhang, K., Zhang, L., Liu, Q., Zhang, D., & Yang, M. H. (2014). Fast visual tracking via dense spatio-temporal context learning. In European Conference on Computer Vision, pp. 127\u2013141.","DOI":"10.1007\/978-3-319-10602-1_9"},{"issue":"2","key":"2008_CR70","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1007\/s11263-012-0582-z","volume":"101","author":"T Zhang","year":"2013","unstructured":"Zhang, T., Ghanem, B., Liu, S., & Ahuja, N. (2013). Robust visual tracking via structured multi-task sparse learning. International Journal of Computer Vision, 101(2), 367\u2013383.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, T., Bibi, A., & Ghanem, B. (2016). In defense of sparse tracking: Circulant sparse tracker. In IEEE Conference on Computer Vision and Pattern Recognition, pp. 3880\u20133888","DOI":"10.1109\/CVPR.2016.421"},{"key":"2008_CR72","doi-asserted-by":"crossref","unstructured":"Zhang, T., Xu, C., & Yang, M. H. (2017). Multi-task correlation particle filter for robust object tracking. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4335\u20134343.","DOI":"10.1109\/CVPR.2017.512"},{"issue":"8","key":"2008_CR73","doi-asserted-by":"publisher","first-page":"2002","DOI":"10.1007\/s11263-020-01308-z","volume":"128","author":"X Zheng","year":"2020","unstructured":"Zheng, X., Guo, Y., Huang, H., Li, Y., & He, R. (2020). A survey of deep facial attribute analysis. International Journal of Computer Vision, 128(8), 2002\u20132034.","journal-title":"International Journal of Computer Vision"},{"key":"2008_CR74","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Wang, Q., Li, B., Wu, W., Yan, J., & Hu, W. (2018). Distractor-aware siamese networks for visual object tracking. In Proceedings of the European Conference on Computer Vision (ECCV), pp. 101\u2013117.","DOI":"10.1007\/978-3-030-01240-3_7"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02008-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02008-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02008-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,17]],"date-time":"2024-08-17T06:55:00Z","timestamp":1723877700000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02008-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,12]]},"references-count":74,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["2008"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02008-8","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,12]]},"assertion":[{"value":"30 April 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}