{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,18]],"date-time":"2025-04-18T04:11:38Z","timestamp":1744949498068,"version":"3.40.4"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,12,19]],"date-time":"2024-12-19T00:00:00Z","timestamp":1734566400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,19]],"date-time":"2024-12-19T00:00:00Z","timestamp":1734566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62306294","62071122","62121002"],"award-info":[{"award-number":["62306294","62071122","62121002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","doi-asserted-by":"publisher","award":["2018166"],"award-info":[{"award-number":["2018166"]}],"id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1007\/s11263-024-02307-0","type":"journal-article","created":{"date-parts":[[2024,12,19]],"date-time":"2024-12-19T18:21:30Z","timestamp":1734632490000},"page":"2950-2966","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning Discriminative Features for Visual Tracking via Scenario 
Decoupling"],"prefix":"10.1007","volume":"133","author":[{"given":"Yinchao","family":"Ma","sequence":"first","affiliation":[]},{"given":"Qianjin","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Wenfei","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0764-6106","authenticated-orcid":false,"given":"Tianzhu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jinpeng","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,19]]},"reference":[{"key":"2307_CR1","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., & Schmid, C. (2021). Vivit: A video vision transformer. In Proceedings of the IEEE international conference on computer vision (pp. 6836\u20136846).","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"2307_CR2","doi-asserted-by":"crossref","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J. F., Vedaldi, A., & Torr, P. H. S. (2016). Fully-convolutional siamese networks for object tracking. In Proceedings of the European conference on computer vision workshops.","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"2307_CR3","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L. V., & Timofte, R. (2019). Learning discriminative model prediction for tracking. In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2019.00628"},{"key":"2307_CR4","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L.V., & Timofte, R. (2020). Know your surroundings: Exploiting scene information for object tracking. Proceedings of the European conference on computer vision (pp. 205\u2013221).","DOI":"10.1007\/978-3-030-58592-1_13"},{"key":"2307_CR5","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., & Zagoruyko, S. (2020). 
End-to-end object detection with transformers. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2307_CR6","doi-asserted-by":"crossref","unstructured":"Chen, B., Li, P., Bai, L., Qiao, L., Shen, Q., Li, B., Gan, W., Wu, W. & Ouyang, W. (2022). Backbone is all your need: A simplified architecture for visual object tracking. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-031-20047-2_22"},{"key":"2307_CR7","doi-asserted-by":"crossref","unstructured":"Chen, J., & Ho, C. M. (2022). Mm-vit: Multi-modal video transformer for compressed video action recognition. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision (pp. 1910\u20131921).","DOI":"10.1109\/WACV51458.2022.00086"},{"issue":"07","key":"2307_CR8","doi-asserted-by":"publisher","first-page":"8507","DOI":"10.1109\/TPAMI.2022.3232535","volume":"45","author":"X Chen","year":"2023","unstructured":"Chen, X., Yan, B., Zhu, J., Lu, H., Ruan, X., & Wang, D. (2023). High-performance transformer tracking. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(07), 8507\u20138523. https:\/\/doi.org\/10.1109\/TPAMI.2022.3232535","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2307_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., & Lu, H. (2021). Transformer tracking. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"2307_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Z., Zhong, B., Li, G., Zhang, S., & Ji, R. (2020). Siamese box adaptive network for visual tracking. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00670"},{"key":"2307_CR11","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A. G., Kirillov, A., & Girdhar, R. 
(2022). Masked-attention mask transformer for universal image segmentation. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 1290\u20131299).","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"2307_CR12","unstructured":"Cui, Y., Guo, D., Shao, Y., Wang, Z., Shen, C., Zhang, L., & Chen, S. (2022a). Joint classification and regression for visual tracking with fully convolutional siamese networks (pp. 1\u201317). International Journal of Computer Vision."},{"key":"2307_CR13","doi-asserted-by":"crossref","unstructured":"Cui, Y., et al. (2022b). Fully convolutional online tracking. Computer Vision and Image Understanding, 224, 103547.","DOI":"10.1016\/j.cviu.2022.103547"},{"key":"2307_CR14","doi-asserted-by":"publisher","unstructured":"Cui, Y., Jiang, C., Wang, L., Wu, G. (2024a). Mixformer: End-to-end tracking with iterative mixed attention. IEEE Transactions on Pattern Analysis and Machine Intelligence. https:\/\/doi.org\/10.1109\/TPAMI.2024.3349519","DOI":"10.1109\/TPAMI.2024.3349519"},{"key":"2307_CR15","unstructured":"Cui, Y., Jiang, C., Wang, L., Wu, G. (2024b). Mixformerv2: Efficient fully transformer tracking. Advances of Neural Information Processing Systems, 36."},{"key":"2307_CR16","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F. S., & Felsberg, M. (2017). ECO: Efficient convolution operators for tracking. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.733"},{"key":"2307_CR17","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F. S., & Felsberg, M. (2019). ATOM: Accurate tracking by overlap maximization. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00479"},{"key":"2307_CR18","doi-asserted-by":"crossref","unstructured":"Danelljan, M., et\u00a0al. (2020). Probabilistic regression for visual tracking. 
In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00721"},{"key":"2307_CR19","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J. & Houlsby, N. (2021). An image is worth 16x16 words: Transformers for image recognition at scale. In International Conference on learning representations."},{"key":"2307_CR20","doi-asserted-by":"crossref","unstructured":"Du, F., et\u00a0al. (2020). Correlation-guided attention for corner detection based visual tracking. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00687"},{"issue":"2","key":"2307_CR21","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1007\/s11263-020-01387-y","volume":"129","author":"H Fan","year":"2021","unstructured":"Fan, H., Bai, H., Lin, L., Yang, F., Chu, P., Deng, G., Yu, S., Huang, M., Liu, J., Xu, Y., et al. (2021). Lasot: A high-quality large-scale single object tracking benchmark. International Journal of Computer Vision, 129(2), 439\u2013461.","journal-title":"International Journal of Computer Vision"},{"key":"2307_CR22","doi-asserted-by":"crossref","unstructured":"Fan, H., Lin, L., Yang, F., Chu, P., Deng, G., Yu, S., Bai, H., Xu, Y., Liao, C., & Ling, H. (2019). LaSOT: A high-quality benchmark for large-scale single object tracking. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00552"},{"key":"2307_CR23","doi-asserted-by":"crossref","unstructured":"Gao, S., et\u00a0al. (2022). Aiatrack: Attention in attention for transformer visual tracking. In Proceedings of the European conference on computer vision (pp. 146\u2013164).","DOI":"10.1007\/978-3-031-20047-2_9"},{"key":"2307_CR24","unstructured":"Glorot, X., & Bengio, Y. (2010). Understanding the difficulty of training deep feedforward neural networks. 
In Proceedings of the international conference on artificial intelligence and statistics."},{"key":"2307_CR25","doi-asserted-by":"crossref","unstructured":"Guo, D., Shao, Y., Cui, Y., Wang, Z., Zhang, L., & Shen, C. (2021). Graph attention tracking. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 9543\u20139552).","DOI":"10.1109\/CVPR46437.2021.00942"},{"key":"2307_CR26","doi-asserted-by":"publisher","first-page":"7128","DOI":"10.1109\/TIP.2020.2998978","volume":"29","author":"R Han","year":"2020","unstructured":"Han, R., Feng, W., & Wang, S. (2020). Fast learning of spatially regularized and content aware correlation filter for visual tracking. IEEE Transactions on Image Processing, 29, 7128\u20137140.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2307_CR27","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., & Girshick, R. (2022). Masked autoencoders are scalable vision learners. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 16000\u201316009).","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2307_CR28","doi-asserted-by":"crossref","unstructured":"Huang, L., Zhao, X., & Huang, K. (2018). Got-10k: A large high-diversity benchmark for generic object tracking in the wild. IEEE Transactions on Pattern Analysis and Machine Intelligence, 43, 1562\u20131577.","DOI":"10.1109\/TPAMI.2019.2957464"},{"key":"2307_CR29","doi-asserted-by":"crossref","unstructured":"Jiang, B., et\u00a0al. (2018). Acquisition of localization confidence for accurate object detection. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-030-01264-9_48"},{"key":"2307_CR30","doi-asserted-by":"crossref","unstructured":"Kiani\u00a0Galoogahi, H., et\u00a0al. (2017). Need for speed: A benchmark for higher frame rate object tracking. 
In Proceedings of the IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2017.128"},{"key":"2307_CR31","unstructured":"Kristan, M., Leonardis, A., Matas, J., Felsberg, M., Pflugfelder, R., K\u00e4m\u00e4r\u00e4inen, J.-K., Chang, H. J., Danelljan, M., Zajc, L. \u010c., Luke\u017ei\u010d, A., et al. (2022). The tenth visual object tracking vot2022 challenge results. In Proceedings of the European conference on computer vision (pp. 431\u2013460)."},{"key":"2307_CR32","doi-asserted-by":"crossref","unstructured":"Law, H., & Deng, J. (2018). Cornernet: Detecting objects as paired keypoints. In Proceedings of the European conference on computer vision (pp. 734\u2013750).","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"2307_CR33","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., & Yan, J. (2019). SiamRPN++: Evolution of siamese visual tracking with very deep networks. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00441"},{"key":"2307_CR34","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., Zhu, Z., & Hu, X. (2018). High performance visual tracking with siamese region proposal network. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00935"},{"key":"2307_CR35","first-page":"16743","volume":"35","author":"L Lin","year":"2022","unstructured":"Lin, L., Fan, H., Zhang, Z., Xu, Y., & Ling, H. (2022). Swintrack: A simple and strong baseline for transformer tracking. Advances in Neural Information Processing Systems, 35, 16743\u201316754.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2307_CR36","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S. J., Bourdev, L. D., Girshick, R. B., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft COCO: Common objects in context. 
In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-319-10602-1_48"},{"issue":"8","key":"2307_CR37","doi-asserted-by":"publisher","first-page":"1347","DOI":"10.1109\/TMM.2015.2443559","volume":"17","author":"S Liu","year":"2015","unstructured":"Liu, S., Liang, X., Liu, L., Lu, K., Lin, L., Cao, X., & Yan, S. (2015). Fashion parsing with video context. IEEE Transactions on Multimedia, 17(8), 1347\u20131358.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2307_CR38","doi-asserted-by":"publisher","first-page":"5427","DOI":"10.1109\/TIP.2022.3195321","volume":"31","author":"X Liu","year":"2022","unstructured":"Liu, X., Wang, Q., Hu, Y., Tang, X., Zhang, S., Bai, S., & Bai, X. (2022). End-to-end temporal action detection with transformer. IEEE Transactions on Image Processing, 31, 5427\u20135441.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2307_CR39","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B., & Guo, B. (2021). Swin transformer: Hierarchical vision transformer using shifted windows. In Proceedings of the IEEE international conference on computer vision (pp. 10012\u201310022).","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2307_CR40","unstructured":"Loshchilov, I., & Hutter, F. (2018). Decoupled weight decay regularization. In International conference on learning representations."},{"key":"2307_CR41","doi-asserted-by":"crossref","unstructured":"Lukezic, A., et\u00a0al. (2020). D3S: A discriminative single shot segmentation tracker. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00716"},{"issue":"10","key":"2307_CR42","doi-asserted-by":"publisher","first-page":"11443","DOI":"10.1109\/TPAMI.2023.3275034","volume":"45","author":"Y Ma","year":"2023","unstructured":"Ma, Y., et al. (2023). Adaptive part mining for robust visual tracking. 
IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(10), 11443\u201311457.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2307_CR43","doi-asserted-by":"crossref","unstructured":"Mayer, C., Danelljan, M., Paudel, D. P., & Van\u00a0Gool, L. (2021). Learning target candidate association to keep track of what not to track. In Proceedings of the IEEE international conference on computer vision (pp. 13444\u201313454).","DOI":"10.1109\/ICCV48922.2021.01319"},{"key":"2307_CR44","doi-asserted-by":"crossref","unstructured":"Mueller, M., et\u00a0al. (2016). A benchmark and simulator for UAV tracking. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"2307_CR45","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., & Ghanem, B. (2018). TrackingNet: A large-scale dataset and benchmark for object tracking in the wild. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"2307_CR46","doi-asserted-by":"crossref","unstructured":"Nam, H., & Han, B. (2016). Learning multi-domain convolutional neural networks for visual tracking. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2016.465"},{"key":"2307_CR47","unstructured":"Noman, M., Ghallabi, W. A., Najiha, D., Mayer, C., Dudhane, A., Danelljan, M., Cholakkal, H., Khan, S., Van Gool, L. & Khan, F.S. (2022). Avist: A benchmark for visual object tracking in adverse visibility. In British machine vision conference."},{"key":"2307_CR48","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I. D., & Savarese, S. (2019). Generalized intersection over union: A metric and a loss for bounding box regression. 
In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00075"},{"key":"2307_CR49","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., & Schmid, C. (2021). Segmenter: Transformer for semantic segmentation. In Proceedings of the IEEE international conference on computer vision (pp. 7262\u20137272).","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"2307_CR50","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, L. & Polosukhin, I. (2017). Attention is all you need. In Advances of neural information processing systems."},{"key":"2307_CR51","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., Luiten, J., Torr, P. H. S., & Leibe, B. (2020). Siam R-CNN: Visual tracking by re-detection. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00661"},{"key":"2307_CR52","doi-asserted-by":"crossref","unstructured":"Wang, G., et\u00a0al. (2020). Tracking by Instance Detection: A meta-learning approach. In Proceedings of IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR42600.2020.00632"},{"key":"2307_CR53","doi-asserted-by":"crossref","unstructured":"Wang, N., et\u00a0al. (2021a). Transformer meets tracker: Exploiting temporal context for robust visual tracking. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 1571\u20131580).","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"2307_CR54","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., & Shao, L. (2021b). Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In Proceedings of the IEEE international conference on computer vision (pp. 
568\u2013578).","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"2307_CR55","doi-asserted-by":"crossref","unstructured":"Wang, X., Shu, X., Zhang, Z., Jiang, B., Wang, Y., Tian, Y., & Wu, F. (2021c). Towards more flexible and accurate object tracking with natural language: Algorithms and benchmark. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 13763\u201313773).","DOI":"10.1109\/CVPR46437.2021.01355"},{"key":"2307_CR56","doi-asserted-by":"publisher","unstructured":"Wu, Y., Lim, J., & Yang, M.-H. (2015). Object tracking benchmark. IEEE Transactions on Pattern Analysis and Machine Intelligence, 37(9), 1834\u20131848. https:\/\/doi.org\/10.1109\/TPAMI.2014.2388226.","DOI":"10.1109\/TPAMI.2014.2388226"},{"key":"2307_CR57","doi-asserted-by":"crossref","unstructured":"Xin, C., et\u00a0al. (2023). Seqtrack: Sequence to sequence learning for visual object tracking. In Proceedings of IEEE conference on computer vision and pattern recognition (pp. 14572\u201314581).","DOI":"10.1109\/CVPR52729.2023.01400"},{"key":"2307_CR58","doi-asserted-by":"publisher","first-page":"1359","DOI":"10.1007\/s11263-021-01435-1","volume":"129","author":"T Xu","year":"2021","unstructured":"Xu, T., Feng, Z., Wu, X.-J., & Kittler, J. (2021). Adaptive channel selection for robust visual object tracking with discriminative correlation filters. International Journal of Computer Vision, 129, 1359\u20131375.","journal-title":"International Journal of Computer Vision"},{"key":"2307_CR59","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., Yuan, Y., & Yu, G. (2020). SiamFC++: Towards robust and accurate visual tracking with target estimation guidelines. In Proceedings of the AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v34i07.6944"},{"key":"2307_CR60","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., & Lu, H. (2021). Learning spatio-temporal transformer for visual tracking. 
In Proceedings of the IEEE international conference on computer vision (pp. 10448\u201310457).","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"2307_CR61","doi-asserted-by":"crossref","unstructured":"Ye, B., Chang, H., Ma, B., & Shan, S. (2022). Joint feature learning and relation modeling for tracking: A one-stream framework. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-031-20047-2_20"},{"issue":"4","key":"2307_CR62","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1145\/1177352.1177355","volume":"38","author":"A Yilmaz","year":"2006","unstructured":"Yilmaz, A., Javed, O., & Shah, M. (2006). Object tracking: A survey. ACM Computing Surveys, 38(4), 13.","journal-title":"ACM Computing Surveys"},{"key":"2307_CR63","unstructured":"Yutao, C., Cheng, J., Wang, L., & Wu, G. (2022). Mixformer: End-to-end tracking with iterative mixed attention. In Proceedings of IEEE conference on computer vision and pattern recognition."},{"key":"2307_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, C., Wu, J., & Li, Y. (2022). Actionformer: Localizing moments of actions with transformers. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-031-19772-7_29"},{"issue":"2","key":"2307_CR65","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1007\/s11263-012-0582-z","volume":"101","author":"T Zhang","year":"2013","unstructured":"Zhang, T., et al. (2013). Robust visual tracking via structured multi-task sparse learning. International Journal of Computer Vision, 101(2), 367\u2013383.","journal-title":"International Journal of Computer Vision"},{"key":"2307_CR66","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s11263-014-0738-0","volume":"111","author":"T Zhang","year":"2015","unstructured":"Zhang, T., et al. (2015). Robust visual tracking via consistent low-rank sparse learning. 
International Journal of Computer Vision, 111, 171\u2013190.","journal-title":"International Journal of Computer Vision"},{"key":"2307_CR67","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et\u00a0al. (2020). Ocean: Object-aware anchor-free tracking. In Proceedings of the European conference on computer vision.","DOI":"10.1007\/978-3-030-58589-1_46"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02307-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02307-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02307-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,17]],"date-time":"2025-04-17T06:02:39Z","timestamp":1744869759000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02307-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,19]]},"references-count":67,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5]]}},"alternative-id":["2307"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02307-0","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"type":"print","value":"0920-5691"},{"type":"electronic","value":"1573-1405"}],"subject":[],"published":{"date-parts":[[2024,12,19]]},"assertion":[{"value":"7 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 December 
2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}