{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:48:49Z","timestamp":1742971729304,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819985456"},{"type":"electronic","value":"9789819985463"}],"license":[{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,26]],"date-time":"2023-12-26T00:00:00Z","timestamp":1703548800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8546-3_38","type":"book-chapter","created":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:02:17Z","timestamp":1703530937000},"page":"469-480","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CATrack: Convolution and\u00a0Attention Feature Fusion for\u00a0Visual Object Tracking"],"prefix":"10.1007","author":[{"given":"Longkun","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Jiajun","family":"Wen","sequence":"additional","affiliation":[]},{"given":"Zichen","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Rouyi","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Zhihui","family":"Lai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,26]]},"reference":[{"key":"38_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1007\/978-3-319-48881-3_56","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"L Bertinetto","year":"2016","unstructured":"Bertinetto, L., Valmadre, J., Henriques, J.F., Vedaldi, A., Torr, P.H.S.: Fully-convolutional siamese networks for object tracking. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9914, pp. 850\u2013865. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-48881-3_56"},{"key":"38_CR2","doi-asserted-by":"crossref","unstructured":"Bhat, G., Danelljan, M., Gool, L.V., Timofte, R.: Learning discriminative model prediction for tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6182\u20136191 (2019)","DOI":"10.1109\/ICCV.2019.00628"},{"key":"38_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-End object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"38_CR4","doi-asserted-by":"publisher","first-page":"1882","DOI":"10.1109\/TIP.2022.3148876","volume":"31","author":"S Chan","year":"2022","unstructured":"Chan, S., Tao, J., Zhou, X., Bai, C., Zhang, X.: Siamese implicit region proposal network with compound attention for visual tracking. IEEE Trans. Image Process. 31, 1882\u20131894 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"38_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., Lu, H.: Transformer tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8126\u20138135 (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"38_CR6","unstructured":"Cui, Y., Jiang, C., Wang, L., Wu, G.: Target transformed regression for accurate tracking. arXiv preprint arXiv:2104.00403 (2021)"},{"key":"38_CR7","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F.S., Felsberg, M.: Atom: accurate tracking by overlap maximization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4660\u20134669 (2019)","DOI":"10.1109\/CVPR.2019.00479"},{"key":"38_CR8","doi-asserted-by":"crossref","unstructured":"d\u2019Ascoli, S., Touvron, H., Leavitt, M.L., Morcos, A.S., Biroli, G., Sagun, L.: Convit: improving vision transformers with soft convolutional inductive biases. In: International Conference on Machine Learning, pp. 2286\u20132296. PMLR (2021)","DOI":"10.1088\/1742-5468\/ac9830"},{"key":"38_CR9","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"38_CR10","doi-asserted-by":"crossref","unstructured":"Fan, H., et al.: Lasot: a high-quality benchmark for large-scale single object tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5374\u20135383 (2019)","DOI":"10.1109\/CVPR.2019.00552"},{"key":"38_CR11","doi-asserted-by":"crossref","unstructured":"Fu, Z., Liu, Q., Fu, Z., Wang, Y.: Stmtrack: template-free visual tracking with space-time memory networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13774\u201313783 (2021)","DOI":"10.1109\/CVPR46437.2021.01356"},{"key":"38_CR12","doi-asserted-by":"crossref","unstructured":"Graham, B., et al.: Levit: a vision transformer in convnet\u2019s clothing for faster inference. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12259\u201312269 (2021)","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"38_CR13","doi-asserted-by":"crossref","unstructured":"Guo, D., Shao, Y., Cui, Y., Wang, Z., Zhang, L., Shen, C.: Graph attention tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9543\u20139552 (2021)","DOI":"10.1109\/CVPR46437.2021.00942"},{"key":"38_CR14","doi-asserted-by":"crossref","unstructured":"Guo, Q., Feng, W., Zhou, C., Huang, R., Wan, L., Wang, S.: Learning dynamic siamese network for visual object tracking. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1763\u20131771 (2017)","DOI":"10.1109\/ICCV.2017.196"},{"key":"38_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"5","key":"38_CR16","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang, L., Zhao, X., Huang, K.: Got-10k: a large high-diversity benchmark for generic object tracking in the wild. IEEE Trans. Pattern Anal. Mach. Intell. 43(5), 1562\u20131577 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR17","doi-asserted-by":"crossref","unstructured":"Huang, L., Zhao, X., Huang, K.: Globaltrack: a simple and strong baseline for long-term tracking. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 11037\u201311044 (2020)","DOI":"10.1609\/aaai.v34i07.6758"},{"key":"38_CR18","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, J.: Siamrpn++: evolution of siamese visual tracking with very deep networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4282\u20134291 (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"38_CR19","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., Zhu, Z., Hu, X.: High performance visual tracking with siamese region proposal network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8971\u20138980 (2018)","DOI":"10.1109\/CVPR.2018.00935"},{"key":"38_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"38_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"38_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1007\/978-3-319-46448-0_27","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Mueller","year":"2016","unstructured":"Mueller, M., Smith, N., Ghanem, B.: A benchmark and simulator for UAV tracking. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 445\u2013461. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_27"},{"key":"38_CR23","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., Ghanem, B.: Trackingnet: a large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of the European conference on computer vision (ECCV), pp. 300\u2013317 (2018)","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"38_CR24","doi-asserted-by":"crossref","unstructured":"Nam, H., Han, B.: Learning multi-domain convolutional neural networks for visual tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4293\u20134302 (2016)","DOI":"10.1109\/CVPR.2016.465"},{"key":"38_CR25","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"38_CR26","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"38_CR27","doi-asserted-by":"crossref","unstructured":"Voigtlaender, P., Luiten, J., Torr, P.H., Leibe, B.: SIAM R-CNN: visual tracking by re-detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6578\u20136588 (2020)","DOI":"10.1109\/CVPR42600.2020.00661"},{"key":"38_CR28","doi-asserted-by":"publisher","unstructured":"Wang, J., Zhang, H., Zhang, J., Miao, M., Zhang, J.: Dual-branch memory network for visual object tracking. In: Pattern Recognition and Computer Vision: 5th Chinese Conference, PRCV 2022, Shenzhen, China, 4\u20137 November 2022, 2022, Proceedings, Part IV, pp. 646\u2013658. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-18916-6_51","DOI":"10.1007\/978-3-031-18916-6_51"},{"key":"38_CR29","doi-asserted-by":"crossref","unstructured":"Wang, N., Zhou, W., Wang, J., Li, H.: Transformer meets tracker: exploiting temporal context for robust visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1571\u20131580 (2021)","DOI":"10.1109\/CVPR46437.2021.00162"},{"key":"38_CR30","doi-asserted-by":"crossref","unstructured":"Wang, Q., Zhang, L., Bertinetto, L., Hu, W., Torr, P.H.: Fast online object tracking and segmentation: a unifying approach. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1328\u20131338 (2019)","DOI":"10.1109\/CVPR.2019.00142"},{"key":"38_CR31","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.neucom.2022.09.066","volume":"512","author":"B Wei","year":"2022","unstructured":"Wei, B., Chen, H., Ding, Q., Luo, H.: Siamagn: siamese attention-guided network for visual tracking. Neurocomputing 512, 69\u201382 (2022)","journal-title":"Neurocomputing"},{"key":"38_CR32","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., Yuan, Y., Yu, G.: Siamfc++: towards robust and accurate visual tracking with target estimation guidelines. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 12549\u201312556 (2020)","DOI":"10.1609\/aaai.v34i07.6944"},{"issue":"12","key":"38_CR33","first-page":"3976","volume":"16","author":"J Zhang","year":"2022","unstructured":"Zhang, J., Wang, J., Zhang, H., Miao, M., Cai, Z., Chen, F.: Multi-level cross-attention siamese network for visual object tracking. KSII Trans. Internet Inf. Syst. 16(12), 3976\u20133989 (2022)","journal-title":"KSII Trans. Internet Inf. Syst."},{"key":"38_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Liu, Y., Wang, X., Li, B., Hu, W.: Learn to match: automatic matching network design for visual tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13339\u201313348 (2021)","DOI":"10.1109\/ICCV48922.2021.01309"},{"key":"38_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Peng, H.: Deeper and wider siamese networks for real-time visual tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4591\u20134600 (2019)","DOI":"10.1109\/CVPR.2019.00472"},{"key":"38_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"771","DOI":"10.1007\/978-3-030-58589-1_46","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Zhang","year":"2020","unstructured":"Zhang, Z., Peng, H., Fu, J., Li, B., Hu, W.: Ocean: object-aware anchor-free tracking. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12366, pp. 771\u2013787. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58589-1_46"},{"key":"38_CR37","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Wang, Q., Li, B., Wu, W., Yan, J., Hu, W.: Distractor-aware siamese networks for visual object tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 101\u2013117 (2018)","DOI":"10.1007\/978-3-030-01240-3_7"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8546-3_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,25]],"date-time":"2023-12-25T19:17:29Z","timestamp":1703531849000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8546-3_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,26]]},"ISBN":["9789819985456","9789819985463"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8546-3_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,12,26]]},"assertion":[{"value":"26 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Xiamen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/prcv2023.xmu.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1420","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"532","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,78","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3,69","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}