{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T22:56:09Z","timestamp":1743029769173,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":32,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819620609"},{"type":"electronic","value":"9789819620616"}],"license":[{"start":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T00:00:00Z","timestamp":1735603200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T00:00:00Z","timestamp":1735603200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2061-6_4","type":"book-chapter","created":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T05:46:53Z","timestamp":1735537613000},"page":"45-59","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["HCV: Lightweight Hybrid CNN-Vision Transformer for\u00a0Visual Object Tracking"],"prefix":"10.1007","author":[{"given":"Liang-Chia","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5722-7239","authenticated-orcid":false,"given":"Wei-Ta","family":"Chu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,31]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Blatter, P., Kanakis, M., Danelljan, M., Van\u00a0Gool, L.: Efficient visual tracking with exemplar transformers. In: Proceedings of IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1571\u20131581 (2023)","DOI":"10.1109\/WACV56688.2023.00162"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Borsuk, V., Vei, R., Kupyn, O., Martyniuk, T., Krashenyi, I., Matas, J.: FEAR: fast, efficient, accurate and robust visual tracker. In: Proceedings of European Conference on Computer Vision, pp. 644\u2013663 (2022)","DOI":"10.1007\/978-3-031-20047-2_37"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Cai, Y., Liu, J., Tang, J., Wu, G.: Robust object modeling for visual tracking. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 9555\u20139566 (2023)","DOI":"10.1109\/ICCV51070.2023.00879"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Chen, X., Kang, B., Wang, D., Li, D., Lu, H.: Efficient visual tracking via hierarchical cross-attention transformer. In: Proceedings of European Conference on Computer Vision, pp. 461\u2013477 (2022)","DOI":"10.1007\/978-3-031-25085-9_26"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Chen, X., Yan, B., Zhu, J., Wang, D., Yang, X., Lu, H.: Transformer tracking. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8126\u20138135 (2021)","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jiang, C., Wang, L., Wu, G.: MixFormer: end-to-end tracking with iterative mixed attention. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13608\u201313618 (2022)","DOI":"10.1109\/CVPR52688.2022.01324"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Khan, F.S., Felsberg, M.: ATOM: accurate tracking by overlap maximization. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4660\u20134669 (2019)","DOI":"10.1109\/CVPR.2019.00479"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Danelljan, M., Bhat, G., Shahbaz\u00a0Khan, F., Felsberg, M.: ECO: efficient convolution operators for tracking. In: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, pp. 6638\u20136646 (2017)","DOI":"10.1109\/CVPR.2017.733"},{"key":"4_CR9","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\,\\times \\,$$16 words: transformers for image recognition at scale. In: Proceedings of International Conference on Learning Representations (2021)"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Fan, H., et al.: LaSOT: a high-quality benchmark for large-scale single object tracking. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5374\u20135383 (2019)","DOI":"10.1109\/CVPR.2019.00552"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Gopal, G., Amer, M.: Separable self and mixed attention transformers for efficient object tracking. In: Proceedings of IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 6694\u20136703 (2024)","DOI":"10.1109\/WACV57701.2024.00657"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Howard, A., et\u00a0al.: Searching for MobileNetV3. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"issue":"5","key":"4_CR13","doi-asserted-by":"publisher","first-page":"1562","DOI":"10.1109\/TPAMI.2019.2957464","volume":"43","author":"L Huang","year":"2019","unstructured":"Huang, L., Zhao, X., Huang, K.: GOT-10k: a large high-diversity benchmark for generic object tracking in the wild. IEEE Trans. Pattern Anal. Mach. Intell. 43(5), 1562\u20131577 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Kang, B., Chen, X., Wang, D., Peng, H., Lu, H.: Exploring lightweight hierarchical vision transformers for efficient visual tracking. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 9612\u20139621 (2023)","DOI":"10.1109\/ICCV51070.2023.00881"},{"key":"4_CR15","unstructured":"Kristan, M., et\u00a0al.: The seventh visual object tracking VOT2019 challenge results. In: Proceedings of IEEE\/CVF International Conference on Computer Vision Workshops (2019)"},{"key":"4_CR16","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: Proceedings of Advances in Neural Information Processing Systems, vol.\u00a025 (2012)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Law, H., Deng, J.: CornerNet: detecting objects as paired keypoints. In: Proceedings of European Conference on Computer Vision, pp. 734\u2013750 (2018)","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Li, B., Wu, W., Wang, Q., Zhang, F., Xing, J., Yan, J.: SiamRPN++: evolution of Siamese visual tracking with very deep networks. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4282\u20134291 (2019)","DOI":"10.1109\/CVPR.2019.00441"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Li, B., Yan, J., Wu, W., Zhu, Z., Hu, X.: High performance visual tracking with Siamese region proposal network. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8971\u20138980 (2018)","DOI":"10.1109\/CVPR.2018.00935"},{"key":"4_CR20","unstructured":"Lin, L., Fan, H., Zhang, Z., Xu, Y., Ling, H.: SwinTrack: a simple and strong baseline for transformer tracking. In: Proceedings of Advances in Neural Information Processing Systems (2022)"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Proceedings of European Conference on Computer Vision, pp. 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"4_CR22","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: Proceedings of International Conference on Learning Representations (2019)"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H.T., Sun, J.: ShuffleNet V2: practical guidelines for efficient CNN architecture design. In: Proceedings of European Conference on Computer Vision, pp. 116\u2013131 (2018)","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Mueller, M., Smith, N., Ghanem, B.: A benchmark and simulator for UAV tracking. In: Proceedings of European Conference on Computer Vision, pp. 445\u2013461 (2016)","DOI":"10.1007\/978-3-319-46448-0_27"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Muller, M., Bibi, A., Giancola, S., Alsubaihi, S., Ghanem, B.: TrackingNet: a large-scale dataset and benchmark for object tracking in the wild. In: Proceedings of European Conference on Computer Vision, pp. 300\u2013317 (2018)","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized intersection over union: A metric and a loss for bounding box regression. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666 (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Xu, Y., Wang, Z., Li, Z., Yuan, Y., Yu, G.: SiamFC++: towards robust and accurate visual tracking with target estimation guidelines. In: Proceedings of AAAI Conference on Artificial Intelligence, pp. 12549\u201312556 (2020)","DOI":"10.1609\/aaai.v34i07.6944"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Fu, J., Wang, D., Lu, H.: Learning spatio-temporal transformer for visual tracking. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 10448\u201310457 (2021)","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Yan, B., Peng, H., Wu, K., Wang, D., Fu, J., Lu, H.: LightTrack: finding lightweight neural networks for object tracking via one-shot architecture search. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15180\u201315189 (2021)","DOI":"10.1109\/CVPR46437.2021.01493"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Ye, B., Chang, H., Ma, B., Shan, S., Chen, X.: Joint feature learning and relation modeling for tracking: a one-stream framework. In: Proceedings of European Conference on Computer Vision, pp. 341\u2013357 (2022)","DOI":"10.1007\/978-3-031-20047-2_20"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Peng, H., Fu, J., Li, B., Hu, W.: Ocean: object-aware anchor-free tracking. In: Proceedings of European Conference on Computer Vision, pp. 771\u2013787 (2020)","DOI":"10.1007\/978-3-030-58589-1_46"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Zhong, B., Liang, Q., Mo, Z., Zhang, S., Li, X.: ODTrack: online dense temporal token learning for visual tracking. In: Proceedings of AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 7588\u20137596 (2024)","DOI":"10.1609\/aaai.v38i7.28591"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2061-6_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T06:02:48Z","timestamp":1735538568000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2061-6_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,31]]},"ISBN":["9789819620609","9789819620616"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2061-6_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,31]]},"assertion":[{"value":"31 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}