{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,14]],"date-time":"2025-02-14T05:22:53Z","timestamp":1739510573268,"version":"3.37.0"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T00:00:00Z","timestamp":1737072000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T00:00:00Z","timestamp":1737072000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100015501","name":"Qu\u1ef9 \u0110\u1ed5i m\u1edbi s\u00e1ng t\u1ea1o Vingroup","doi-asserted-by":"publisher","award":["VINIF.2022.ThS.JVN.01","VINIF.2019.DA19"],"award-info":[{"award-number":["VINIF.2022.ThS.JVN.01","VINIF.2019.DA19"]}],"id":[{"id":"10.13039\/501100015501","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s11760-024-03792-z","type":"journal-article","created":{"date-parts":[[2025,1,17]],"date-time":"2025-01-17T07:22:45Z","timestamp":1737098565000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Unified ViT-CNN for few-shot object counting"],"prefix":"10.1007","volume":"19","author":[{"given":"Khanh-An C.","family":"Quan","sequence":"first","affiliation":[]},{"given":"Vinh-Tiep","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Tam V.","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Minh-Triet","family":"Tran","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,17]]},"reference":[{"key":"3792_CR1","doi-asserted-by":"crossref","unstructured":"Lin, H., et al.: Boosting crowd counting via multifaceted attention. In: CVPR, pp.\u00a019628\u201319637 (2022)","DOI":"10.1109\/CVPR52688.2022.01901"},{"key":"3792_CR2","doi-asserted-by":"crossref","unstructured":"Song, Q., et al.: Rethinking counting and localization in crowds: a purely point-based framework. In: ICCV, pp.\u00a03365\u20133374 (2021)","DOI":"10.1109\/ICCV48922.2021.00335"},{"key":"3792_CR3","doi-asserted-by":"crossref","unstructured":"Mundhenk, T.N., et al.: A large contextual dataset for classification, detection and counting of cars with deep learning. In: ECCV 2016, pp.\u00a0785\u2013800 (2016)","DOI":"10.1007\/978-3-319-46487-9_48"},{"key":"3792_CR4","unstructured":"Meng-Ru, H., Yen-Liang, L., Winston H.H.: Drone-based object counting by spatially regularized regional proposal network. In: ICCV, pp.\u00a04145\u20134153 (2017)"},{"key":"3792_CR5","doi-asserted-by":"crossref","unstructured":"Xu, L., et al.: Robust fruit counting: combining deep learning, tracking, and structure from motion. In: IROS, pp.\u00a01045\u20131052. IEEE (2018)","DOI":"10.1109\/IROS.2018.8594239"},{"key":"3792_CR6","doi-asserted-by":"crossref","unstructured":"Bai, H., et al.: CounTr: an end-to-end transformer approach for\u00a0crowd counting and\u00a0density estimation. 
In: ECCV Workshops, pp.\u00a0207\u2013222 (2022)","DOI":"10.1007\/978-3-031-25075-0_16"},{"key":"3792_CR7","unstructured":"Liu, C., et al.: CounTR: transformer-based generalised visual counting. In: BMCV. BMVA Press, (2022)"},{"key":"3792_CR8","unstructured":"Park, N., Kim, S.: How do vision transformers work? In: ICLR (2022)"},{"key":"3792_CR9","unstructured":"Si, C., et al.: Inception transformer. In: NIPS (2022)"},{"key":"3792_CR10","unstructured":"Chen, Z., et al.: Vision transformer adapter for dense predictions. In: ICLR (2023)"},{"key":"3792_CR11","doi-asserted-by":"crossref","unstructured":"Ranjan, V., et al.: Learning to count everything. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00340"},{"key":"3792_CR12","doi-asserted-by":"crossref","unstructured":"Hsieh, M.-R., Lin, Y.-L., Hsu, W.H.: Drone-based object counting by spatially regularized regional proposal networks. In: ICCV. IEEE (2017)","DOI":"10.1109\/ICCV.2017.446"},{"key":"3792_CR13","doi-asserted-by":"crossref","unstructured":"Barinova, O., Lempitsky, V., Kholi, P.: On detection of multiple object instances using hough transforms. In: IEEE T-PAMI 34.9, pp.\u00a01773\u20131784 (2012)","DOI":"10.1109\/TPAMI.2012.79"},{"key":"3792_CR14","doi-asserted-by":"crossref","unstructured":"Lu, E., Xie, W., Zisserman, A.: Class-agnostic counting. In: ACCV (2018)","DOI":"10.1007\/978-3-030-20893-6_42"},{"key":"3792_CR15","doi-asserted-by":"crossref","unstructured":"Yang, S.-D., et al.: Class-agnostic Few-shot object counting. In: WACV, pp.\u00a0869\u2013877 (2021)","DOI":"10.1109\/WACV48630.2021.00091"},{"key":"3792_CR16","doi-asserted-by":"crossref","unstructured":"Shi, M., et al.: Represent, compare, and learn: a similarity-aware framework for class-agnostic counting. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00931"},{"key":"3792_CR17","doi-asserted-by":"crossref","unstructured":"You, Z., et al.: Few-shot object counting with similarity-aware feature enhancement. In: WACV, pp.\u00a06304\u20136313. IEEE (2023)","DOI":"10.1109\/WACV56688.2023.00625"},{"key":"3792_CR18","unstructured":"Lin, W., et al.: Scale-prior deformable convolution for exemplar-guided class-agnostic counting. In: BMCV. BMVA Press (2022)"},{"key":"3792_CR19","unstructured":"Alexey, D., et al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"3792_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110556","volume":"153","author":"B-B Gao","year":"2024","unstructured":"Gao, B.-B., Huang, Z.: CSTrans: correlation-guided self-activation transformer for counting everything. Pattern Recogn. 153, 110556 (2024)","journal-title":"Pattern Recogn."},{"key":"3792_CR21","doi-asserted-by":"crossref","unstructured":"He, K., et al.: Masked autoencoders are scalable vision learners. In: CVPR, pp.\u00a016000\u201316009 (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"3792_CR22","unstructured":"Zhu, X., et al.: Deformable DETR: deformable transformers for end-to-end object detection. In: ICLR (2021)"},{"key":"3792_CR23","doi-asserted-by":"crossref","unstructured":"He, K., et al.: Deep residual learning for image recognition. In: CVPR. CVPR \u201916, pp.\u00a0770\u2013778. IEEE (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"3792_CR24","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., et al.: Feature pyramid networks for object detection. 
In: CVPR, pp.\u00a02117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"3792_CR25","doi-asserted-by":"crossref","unstructured":"Xiao, T., et al.: Unified perceptual parsing for scene understanding. In: ECCV. Springer (2018)","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"3792_CR26","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: ICML. ICML\u201917, pp.\u00a01126\u20131135. JMLR.org, Sydney, NSW, Australia (2017)"},{"key":"3792_CR27","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2017)"},{"key":"3792_CR28","unstructured":"Ren, S., et al.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NIPS, vol.\u00a028. Curran Associates, Inc. (2015)"},{"key":"3792_CR29","doi-asserted-by":"crossref","unstructured":"Redmon, J., et al.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp.\u00a0779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"3792_CR30","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., et al.: Focal loss for dense object detection. In: IEEE T-PAMI 42.2 , pp.\u00a0318\u2013327 (2020)","DOI":"10.1109\/TPAMI.2018.2858826"},{"key":"3792_CR31","doi-asserted-by":"crossref","unstructured":"Mundhenk, T.N., et al.: A large contextual dataset for classification, detection and counting of cars with deep learning. In: ECCV, pp.\u00a0785\u2013800 (2016)","DOI":"10.1007\/978-3-319-46487-9_48"},{"key":"3792_CR32","doi-asserted-by":"crossref","unstructured":"Stahl, T., Pintea, S.L., van Gemert, J.C.: Divide and count: generic object counting by image divisions. In: IEEE T-IP 28.2, pp.\u00a01035\u20131044 (2019)","DOI":"10.1109\/TIP.2018.2875353"},{"key":"3792_CR33","doi-asserted-by":"crossref","unstructured":"Goldman, E., et al.: Precise detection in densely packed scenes. 
In: CVPR, pp.\u00a05222\u20135231 (2019)","DOI":"10.1109\/CVPR.2019.00537"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-024-03792-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-024-03792-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-024-03792-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,13]],"date-time":"2025-02-13T14:46:26Z","timestamp":1739457986000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-024-03792-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,17]]},"references-count":33,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["3792"],"URL":"https:\/\/doi.org\/10.1007\/s11760-024-03792-z","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"type":"print","value":"1863-1703"},{"type":"electronic","value":"1863-1711"}],"subject":[],"published":{"date-parts":[[2025,1,17]]},"assertion":[{"value":"10 July 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}],"article-number":"221"}}