{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:46:20Z","timestamp":1771951580036,"version":"3.50.1"},"publisher-location":"Cham","reference-count":46,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031250743","type":"print"},{"value":"9783031250750","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-25075-0_16","type":"book-chapter","created":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T09:16:53Z","timestamp":1676798213000},"page":"207-222","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["CounTr: An End-to-End Transformer Approach for\u00a0Crowd Counting and\u00a0Density Estimation"],"prefix":"10.1007","author":[{"given":"Haoyue","family":"Bai","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhuoxuan","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianyuan","family":"Dai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S.-H. Gary","family":"Chan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,2,19]]},"reference":[{"key":"16_CR1","unstructured":"Aitken, A., Ledig, C., Theis, L., Caballero, J., Wang, Z., Shi, W.: Checkerboard artifact free sub-pixel convolution: A note on sub-pixel convolution, resize convolution and convolution resize. arXiv:1707.02937 (2017)"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Bai, H., Mao, J., Chan, S.H.G.: A survey on deep learning-based single image crowd counting: Network design, loss function and supervisory signal. Neurocomputing (2022)","DOI":"10.1016\/j.neucom.2022.08.037"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Bai, H., Wen, S., Gary Chan, S.H.: Crowd counting on images with scale variation and isolated clusters. In: ICCVW (2019)","DOI":"10.1109\/ICCVW.2019.00009"},{"key":"16_CR4","unstructured":"Bao, H., Dong, L., Wei, F.: Beit: Bert pre-training of image transformers. arXiv:2106.08254 (2021)"},{"key":"16_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1007\/978-3-030-01228-1_45","volume-title":"Computer Vision \u2013 ECCV 2018","author":"X Cao","year":"2018","unstructured":"Cao, X., Wang, Z., Zhao, Y., Su, F.: Scale aggregation network for accurate and efficient crowd counting. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 757\u2013773. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_45"},{"key":"16_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"16_CR7","doi-asserted-by":"crossref","unstructured":"Deb, D., Ventura, J.: An aggregated multicolumn dilated convolution network for perspective-free counting. In: CVPR Workshops (2018)","DOI":"10.1109\/CVPRW.2018.00057"},{"key":"16_CR8","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv:2010.11929 (2020)"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"Fang, Y., Zhan, B., Cai, W., Gao, S., Hu, B.: Locality-constrained spatial transformer network for video crowd counting. In: ICME (2019)","DOI":"10.1109\/ICME.2019.00145"},{"key":"16_CR10","doi-asserted-by":"crossref","unstructured":"Gao, J., Wang, Q., Yuan, Y.: Scar: Spatial-\/channel-wise attention regression networks for crowd counting. Neurocomputing (2019)","DOI":"10.1016\/j.neucom.2019.08.018"},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"He, S., Luo, H., Wang, P., Wang, F., Li, H., Jiang, W.: Transreid: Transformer-based object re-identification. arXiv:2102.04378 (2021)","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Idrees, H., Saleemi, I., Seibert, C., Shah, M.: Multi-source multi-scale counting in extremely dense crowd images. In: CVPR (2013)","DOI":"10.1109\/CVPR.2013.329"},{"key":"16_CR13","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: NIPS (2012)"},{"key":"16_CR14","unstructured":"Lempitsky, V., Zisserman, A.: Learning to count objects in images. In: NeurIPS (2010)"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, X., Chen, D.: Csrnet: Dilated convolutional neural networks for understanding the highly congested scenes. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00120"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Liang, J., Cao, J., Sun, G., Zhang, K., Van Gool, L., Timofte, R.: Swinir: Image restoration using swin transformer. arXiv:2108.10257 (2021)","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Liu, L., et al.: Denet: A universal network for counting crowd with varying densities and scales. In: TMM (2020)","DOI":"10.1109\/TMM.2020.2992979"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Liu, L., Qiu, Z., Li, G., Liu, S., Ouyang, W., Lin, L.: Crowd counting with deep structured scale integration network. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00186"},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Liu, W., Salzmann, M., Fua, P.: Context-aware crowd counting. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00524"},{"key":"16_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1007\/978-3-030-58586-0_15","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Liu","year":"2020","unstructured":"Liu, X., Yang, J., Ding, W., Wang, T., Wang, Z., Xiong, J.: Adaptive mixture regression network with local counting map for crowd counting. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12369, pp. 241\u2013257. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58586-0_15"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Crowd counting via cross-stage refinement networks. In: TIP (2020)","DOI":"10.1109\/TIP.2020.2994410"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: Hierarchical vision transformer using shifted windows. arXiv:2103.14030 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"16_CR24","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Bayesian loss for crowd count estimation with point supervision. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00624"},{"key":"16_CR25","unstructured":"Mao, J., et al.: One million scenes for autonomous driving: Once dataset. arXiv preprint arXiv:2106.11037 (2021)"},{"key":"16_CR26","unstructured":"Mao, J., Shi, S., Wang, X., Li, H.: 3d object detection for autonomous driving: A review and new outlooks. arXiv preprint arXiv:2206.09474 (2022)"},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Mao, J., et al.: Voxel transformer for 3d object detection. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00315"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Miao, Y., Lin, Z., Ding, G., Han, J.: Shallow feature based dense attention network for crowd counting. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6848"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"16_CR30","doi-asserted-by":"crossref","unstructured":"Redmon, J., Farhadi, A.: Yolo9000: better, faster, stronger. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.690"},{"key":"16_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: Convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"16_CR32","doi-asserted-by":"crossref","unstructured":"Sam, D.B., Surya, S., Babu, R.V.: Switching convolutional neural network for crowd counting. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.429"},{"key":"16_CR33","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Patel, V.M.: Generating high-quality crowd density maps using contextual pyramid cnns. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.206"},{"key":"16_CR34","doi-asserted-by":"crossref","unstructured":"Sindagi, V.A., Patel, V.M.: Multi-level bottom-top and top-bottom feature fusion for crowd counting. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00109"},{"key":"16_CR35","doi-asserted-by":"crossref","unstructured":"Song, Q., et al.: To choose or to fuse? scale selection for crowd counting. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i3.16360"},{"key":"16_CR36","unstructured":"Sun, G., Liu, Y., Probst, T., Paudel, D.P., Popovic, N., Van Gool, L.: Boosting crowd counting with transformers. arXiv:2105.10926 (2021)"},{"key":"16_CR37","doi-asserted-by":"crossref","unstructured":"Tian, Y., Lei, Y., Zhang, J., Wang, J.Z.: Padnet: Pan-density crowd counting. In: TIP (2019)","DOI":"10.1109\/TIP.2019.2952083"},{"key":"16_CR38","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"16_CR39","unstructured":"Wang, B., Liu, H., Samaras, D., Hoai, M.: Distribution matching for crowd counting. arXiv:2009.13077 (2020)"},{"key":"16_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: End-to-end video instance segmentation with transformers. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00863"},{"key":"16_CR41","unstructured":"Xie, E., Wang, W., Yu, Z., Anandkumar, A., Alvarez, J.M., Luo, P.: Segformer: Simple and efficient design for semantic segmentation with transformers. arXiv:2105.15203 (2021)"},{"key":"16_CR42","doi-asserted-by":"crossref","unstructured":"Xiong, F., Shi, X., Yeung, D.Y.: Spatiotemporal modeling for crowd counting in videos. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.551"},{"key":"16_CR43","doi-asserted-by":"crossref","unstructured":"Yan, Z., Zhang, R., Zhang, H., Zhang, Q., Zuo, W.: Crowd counting via perspective-guided fractional-dilation convolution. In: TMM (2021)","DOI":"10.1109\/TMM.2021.3086709"},{"key":"16_CR44","doi-asserted-by":"crossref","unstructured":"Zhang, A., et al.: Relational attention network for crowd counting. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00689"},{"key":"16_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, L., Shi, M., Chen, Q.: Crowd counting via scale-adaptive convolutional neural network. In: WACV (2018)","DOI":"10.1109\/WACV.2018.00127"},{"key":"16_CR46","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zhou, D., Chen, S., Gao, S., Ma, Y.: Single-image crowd counting via multi-column convolutional neural network. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.70"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-25075-0_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,19]],"date-time":"2023-02-19T09:28:11Z","timestamp":1676798891000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-25075-0_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031250743","9783031250750"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-25075-0_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"19 February 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"From the workshops, 367 reviewed full papers have been selected for publication","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}