{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T07:02:24Z","timestamp":1742972544852,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":47,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819786190"},{"type":"electronic","value":"9789819786206"}],"license":[{"start":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T00:00:00Z","timestamp":1729382400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T00:00:00Z","timestamp":1729382400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8620-6_5","type":"book-chapter","created":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T21:02:10Z","timestamp":1729371730000},"page":"67-83","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Cascade Coarse-to-Fine Point-Query Transformer for RGB-T Crowd Counting"],"prefix":"10.1007","author":[{"given":"Xian","family":"Qu","sequence":"first","affiliation":[]},{"given":"Yingyi","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Xiaoming","family":"Mai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,20]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Pore, S.D., Momin, B.: Bidirectional people counting system in video surveillance. In: RTEICT, pp. 724\u2013727. IEEE (2016)","DOI":"10.1109\/RTEICT.2016.7807919"},{"key":"5_CR2","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.neucom.2015.03.083","volume":"166","author":"Z Zhang","year":"2015","unstructured":"Zhang, Z., Wang, M., Geng, X.: Crowd counting in public video surveillance by label distribution learning. Neurocomputing 166, 151\u2013163 (2015)","journal-title":"Neurocomputing"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Danielis, P., Kouyoumdjieva, S.T., Karlsson, G.: Urbancount: mobile crowd counting in urban environments. In: IEMCON, pp. 640\u2013648. IEEE (2017)","DOI":"10.1109\/IEMCON.2017.8117189"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Grgurevi\u0107, I., Jur\u0161i\u0107, K., Raji\u010d, V.: Review of automatic passenger counting systems in public urban transport. In: EAI MMS, pp. 1\u201315. Springer (2022)","DOI":"10.1007\/978-3-030-67241-6_1"},{"key":"5_CR5","unstructured":"Ghodgaonkar, I., Chakraborty, S., Banna, V., Allcroft, S., Metwaly, M., Bordwell, F., Kimura, K., Zhao, X., Goel, A., Tung, C., et\u00a0al.: Analyzing worldwide social distancing through large-scale computer vision. arXiv:2008.12363 (2020)"},{"issue":"11","key":"5_CR6","first-page":"7169","volume":"22","author":"L Liu","year":"2020","unstructured":"Liu, L., Zhen, J., Li, G., Zhan, G., He, Z., Du, B., Lin, L.: Dynamic spatial-temporal representation learning for traffic flow prediction. T-ITS 22(11), 7169\u20137183 (2020)","journal-title":"T-ITS"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Zhang, S., Wu, G., Costeira, J.P., Moura, J.M.: Understanding traffic density from large-scale web camera data. In: CVPR, pp. 5898\u20135907 (2017)","DOI":"10.1109\/CVPR.2017.454"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Liang, D., Xu, W., Bai, X.: An end-to-end transformer model for crowd localization. In: ECCV, pp. 38\u201354. Springer (2022)","DOI":"10.1007\/978-3-031-19769-7_3"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Lin, H., Ma, Z., Ji, R., Wang, Y., Hong, X.: Boosting crowd counting via multifaceted attention. In: CVPR, pp. 19628\u201319637 (2022)","DOI":"10.1109\/CVPR52688.2022.01901"},{"key":"5_CR10","unstructured":"Tian, Y., Chu, X., Wang, H.: Cctrans: simplifying and improving crowd counting with transformer. arXiv:2109.14483 (2021)"},{"key":"5_CR11","unstructured":"Wei, X., Kang, Y., Yang, J., Qiu, Y., Shi, D., Tan, W., Gong, Y.: Scene-adaptive attention network for crowd counting. arXiv:2112.15509 (2021)"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Xu, M.: An efficient crowd estimation method using convolutional neural network with thermal images. In: ICSIDP, pp.\u00a01\u20136. IEEE (2019)","DOI":"10.1109\/ICSIDP47821.2019.9172971"},{"issue":"1","key":"5_CR13","doi-asserted-by":"publisher","first-page":"62","DOI":"10.3390\/s16010062","volume":"16","author":"MS Kristoffersen","year":"2016","unstructured":"Kristoffersen, M.S., Dueholm, J.V., Gade, R., Moeslund, T.B.: Pedestrian counting with occlusion handling using stereo thermal cameras. Sensors 16(1), 62 (2016)","journal-title":"Sensors"},{"key":"5_CR14","doi-asserted-by":"publisher","first-page":"124835","DOI":"10.1109\/ACCESS.2022.3225233","volume":"10","author":"M Piechocki","year":"2022","unstructured":"Piechocki, M., Kraft, M., Pajchrowski, T., Aszkowski, P., Pieczynski, D.: Efficient people counting in thermal images: the benchmark of resource-constrained hardware. IEEE Access 10, 124835\u2013124847 (2022)","journal-title":"IEEE Access"},{"key":"5_CR15","doi-asserted-by":"crossref","unstructured":"Yang, M., Huang, Z., Hu, P., Li, T., Lv, J., Peng, X.: Learning with twin noisy labels for visible-infrared person re-identification. In: CVPR, pp. 14308\u201314317 (2022)","DOI":"10.1109\/CVPR52688.2022.01391"},{"key":"5_CR16","unstructured":"Yang, M., Huang, Z., Peng, X.: Robust object re-identification with coupled noisy labels. In: IJCV, pp. 1\u201319 (2024)"},{"key":"5_CR17","unstructured":"Yang, M., Li, Y., Zhang, C., Hu, P., Peng, X.: Test-time adaptation against multi-modal reliability bias. In: ICLR (2024)"},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Liu, L., Chen, J., Wu, H., Li, G., Li, C., Lin, L.: Cross-modal collaborative representation learning and a large-scale rgbt benchmark for crowd counting. In: CVPR, pp. 4823\u20134833 (2021)","DOI":"10.1109\/CVPR46437.2021.00479"},{"key":"5_CR19","unstructured":"Chen, P., Gao, J., Yuan, Y., Wang, Q.: Mafnet: a multi-attention fusion network for rgb-t crowd counting. arXiv:2208.06761 (2022)"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Choi, S., Hong, S.: Spatio-channel attention blocks for cross-modal crowd counting. In: ACCV, pp. 90\u2013107 (2022)","DOI":"10.1007\/978-3-031-26284-5_2"},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Tang, H., Wang, Y., Chau, L.P.: Tafnet: A three-stream adaptive fusion network for rgb-t crowd counting. In: ISCAS, pp. 3299\u20133303. IEEE (2022)","DOI":"10.1109\/ISCAS48785.2022.9937583"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Peng, T., Li, Q., Zhu, P.: Rgb-t crowd counting from drone: a benchmark and mmccn network. In: ACCV (2020)","DOI":"10.1007\/978-3-030-69544-6_30"},{"issue":"12","key":"5_CR23","first-page":"24540","volume":"23","author":"W Zhou","year":"2022","unstructured":"Zhou, W., Pan, Y., Lei, J., Ye, L., Yu, L.: Defnet: Dual-branch enhanced feature fusion network for rgb-t crowd counting. T-ITS 23(12), 24540\u201324549 (2022)","journal-title":"T-ITS"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, B., Du, Y., Zhao, Y., Wan, J., Tong, Z.: I-mmccn: improved mmccn for rgb-t crowd counting of drone images. In: IC-NIDC, pp. 117\u2013121. IEEE (2021)","DOI":"10.1109\/IC-NIDC54101.2021.9660586"},{"key":"5_CR25","unstructured":"Liu, Z., Wu, W., Tan, Y., Zhang, G.: Rgb-t multi-modal crowd counting based on transformer. arXiv:2301.03033 (2023)"},{"key":"5_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104592","volume":"129","author":"S Zhang","year":"2023","unstructured":"Zhang, S., Wang, W., Zhao, W., Wang, L., Li, Q.: A cross-modal crowd counting method combining cnn and cross-modal transformer. IVC 129, 104592 (2023)","journal-title":"IVC"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"Wu, Z., Liu, L., Zhang, Y., Mao, M., Lin, L., Li, G.: Multimodal crowd counting with mutual attention transformers. In: ICME, pp.\u00a01\u20136 (2022)","DOI":"10.1109\/ICME52920.2022.9859777"},{"key":"5_CR28","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. NIPS 30 (2017)"},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Liu, C., Lu, H., Cao, Z., Liu, T.: Point-query quadtree for crowd counting, localization, and more. In: ICCV, pp. 1676\u20131685 (2023)","DOI":"10.1109\/ICCV51070.2023.00161"},{"key":"5_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Y., Shi, M., Zhao, Q., Wang, X.: Point in, box out: Beyond counting persons in crowds. In: CVPR, pp. 6469\u20136478 (2019)","DOI":"10.1109\/CVPR.2019.00663"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Liu, C., Weng, X., Mu, Y.: Recurrent attentive zooming for joint crowd counting and precise localization. In: CVPR, pp. 1217\u20131226 (2019)","DOI":"10.1109\/CVPR.2019.00131"},{"key":"5_CR32","doi-asserted-by":"crossref","unstructured":"Ryan, D., Denman, S., Fookes, C., Sridharan, S.: Crowd counting using multiple local features. In: DICTA, pp. 81\u201388 (2009)","DOI":"10.1109\/DICTA.2009.22"},{"key":"5_CR33","doi-asserted-by":"crossref","unstructured":"Liu, L., Lu, H., Zou, H., Xiong, H., Cao, Z., Shen, C.: Weighing counts: sequential crowd counting by reinforcement learning. In: ECCV, pp. 164\u2013181. Springer (2020)","DOI":"10.1007\/978-3-030-58607-2_10"},{"key":"5_CR34","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: ECCV, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"issue":"1\u20132","key":"5_CR35","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1002\/nav.3800020109","volume":"2","author":"HW Kuhn","year":"1955","unstructured":"Kuhn, H.W.: The hungarian method for the assignment problem. Naval Res. Log. Q. 2(1\u20132), 83\u201397 (1955)","journal-title":"Naval Res. Log. Q."},{"key":"5_CR36","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"5_CR37","doi-asserted-by":"crossref","unstructured":"Guerrero-G\u00f3mez-Olmedo, R., Torre-Jim\u00e9nez, B., L\u00f3pez-Sastre, R., Maldonado-Basc\u00f3n, S., Onoro-Rubio, D.: Extremely overlapping vehicle counting. In: IbPRIA, pp. 423\u2013431. Springer (2015)","DOI":"10.1007\/978-3-319-19390-8_48"},{"key":"5_CR38","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556 (2014)"},{"key":"5_CR39","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv:1412.6980 (2014)"},{"key":"5_CR40","unstructured":"Hu, Y., Liu, Y., Cao, G., Shang, Y.: Glfnet: An rgb-t crowd counting network based on global-local multimodal feature fusion. SSRN7"},{"key":"5_CR41","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, X., Chen, D.: Csrnet: Dilated convolutional neural networks for understanding the highly congested scenes. In: CVPR, pp. 1091\u20131100 (2018)","DOI":"10.1109\/CVPR.2018.00120"},{"key":"5_CR42","doi-asserted-by":"crossref","unstructured":"Ma, Z., Wei, X., Hong, X., Gong, Y.: Bayesian loss for crowd count estimation with point supervision. In: ICCV, pp. 6142\u20136151 (2019)","DOI":"10.1109\/ICCV.2019.00624"},{"key":"5_CR43","first-page":"1595","volume":"33","author":"B Wang","year":"2020","unstructured":"Wang, B., Liu, H., Samaras, D., Nguyen, M.H.: Distribution matching for crowd counting. NIPS 33, 1595\u20131607 (2020)","journal-title":"NIPS"},{"key":"5_CR44","doi-asserted-by":"crossref","unstructured":"Song, Q., Wang, C., Jiang, Z., Wang, Y., Tai, Y., Wang, C., Li, J., Huang, F., Wu, Y.: Rethinking counting and localization in crowds: a purely point-based framework. In: ICCV, pp. 3365\u20133374 (2021)","DOI":"10.1109\/ICCV48922.2021.00335"},{"key":"5_CR45","doi-asserted-by":"crossref","unstructured":"Lin, H., Ma, Z., Ji, R., Wang, Y., Hong, X.: Boosting crowd counting via multifaceted attention. In: CVPR, pp. 19628\u201319637 (2022)","DOI":"10.1109\/CVPR52688.2022.01901"},{"key":"5_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, W., Yang, X., Lei, J., Yan, W., Yu, L.: Mc $$3$$ net: multimodality cross-guided compensation coordination network for rgb-t crowd counting. T-ITS (2023)","DOI":"10.1109\/TITS.2023.3321328"},{"key":"5_CR47","doi-asserted-by":"crossref","unstructured":"Guo, Q., Yuan, P., Huang, X., Ye, Y.: Consistency-constrained rgb-t crowd counting via mutual information maximization. Complex & Intell. Syst. 1\u201322 (2024)","DOI":"10.1007\/s40747-024-01427-x"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8620-6_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,14]],"date-time":"2025-01-14T20:17:28Z","timestamp":1736885848000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8620-6_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,20]]},"ISBN":["9789819786190","9789819786206"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8620-6_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,20]]},"assertion":[{"value":"20 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}