{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T02:04:17Z","timestamp":1779933857786,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Swiss National Science Foundation via Sinergia","award":["CRSII5- 180359"],"award-info":[{"award-number":["CRSII5- 180359"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376209, 62376070, 62076195"],"award-info":[{"award-number":["62376209, 62376070, 62076195"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Xizang Autonomous Region Central Guided Local Science and Technology Development Fund Project","award":["XZ202401YD0002"],"award-info":[{"award-number":["XZ202401YD0002"]}]},{"name":"Natural Science Basic Research Plan in Shaanxi Province of China","award":["2022JQ-631"],"award-info":[{"award-number":["2022JQ-631"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680976","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:41Z","timestamp":1729925981000},"page":"8681-8690","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Boosting Semi-supervised Crowd Counting with Scale-based Active Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2870-3974","authenticated-orcid":false,"given":"Shiwei","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Software Engineering, Xi'an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2899-0371","authenticated-orcid":false,"given":"Wei","family":"Ke","sequence":"additional","affiliation":[{"name":"School of Software Engineering, Xi'an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0327-6729","authenticated-orcid":false,"given":"Shuai","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Software Engineering, Xi'an Jiaotong University, Xi'an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0611-0636","authenticated-orcid":false,"given":"Xiaopeng","family":"Hong","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5818-4285","authenticated-orcid":false,"given":"Tong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Swiss Federal Institute of Technology Lausanne, Lausanne, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Completely Self-supervised Crowd Counting via Distribution Matching","author":"Sam Deepak Babu","unstructured":"Deepak Babu Sam, Abhinav Agarwalla, Jimmy Joseph, Vishwanath A Sindagi, R Venkatesh Babu, and Vishal M Patel. 2022. Completely Self-supervised Crowd Counting via Distribution Matching. In ECCV. Springer."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00976"},{"key":"e_1_3_2_1_3_1","volume-title":"Scale Aggregation Network for Accurate and Efficient Crowd Counting. In European Conference on Computer Vision. 757--773","author":"Cao Xinkun","year":"2018","unstructured":"Xinkun Cao, Zhipeng Wang, Yanyun Zhao, and Fei Su. 2018. Scale Aggregation Network for Accurate and Efficient Crowd Counting. In European Conference on Computer Vision. 757--773."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3055631"},{"key":"e_1_3_2_1_5_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 10264--10273","author":"Choi Jiwoong","unstructured":"Jiwoong Choi, Ismail Elezi, Hyuk-Jae Lee, Clement Farabet, and Jose M. Alvarez. 2021. Active Learning for Deep Object Detection via Probabilistic Modeling. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 10264--10273."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-377-6.50027-X"},{"key":"e_1_3_2_1_7_1","volume-title":"international conference on machine learning. PMLR, 1050--1059","author":"Gal Yarin","year":"2016","unstructured":"Yarin Gal and Zoubin Ghahramani. 2016. Dropout as a bayesian approximation: Representing model uncertainty in deep learning. In international conference on machine learning. PMLR, 1050--1059."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings, Part X 16","author":"Gao Mingfei","year":"2020","unstructured":"Mingfei Gao, Zizhao Zhang, Guo Yu, Sercan \u00d6 Arik, Larry S Davis, and Tomas Pfister. 2020. Consistency-based semi-supervised active learning: Towards minimizing labeling cost. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part X 16. Springer, 510--526."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2740160"},{"key":"e_1_3_2_1_10_1","volume-title":"Density Map Estimation and Localization in Dense Crowds. In European Conference on Computer Vision. 544--559","author":"Idrees Haroon","year":"2018","unstructured":"Haroon Idrees, Muhmmad Tayyab, Kishan Athrey, Dong Zhang, Somaya Al-M\u00e1adeed, Nasir M. Rajpoot, and Mubarak Shah. 2018. Composition Loss for Counting, Density Map Estimation and Localization in Dense Crowds. In European Conference on Computer Vision. 544--559."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00629"},{"key":"e_1_3_2_1_12_1","volume-title":"Attention Scaling for Crowd Counting. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 4705--4714","author":"Jiang Xiaoheng","year":"2020","unstructured":"Xiaoheng Jiang, Li Zhang, Mingliang Xu, Tianzhu Zhang, Pei Lv, Bing Zhou, Xin Yang, and Yanwei Pang. 2020. Attention Scaling for Crowd Counting. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 4705--4714."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_14_1","volume-title":"Lempitsky and Andrew Zisserman","author":"Victor","year":"2010","unstructured":"Victor S. Lempitsky and Andrew Zisserman. 2010. Learning To Count Objects in Images. In Advances in Neural Information Processing Systems. 1324--1332."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01534"},{"key":"e_1_3_2_1_16_1","volume-title":"CSRNet: Dilated Convolutional Neural Networks for Understanding the Highly Congested Scenes. In IEEE Conference on Computer Vision and Pattern Recognition. 1091--1100","author":"Li Yuhong","year":"2018","unstructured":"Yuhong Li, Xiaofan Zhang, and Deming Chen. 2018. CSRNet: Dilated Convolutional Neural Networks for Understanding the Highly Congested Scenes. In IEEE Conference on Computer Vision and Pattern Recognition. 1091--1100."},{"key":"e_1_3_2_1_17_1","first-page":"160104","article-title":"Transcrowd: weakly-supervised crowd counting with transformers","volume":"65","author":"Liang D.","year":"2022","unstructured":"D. Liang, X. Chen, W. Xu, Y. Zhou, and X. Bai. 2022. Transcrowd: weakly-supervised crowd counting with transformers. SCIS, Vol. 65, 6 (2022), 160104.","journal-title":"SCIS"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00283"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"D. Liang W. Xu and X. Bai. 2022. An end-to-end transformer model for crowd localization. In ECCV. 38--54.","DOI":"10.1007\/978-3-031-19769-7_3"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Hui Lin Zhiheng Ma Xiaopeng Hong Yaowei Wang and Zhou Su. 2022. Semi-supervised Crowd Counting via Density Agency. In ACM MM. 1416--1426.","DOI":"10.1145\/3503161.3547867"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01901"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02075"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00161"},{"key":"e_1_3_2_1_24_1","volume-title":"IEEE Conference on Computer Vision and Pattern Recognition. 7661--7669","author":"Liu Xialei","unstructured":"Xialei Liu, Joost van de Weijer, and Andrew D. Bagdanov. 2018. Leveraging Unlabeled Data for Crowd Counting by Learning to Rank. In IEEE Conference on Computer Vision and Pattern Recognition. 7661--7669."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_15"},{"key":"e_1_3_2_1_26_1","volume-title":"Reducing spatial labeling redundancy for active semi-supervised crowd counting","author":"Liu Yongtuo","year":"2022","unstructured":"Yongtuo Liu, Sucheng Ren, Liangyu Chai, Hanjie Wu, Dan Xu, Jing Qin, and Shengfeng He. 2022. Reducing spatial labeling redundancy for active semi-supervised crowd counting. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00319"},{"key":"e_1_3_2_1_28_1","volume-title":"Bayesian Loss for Crowd Count Estimation With Point Supervision. In IEEE\/CVF International Conference on Computer Vision. 6141--6150","author":"Ma Zhiheng","year":"2019","unstructured":"Zhiheng Ma, Xing Wei, Xiaopeng Hong, and Yihong Gong. 2019. Bayesian Loss for Crowd Count Estimation With Point Supervision. In IEEE\/CVF International Conference on Computer Vision. 6141--6150."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Yanda Meng Hongrun Zhang Yitian Zhao Xiaoyun Yang Xuesheng Qian Xiaowei Huang and Yalin Zheng. 2021. Spatial Uncertainty-Aware Semi-Supervised Crowd Counting. In CVPR. 15529--15539.","DOI":"10.1109\/ICCV48922.2021.01526"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3009030"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3207584"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015349"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"L. Niu X. Wang C. Duan Q. Shen and W. Liu. 2022. Local Point Matching Network for Stabilized Crowd Counting and Localization. In PRCV.","DOI":"10.1007\/978-3-031-18907-4_44"},{"key":"e_1_3_2_1_34_1","volume-title":"Switching Convolutional Neural Network for Crowd Counting. In IEEE Conference on Computer Vision and Pattern Recognition. 4031--4039","author":"Sam Deepak Babu","unstructured":"Deepak Babu Sam, Shiv Surya, and R. Venkatesh Babu. 2017. Switching Convolutional Neural Network for Crowd Counting. In IEEE Conference on Computer Vision and Pattern Recognition. 4031--4039."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2023.3330412"},{"key":"e_1_3_2_1_36_1","volume-title":"Active learning for convolutional neural networks: A core-set approach. arXiv preprint arXiv:1708.00489","author":"Sener Ozan","year":"2017","unstructured":"Ozan Sener and Silvio Savarese. 2017. Active learning for convolutional neural networks: A core-set approach. arXiv preprint arXiv:1708.00489 (2017)."},{"key":"e_1_3_2_1_37_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. In International Conference on Learning Representations, Yoshua Bengio and Yann LeCun (Eds.).","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very Deep Convolutional Networks for Large-Scale Image Recognition. In International Conference on Learning Representations, Yoshua Bengio and Yann LeCun (Eds.)."},{"key":"e_1_3_2_1_38_1","first-page":"2594","article-title":"JHU-CROWD: Large-Scale Crowd Counting Dataset and A Benchmark Method","volume":"44","author":"Sindagi Vishwanath A.","year":"2022","unstructured":"Vishwanath A. Sindagi, Rajeev Yasarla, and Vishal M. Patel. 2022. JHU-CROWD: Large-Scale Crowd Counting Dataset and A Benchmark Method. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 5 (2022), 2594--2609.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_39_1","volume-title":"European Conference Computer Vision. 212--229","author":"Sindagi Vishwanath A.","unstructured":"Vishwanath A. Sindagi, Rajeev Yasarla, Deepak Babu Sam, R. Venkatesh Babu, and Vishal M. Patel. 2020. Learning to Count in the Crowd from Limited Labeled Data. In European Conference Computer Vision. 212--229."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00607"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00335"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2876304"},{"key":"e_1_3_2_1_43_1","unstructured":"Antti Tarvainen and Harri Valpola. 2017. Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. In Advances in Neural Information Processing Systems. 1195--1204."},{"key":"e_1_3_2_1_44_1","volume-title":"C-MIL: Continuation Multiple Instance Learning for Weakly Supervised Object Detection. In IEEE Conference on Computer Vision and Pattern Recognition. 2199--2208","author":"Wan Fang","year":"2019","unstructured":"Fang Wan, Chang Liu, Wei Ke, Xiangyang Ji, Jianbin Jiao, and Qixiang Ye. 2019. C-MIL: Continuation Multiple Instance Learning for Weakly Supervised Object Detection. In IEEE Conference on Computer Vision and Pattern Recognition. 2199--2208."},{"key":"e_1_3_2_1_45_1","unstructured":"Boyu Wang Huidong Liu Dimitris Samaras and Minh Hoai Nguyen. 2020. Distribution Matching for Crowd Counting. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00322"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3013269"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3313490"},{"key":"e_1_3_2_1_49_1","volume-title":"Glance to Count: Learning to Rank with Anchors for Weakly-supervised Crowd Counting. arXiv:2205.14659","author":"Xiong Zheng","year":"2022","unstructured":"Zheng Xiong, Liangyu Chai, Wenxi Liu, Yongtuo Liu, Sucheng Ren, and Shengfeng He. 2022. Glance to Count: Learning to Rank with Anchors for Weakly-supervised Crowd Counting. arXiv:2205.14659 (2022)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Yanyu Xu Ziming Zhong Dongze Lian Jing Li Zhengxin Li Xinxing Xu and Shenghua Gao. 2021. Crowd Counting With Partial Annotations in an Image. In ICCV. 15550--15559.","DOI":"10.1109\/ICCV48922.2021.01528"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58598-3_1"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00529"},{"key":"e_1_3_2_1_53_1","volume-title":"Relational Attention Network for Crowd Counting. In IEEE\/CVF International Conference on Computer Vision. 6787--6796","author":"Zhang Anran","year":"2019","unstructured":"Anran Zhang, Jiayi Shen, Zehao Xiao, Fan Zhu, Xiantong Zhen, Xianbin Cao, and Ling Shao. 2019. Relational Attention Network for Crowd Counting. In IEEE\/CVF International Conference on Computer Vision. 6787--6796."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.70"},{"key":"e_1_3_2_1_55_1","volume-title":"Active Crowd Counting with Limited Supervision. In European Conference on Computer Vision. 565--581","author":"Zhao Zhen","year":"2020","unstructured":"Zhen Zhao, Miaojing Shi, Xiaoxiao Zhao, and Li Li. 2020. Active Crowd Counting with Limited Supervision. In European Conference on Computer Vision. 565--581."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680976","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680976","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:35Z","timestamp":1750295855000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680976"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":55,"alternative-id":["10.1145\/3664647.3680976","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680976","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}