{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T09:24:17Z","timestamp":1758273857728,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,22]],"date-time":"2024-03-22T00:00:00Z","timestamp":1711065600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,22]]},"DOI":"10.1145\/3654823.3654857","type":"proceedings-article","created":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T16:20:33Z","timestamp":1716999633000},"page":"184-190","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Object Detection and Instance Segmentation in Construction Sites"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5837-6713","authenticated-orcid":false,"given":"Cong","family":"Zhang","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3409-4469","authenticated-orcid":false,"given":"Jie","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, China"}]}],"member":"320","published-online":{"date-parts":[[2024,5,29]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2020.103118"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00644"},{"key":"e_1_3_2_1_3_1","volume-title":"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs","author":"Chen Liang-Chieh","year":"2017","unstructured":"Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan\u00a0L Yuille. 2017. Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE transactions on pattern analysis and machine intelligence 40, 4 (2017), 834\u2013848."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings, Part XIV 16","author":"Cheng Tianheng","year":"2020","unstructured":"Tianheng Cheng, Xinggang Wang, Lichao Huang, and Wenyu Liu. 2020. Boundary-preserving mask r-cnn. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XIV 16. Springer, 660\u2013676."},{"key":"e_1_3_2_1_5_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2022.104499"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2017.09.018"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2018.01.001"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00326"},{"key":"e_1_3_2_1_10_1","volume-title":"Is attention better than matrix decomposition?arXiv preprint arXiv:2109.04553","author":"Geng Zhengyang","year":"2021","unstructured":"Zhengyang Geng, Meng-Hao Guo, Hongxu Chen, Xia Li, Ke Wei, and Zhouchen Lin. 2021. Is attention better than matrix decomposition?arXiv preprint arXiv:2109.04553 (2021)."},{"key":"e_1_3_2_1_11_1","volume-title":"Segnext: Rethinking convolutional attention design for semantic segmentation. arXiv preprint arXiv:2209.08575","author":"Guo Meng-Hao","year":"2022","unstructured":"Meng-Hao Guo, Cheng-Ze Lu, Qibin Hou, Zhengning Liu, Ming-Ming Cheng, and Shi-Min Hu. 2022. Segnext: Rethinking convolutional attention design for semantic segmentation. arXiv preprint arXiv:2209.08575 (2022)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2389824"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2021.103785"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2018.12.014"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CP.1943-5487.0000731"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00982"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CP.1943-5487.0000756"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22093307"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CO.1943-7862.0000974"},{"key":"e_1_3_2_1_22_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_1_23_1","volume-title":"Computing in Civil Engineering","author":"Roberts Dominic","year":"2017","unstructured":"Dominic Roberts, Timothy Bretl, and Mani Golparvar-Fard. 2017. Detecting and classifying cranes using camera-equipped UAVs for monitoring crane-related safety hazards. In Computing in Civil Engineering 2017. 442\u2013449."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1111\/mice.12454"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1111\/mice.12749"},{"key":"e_1_3_2_1_26_1","volume-title":"Proc., Construction Research Congress. ASCE Grand Bahama Island, Bahamas.","author":"Su YY","year":"2007","unstructured":"YY Su and LY Liu. 2007. Real-time tracking and analysis of construction operations. In Proc., Construction Research Congress. ASCE Grand Bahama Island, Bahamas."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01371"},{"key":"e_1_3_2_1_28_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_29_1","volume-title":"Deep high-resolution representation learning for visual recognition","author":"Wang Jingdong","year":"2020","unstructured":"Jingdong Wang, Ke Sun, Tianheng Cheng, Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu, Yadong Mu, Mingkui Tan, Xinggang Wang, 2020. Deep high-resolution representation learning for visual recognition. IEEE transactions on pattern analysis and machine intelligence 43, 10 (2020), 3349\u20133364."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2019.102920"},{"key":"e_1_3_2_1_31_1","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose\u00a0M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. Advances in Neural Information Processing Systems 34 (2021), 12077\u201312090.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2020.103482"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2023.105083"},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings, Part VI 16","author":"Yuan Yuhui","year":"2020","unstructured":"Yuhui Yuan, Xilin Chen, and Jingdong Wang. 2020. Object-contextual representations for semantic segmentation. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part VI 16. Springer, 173\u2013190."},{"key":"e_1_3_2_1_35_1","volume-title":"Object detection in 20 years: A survey. Proc","author":"Zou Zhengxia","year":"2023","unstructured":"Zhengxia Zou, Keyan Chen, Zhenwei Shi, Yuhong Guo, and Jieping Ye. 2023. Object detection in 20 years: A survey. Proc. IEEE (2023)."}],"event":{"name":"CACML 2024: 2024 3rd Asia Conference on Algorithms, Computing and Machine Learning","acronym":"CACML 2024","location":"Shanghai China"},"container-title":["Proceedings of the 2024 3rd Asia Conference on Algorithms, Computing and Machine Learning"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654823.3654857","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3654823.3654857","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:15:45Z","timestamp":1755875745000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654823.3654857"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,22]]},"references-count":35,"alternative-id":["10.1145\/3654823.3654857","10.1145\/3654823"],"URL":"https:\/\/doi.org\/10.1145\/3654823.3654857","relation":{},"subject":[],"published":{"date-parts":[[2024,3,22]]},"assertion":[{"value":"2024-05-29","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}