{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T22:47:39Z","timestamp":1772923659186,"version":"3.50.1"},"reference-count":57,"publisher":"American Society of Civil Engineers (ASCE)","issue":"3","content-domain":{"domain":["ascelibrary.org"],"crossmark-restriction":true},"short-container-title":["J. Comput. Civ. Eng."],"published-print":{"date-parts":[[2026,5,1]]},"DOI":"10.1061\/jccee5.cpeng-7157","type":"journal-article","created":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T03:51:46Z","timestamp":1771991506000},"update-policy":"https:\/\/doi.org\/10.1061\/do.news.20190416.0001","source":"Crossref","is-referenced-by-count":0,"title":["Graph-Based Human\u2013Object Interaction Detection for Automated Worker Accountability Monitoring on Construction Sites"],"prefix":"10.1061","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8694-099X","authenticated-orcid":true,"given":"Mik Wanul","family":"Khosiin","sequence":"first","affiliation":[{"name":"National Taiwan Univ.","place":["Taiwan"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3781-9402","authenticated-orcid":true,"given":"Jacob J.","family":"Lin","sequence":"additional","affiliation":[{"name":"National Taiwan Univ.","place":["Taiwan"]}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6668-4907","authenticated-orcid":true,"given":"Eko Andi","family":"Suryo","sequence":"additional","affiliation":[{"name":"Univ. of Brawijaya","place":["Indonesia"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8425-0776","authenticated-orcid":true,"given":"Kartika Puspa","family":"Negara","sequence":"additional","affiliation":[{"name":"Univ. of Brawijaya","place":["Indonesia"]}]},{"given":"Ismiarta","family":"Aknuranda","sequence":"additional","affiliation":[{"name":"Univ. of Brawijaya","place":["Indonesia"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5281-2535","authenticated-orcid":true,"given":"Chuin-Shan","family":"Chen","sequence":"additional","affiliation":[{"name":"National Taiwan Univ.","place":["Taiwan"]}]}],"member":"30","reference":[{"key":"e_1_3_4_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2024.105684"},{"key":"e_1_3_4_3_1","first-page":"252","volume-title":"The management of quality in construction","author":"Ashford J.","year":"1989","unstructured":"Ashford, J. 1989. The management of quality in construction. 1st ed., 252. London: Routledge.","edition":"1"},{"key":"e_1_3_4_4_1","doi-asserted-by":"crossref","unstructured":"Carion N. F. Massa G. Synnaeve N. Usunier A. Kirillov and S. Zagoruyko. 2020. \u201cEnd-to-end object detection with transformers.\u201d Preprint submitted May 26 2020. https:\/\/arxiv.org\/abs\/2005.12872.","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_4_5_1","unstructured":"Chae Y. H. Lee C. R. Ahn M. Jung and M. Park. 2022. \u201cVision-based activity recognition monitoring based on human-object interaction at construction sites.\u201d In Vol. 2022.06a of Proc. Int. Conf. on Construction Engineering and Project Management 877\u2013885. Seoul: Korea Institute of Construction Engineering and Management."},{"key":"e_1_3_4_6_1","unstructured":"Chao Y. Y. Liu X. Liu H. Zeng and J. Deng. 2017. \u201cLearning to detect human-object interactions.\u201d Preprint submitted February 17 2017. https:\/\/arxiv.org\/abs\/1702.05448."},{"key":"e_1_3_4_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2023.104809"},{"key":"e_1_3_4_8_1","doi-asserted-by":"crossref","unstructured":"Chiou M.-J. C.-Y. Liao L.-W. Wang R. Zimmermann and J. Feng. 2021. \u201cST-HOI: A spatial-temporal baseline for human-object interaction detection in videos.\u201d In Proc. 2021 Workshop on Intelligent Cross-Data Analysis and Retrieval 9\u201317. Ottawa: Univ. of Ottawa.","DOI":"10.1145\/3463944.3469097"},{"key":"e_1_3_4_9_1","unstructured":"CII (Construction Industry Institute). 2001. Engineering productivity measurement. RR156-11:286. Austin TX: CII."},{"key":"e_1_3_4_10_1","doi-asserted-by":"crossref","unstructured":"Garcia-Garcia S. and R. Pinto-El\u00edas. 2022. \u201cHuman activity recognition implenting the yolo models.\u201d In Proc. 2022 Int. Conf. on Mechatronics Electronics and Automotive Engineering (ICMEAE) 127\u2013132. New York: IEEE.","DOI":"10.1109\/ICMEAE58636.2022.00029"},{"key":"e_1_3_4_11_1","doi-asserted-by":"crossref","unstructured":"Girshick R. 2015. \u201cFast R-CNN.\u201d In Proc. 2015 IEEE Int. Conf. on Computer Vision (ICCV) 1440\u20131448. New York: IEEE.","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_4_12_1","unstructured":"Gupta S. and J. Malik. 2015. \u201cVisual semantic role labeling.\u201d Preprint submitted May 17 2015. https:\/\/arxiv.org\/abs\/1505.04474."},{"key":"e_1_3_4_13_1","volume-title":"Advances in neural information processing systems","author":"Hamilton W.","year":"2017","unstructured":"Hamilton, W., Z. Ying, and J. Leskovec. 2017. \u201cInductive representation learning on large graphs.\u201d In Vol. 30 of Advances in neural information processing systems, edited by I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, and R. Garnett. Red Hook, NY: Curran Associates."},{"key":"e_1_3_4_14_1","doi-asserted-by":"crossref","unstructured":"He K. G. Gkioxari P. Doll\u00e1r and R. Girshick. 2017. \u201cMask R-CNN.\u201d In Proc. 2017 IEEE Int. Conf. on Computer Vision (ICCV) 2980\u20132988. New York: IEEE.","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_4_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2023.104932"},{"key":"e_1_3_4_16_1","volume-title":"Ultralytics\/yolov5: V3.1-bug fixes and performance improvements","author":"Jocher G.","year":"2020","unstructured":"Jocher, G., et al. 2020. Ultralytics\/yolov5: V3.1-bug fixes and performance improvements. Geneva: Zenodo. https:\/\/doi.org\/10.5281\/zenodo.4154370."},{"key":"e_1_3_4_17_1","unstructured":"Jocher G. A. Chaurasia and J. Qiu. 2023. \u201cUltralytics yolov8.\u201d Accessed September 25 2025. https:\/\/github.com\/ultralytics\/ultralytics."},{"key":"e_1_3_4_18_1","unstructured":"Khosiin M. W. J. J. Lin and C.-S. Chen. 2024. \u201cWorker accountability in computer vision for construction productivity measurement: A systematic review.\u201d In Proc. Int. Conf. on Construction Engineering and Project Management 775\u2013782. Seoul: Korea Institute of Construction Engineering and Management."},{"key":"e_1_3_4_19_1","doi-asserted-by":"crossref","unstructured":"Khosiin M. W. J. J. Lin E. A. Suryo K. P. Negara I. Aknuranda and C.-S. Chen. 2025. \u201cVideo-based productivity monitoring of worker and large-scale object interactions in construction sites.\u201d In Proc. 42nd Int. Symp. on Automation and Robotics in Construction 580\u2013587. Cambridge UK: International Association for Automation and Robotics in Construction.","DOI":"10.22260\/ISARC2025\/0076"},{"key":"e_1_3_4_20_1","doi-asserted-by":"crossref","unstructured":"Khosiin M. W. and A. Umam. 2023. \u201cImplementing a relational database in processing construction project documents.\u201d In Proc. 5th Int. Conf. on Rehabilitation and Maintenance in Civil Engineering 891\u2013900. Singapore: Springer.","DOI":"10.1007\/978-981-16-9348-9_79"},{"key":"e_1_3_4_21_1","unstructured":"K\u00f6p\u00fckl\u00fc O. X. Wei and G. Rigoll. 2019. \u201cYou only watch once: A unified CNN architecture for real-time spatiotemporal action localization.\u201d Preprint submitted November 15 2019. https:\/\/arxiv.org\/abs\/1911.06644."},{"key":"e_1_3_4_22_1","doi-asserted-by":"crossref","unstructured":"Li H. G. Zhu W. Zhen L. Ni P. Shen L. Zhang N. Wang and C. Hua. 2022. \u201cSpatial parsing and dynamic temporal pooling networks for human-object interaction detection.\u201d In Proc. 2022 Int. Joint Conf. on Neural Networks (IJCNN) 1\u20138. New York: IEEE.","DOI":"10.1109\/IJCNN55064.2022.9892087"},{"key":"e_1_3_4_23_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jobe.2021.103352"},{"key":"e_1_3_4_24_1","unstructured":"Liang Z. Y. Guan and J. Rojas. 2020a. \u201cVisual-semantic graph attention network for human-object interaction detection.\u201d Preprint submitted January 7 2020. https:\/\/arxiv.org\/abs\/2001.02302."},{"key":"e_1_3_4_25_1","unstructured":"Liang Z. J. Liu Y. Guan and J. Rojas. 2020b. \u201cPose-based modular network for human-object interaction detection.\u201d Preprint submitted August 5 2020. https:\/\/arxiv.org\/abs\/2008.02042."},{"key":"e_1_3_4_26_1","doi-asserted-by":"crossref","unstructured":"Liang Z. J. Liu Y. Guan and J. Rojas. 2021. \u201cVisual-semantic graph attention networks for human-object interaction detection.\u201d In Proc. 2021 IEEE Int. Conf. on Robotics and Biomimetics (ROBIO) 1441\u20131447. New York: IEEE.","DOI":"10.1109\/ROBIO54168.2021.9739429"},{"key":"e_1_3_4_27_1","doi-asserted-by":"crossref","unstructured":"Lin T.-Y. M. Maire S. Belongie J. Hays P. Perona D. Ramanan P. Doll\u00e1r and C. L. Zitnick. 2014. \u201cMicrosoft COCO: Common objects in context.\u201d In Proc. Computer Vision\u2014ECCV 2014 740\u2013755. Cham Switzerland: Springer.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_4_28_1","unstructured":"Mikolov T. I. Sutskever K. Chen G. Corrado and J. Dean. 2013. \u201cDistributed representations of words and phrases and their compositionality.\u201d In Proc. Advances in Neural Information Processing Systems (NeurIPS) 3111\u20133119. Red Hook NY: Curran Associates."},{"key":"e_1_3_4_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/219717.219748"},{"key":"e_1_3_4_30_1","volume-title":"Delivering on construction productivity is no longer optional","author":"Mischke J.","year":"2024","unstructured":"Mischke, J., and K. Stokvis, K. Vermeltfoort, and B. Biemans. 2024. Delivering on construction productivity is no longer optional. New York: McKinsey & Company."},{"key":"e_1_3_4_31_1","doi-asserted-by":"crossref","unstructured":"Mutegeki R. and D. S. Han. 2020. \u201cA CNN-LSTM approach to human activity recognition.\u201d In Proc. 2020 Int. Conf. on Artificial Intelligence in Information and Communication (ICAIIC) 362\u2013366. New York: IEEE.","DOI":"10.1109\/ICAIIC48513.2020.9065078"},{"key":"e_1_3_4_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2023.105157"},{"key":"e_1_3_4_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compind.2022.103610"},{"key":"e_1_3_4_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2012.09.012"},{"key":"e_1_3_4_35_1","doi-asserted-by":"crossref","unstructured":"Park J. J.-W. Park and J.-S. Lee. 2023. \u201cViPLO: Vision transformer based pose-conditioned self-loop graph for human-object interaction detection.\u201d In Proc. 2023 IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR) 17152\u201317162. New York: IEEE.","DOI":"10.1109\/CVPR52729.2023.01645"},{"key":"e_1_3_4_36_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2024.105451"},{"key":"e_1_3_4_37_1","doi-asserted-by":"publisher","DOI":"10.1061\/JCEMD4.COENG-14645"},{"key":"e_1_3_4_38_1","unstructured":"Ren S. K. He R. B. Girshick and J. Sun. 2015. \u201cFaster R-CNN: Towards real-time object detection with region proposal networks.\u201d Preprint submitted June 4 2015. https:\/\/arxiv.org\/abs\/1506.01497."},{"key":"e_1_3_4_39_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CP.1943-5487.0000898"},{"key":"e_1_3_4_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2005605"},{"key":"e_1_3_4_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2020.103138"},{"key":"e_1_3_4_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-46452-2_17"},{"key":"e_1_3_4_43_1","doi-asserted-by":"crossref","unstructured":"Sun H. and Y. Chen. 2022. \u201cReal-time elderly monitoring for senior safety by lightweight human action recognition.\u201d In Proc. 2022 IEEE 16th Int. Symp. on Medical Information and Communication Technology (ISMICT). New York: IEEE.","DOI":"10.1109\/ISMICT56646.2022.9828343"},{"key":"e_1_3_4_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2020.103356"},{"key":"e_1_3_4_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2024.3431628"},{"key":"e_1_3_4_46_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CP.1943-5487.0001024"},{"key":"e_1_3_4_47_1","doi-asserted-by":"crossref","unstructured":"Tran D. Q. Y. Jeon M. Park and S. Park. 2024. \u201cGPT-based logic reasoning for hazard identification in construction site using CCTV data.\u201d In Proc. 41st Int. Symp. on Automation and Robotics in Construction 291\u2013298. Cambridge UK: International Association for Automation and Robotics in Construction.","DOI":"10.22260\/ISARC2024\/0039"},{"key":"e_1_3_4_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.measurement.2023.113231"},{"key":"e_1_3_4_49_1","unstructured":"Vaswani A. N. Shazeer N. Parmar J. Uszkoreit L. Jones A. N. Gomez L. Kaiser and I. Polosukhin. 2017. \u201cAttention is all you need.\u201d Preprint submitted June 12 2017. https:\/\/arxiv.org\/abs\/1706.03762."},{"key":"e_1_3_4_50_1","unstructured":"Veli\u010dkovi\u0107 P. G. Cucurull A. Casanova A. Romero P. Li\u00f2 and Y. Bengio. 2018. \u201cGraph attention networks.\u201d Preprint submitted October 30 2017. https:\/\/arxiv.org\/abs\/1710.10903."},{"key":"e_1_3_4_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3259430"},{"key":"e_1_3_4_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2982225"},{"key":"e_1_3_4_53_1","doi-asserted-by":"crossref","unstructured":"Yao L. C. Mao and Y. Luo. 2019. \u201cGraph convolutional networks for text classification.\u201d In Vol. 30 of Proc. AAAI Conf. on Artificial Intelligence 7370\u20137377. Palo Alto CA: Association for the Advancement of Artificial Intelligence.","DOI":"10.1609\/aaai.v33i01.33017370"},{"key":"e_1_3_4_54_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2025.106178"},{"key":"e_1_3_4_55_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2024.102446"},{"key":"e_1_3_4_56_1","doi-asserted-by":"crossref","unstructured":"Zhang F. Z. Y. Yuan D. Campbell Z. Zhong and S. Gould. 2023a. \u201cExploring predicate visual context in detecting of human-object interactions.\u201d In Proc. IEEE\/CVF Int. Conf. on Computer Vision (ICCV) 10411\u201310421. New York: IEEE.","DOI":"10.1109\/ICCV51070.2023.00955"},{"key":"e_1_3_4_57_1","doi-asserted-by":"crossref","unstructured":"Zhang M. X. Wu Z. Yuan Q. He and X. Huang. 2023b. \u201cHuman-object-object interaction: Towards human-centric complex interaction detection.\u201d In Proc. 31st ACM Int. Conf. on Multimedia MM \u201923 2233\u20132242. New York: Association for Computing Machinery.","DOI":"10.1145\/3581783.3611813"},{"key":"e_1_3_4_58_1","doi-asserted-by":"publisher","DOI":"10.1061\/(ASCE)CP.1943-5487.0000975"}],"container-title":["Journal of Computing in Civil Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/ascelibrary.org\/doi\/pdf\/10.1061\/JCCEE5.CPENG-7157","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T03:51:56Z","timestamp":1772855516000},"score":1,"resource":{"primary":{"URL":"https:\/\/ascelibrary.org\/doi\/10.1061\/JCCEE5.CPENG-7157"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,1]]},"references-count":57,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,5,1]]}},"alternative-id":["10.1061\/JCCEE5.CPENG-7157"],"URL":"https:\/\/doi.org\/10.1061\/jccee5.cpeng-7157","relation":{},"ISSN":["0887-3801","1943-5487"],"issn-type":[{"value":"0887-3801","type":"print"},{"value":"1943-5487","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5,1]]},"assertion":[{"value":"2025-05-06","order":0,"name":"received","label":"Received","group":{"name":"publication_history","label":"Publication History"}},{"value":"2025-09-30","order":2,"name":"accepted","label":"Accepted","group":{"name":"publication_history","label":"Publication History"}},{"value":"2026-02-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}],"article-number":"04026025"}}