{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:06:00Z","timestamp":1765343160498,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","funder":[{"name":"Future of Work at the Human-Technology Frontier (FW-HTF)","award":["1839971"],"award-info":[{"award-number":["1839971"]}]},{"name":"Partnership for Innovation: Technology Transfer (PFI-TT)","award":["2329804"],"award-info":[{"award-number":["2329804"]}]},{"DOI":"10.13039\/501100006465","name":"Korea Creative Content Agency","doi-asserted-by":"publisher","award":["RS-2024-00345025"],"award-info":[{"award-number":["RS-2024-00345025"]}],"id":[{"id":"10.13039\/501100006465","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Institute of Information & communications Technology Planning & Evaluation (IITP)","award":["RS-2019-II19007"],"award-info":[{"award-number":["RS-2019-II19007"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754769","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:27:39Z","timestamp":1761377259000},"page":"52-61","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Occlusion-Aware Temporally Consistent Amodal Completion for 3D Human-Object Interaction Reconstruction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3154-1201","authenticated-orcid":false,"given":"Hyungjun","family":"Doh","sequence":"first","affiliation":[{"name":"Elmore Family School of Electrical and Computer Engineering, Purdue University, West Lafayette, IN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5426-6216","authenticated-orcid":false,"given":"Dong 
In","family":"Lee","sequence":"additional","affiliation":[{"name":"Department of Artificial Intelligence, Korea University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6965-6938","authenticated-orcid":false,"given":"Seunggeun","family":"Chi","sequence":"additional","affiliation":[{"name":"Elmore Family School of Electrical and Computer Engineering, Purdue University, West Lafayette, IN, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1545-0058","authenticated-orcid":false,"given":"Pin-Hao","family":"Huang","sequence":"additional","affiliation":[{"name":"Honda Research Institute USA, San Jose, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1433-551X","authenticated-orcid":false,"given":"Kwonjoon","family":"Lee","sequence":"additional","affiliation":[{"name":"Honda Research Institute USA, San Jose, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7349-0018","authenticated-orcid":false,"given":"Sangpil","family":"Kim","sequence":"additional","affiliation":[{"name":"Department of Artificial Intelligence, Korea University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8639-5135","authenticated-orcid":false,"given":"Karthik","family":"Ramani","sequence":"additional","affiliation":[{"name":"Elmore Family School of Electrical and Computer Engineering & School of Mechanical Engineering, Purdue University, West Lafayette, IN, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"K. Bellock. [n.d.]. alphashape. https:\/\/github.com\/bellockk\/alphashape. GitHub repository accessed 2025-04-11."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01547"},{"volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4567-4576","author":"Chen A.","key":"e_1_3_2_2_3_1","unstructured":"A. Chen, B. Smith, and C. Lee. 2023. Amodal 3D Shape from Partial Views. 
In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4567-4576."},{"key":"e_1_3_2_2_4_1","unstructured":"Seunggeun Chi Enna Sachdeva Pin-Hao Huang and Kwonjoon Lee. 2025. Contact-Aware Amodal Completion for Human-Object Interaction via Multi-Regional Inpainting. arXiv:2508.00427 [cs.CV] https:\/\/arxiv.org\/abs\/2508.00427"},{"key":"e_1_3_2_2_5_1","volume-title":"Flatten: optical flow-guided attention for consistent text-to-video editing. arXiv preprint arXiv:2310.05922","author":"Cong Yuren","year":"2023","unstructured":"Yuren Cong, Mengmeng Xu, Christian Simon, Shoufa Chen, Jiawei Ren, Yanping Xie, Juan-Manuel Perez-Rua, Bodo Rosenhahn, Tao Xiang, and Sen He. 2023. Flatten: optical flow-guided attention for consistent text-to-video editing. arXiv preprint arXiv:2310.05922 (2023)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01965"},{"key":"e_1_3_2_2_8_1","volume-title":"Tokenflow: Consistent diffusion features for consistent video editing. arXiv preprint arXiv:2307.10373","author":"Geyer Michal","year":"2023","unstructured":"Michal Geyer, Omer Bar-Tal, Shai Bagon, and Tali Dekel. 2023. Tokenflow: Consistent diffusion features for consistent video editing. 
arXiv preprint arXiv:2307.10373 (2023)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00067"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16788-1_18"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-01984-1"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00583"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00318"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_24"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 789-798","author":"Kim M.","key":"e_1_3_2_2_16_1","unstructured":"M. Kim, J. Park, and K. Lee. 2023. Monocular Differentiable Rendering for Self-Supervised 3D Amodal Masks. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 789-798."},{"key":"e_1_3_2_2_17_1","unstructured":"Diederik P Kingma Max Welling et al. 2013. Auto-encoding variational bayes."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00055"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_11"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00107"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2345-2354","author":"Li P.","key":"e_1_3_2_2_21_1","unstructured":"P. Li, Q. Zhang, and R. Others. 2022. Compositional Models for Amodal Layout Completion. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 
2345-2354."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00494"},{"key":"e_1_3_2_2_23_1","volume-title":"Vdt: General-purpose video diffusion transformers via mask modeling. arXiv preprint arXiv:2305.13311","author":"Lu Haoyu","year":"2023","unstructured":"Haoyu Lu, Guoxing Yang, Nanyi Fei, Yuqi Huo, Zhiwu Lu, Ping Luo, and Mingyu Ding. 2023. Vdt: General-purpose video diffusion transformers via mask modeling. arXiv preprint arXiv:2305.13311 (2023)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01952"},{"key":"e_1_3_2_2_25_1","volume-title":"GS2Pose: Two-stage 6D Object Pose Estimation Guided by Gaussian Splatting. arXiv preprint arXiv:2411.03807","author":"Mei Jilan","year":"2024","unstructured":"Jilan Mei, Junbo Li, and Cai Meng. 2024. GS2Pose: Two-stage 6D Object Pose Estimation Guided by Gaussian Splatting. arXiv preprint arXiv:2411.03807 (2024)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"e_1_3_2_2_27_1","volume-title":"AIM 2024 sparse neural rendering challenge: Dataset and benchmark. arXiv preprint arXiv:2409.15041","author":"Nazarczuk Michal","year":"2024","unstructured":"Michal Nazarczuk, Thomas Tanay, Sibi Catley-Chandar, Richard Shaw, Radu Timofte, and Eduardo P\u00e9rez-Pellitero. 2024. AIM 2024 sparse neural rendering challenge: Dataset and benchmark. arXiv preprint arXiv:2409.15041 (2024)."},{"key":"e_1_3_2_2_28_1","first-page":"1","article-title":"Learning Disentangled Shape-Texture for Amodal Completion","author":"Nguyen H.","year":"2022","unstructured":"H. Nguyen, T. Davis, and X. Xu. 2022. Learning Disentangled Shape-Texture for Amodal Completion. In Advances in Neural Information Processing Systems (NeurIPS). 
1-12.","journal-title":"Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00377"},{"key":"e_1_3_2_2_30_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_2_31_1","volume-title":"Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Doll\u00e1r, and Christoph Feichtenhofer.","author":"Ravi Nikhila","year":"2024","unstructured":"Nikhila Ravi, Valentin Gabeur, Yuan-Ting Hu, Ronghang Hu, Chaitanya Ryali, Tengyu Ma, Haitham Khedr, Roman R\u00e4dle, Chloe Rolland, Laura Gustafson, Eric Mintun, Junting Pan, Kalyan Vasudev Alwala, Nicolas Carion, Chao-Yuan Wu, Ross Girshick, Piotr Doll\u00e1r, and Christoph Feichtenhofer. 2024. SAM 2: Segment Anything in Images and Videos. arXiv preprint arXiv:2408.00714 (2024). https:\/\/arxiv.org\/abs\/2408.00714"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"e_1_3_2_2_34_1","first-page":"92184","article-title":"Occfusion: Rendering occluded humans with generative diffusion priors","volume":"37","author":"Sun Adam","year":"2024","unstructured":"Adam Sun, Tiange Xiang, Scott Delp, Fei-Fei Li, and Ehsan Adeli. 2024. Occfusion: Rendering occluded humans with generative diffusion priors. Advances in Neural Information Processing Systems, Vol. 
37 (2024), 92184-92209.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00538"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00323"},{"volume-title":"RAFT: Recurrent All-Pairs Field Transforms for Optical Flow. In European Conference on Computer Vision (ECCV). 402-419","author":"Teed Z.","key":"e_1_3_2_2_37_1","unstructured":"Z. Teed and J. Deng. 2020. RAFT: Recurrent All-Pairs Field Transforms for Optical Flow. In European Conference on Computer Vision (ECCV). 402-419."},{"key":"e_1_3_2_2_38_1","volume-title":"European Conference on Computer Vision. Springer, 36-54","author":"Wang Yihan","year":"2024","unstructured":"Yihan Wang, Lahav Lipson, and Jia Deng. 2024. Sea-raft: Simple, efficient, accurate raft for optical flow. In European Conference on Computer Vision. Springer, 36-54."},{"key":"e_1_3_2_2_39_1","volume-title":"Image quality assessment: from error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing, Vol. 13, 4 (2004), 600-612."},{"volume-title":"European Conference on Computer Vision (ECCV). 341-356","author":"Wu J.","key":"e_1_3_2_2_40_1","unstructured":"J. Wu, Z. Yang, and H. Kim. 2022. Self-Supervised Amodal Reconstruction from Single Images. In European Conference on Computer Vision (ECCV). 341-356."},{"key":"e_1_3_2_2_41_1","volume-title":"Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Xie Xianghui","year":"2024","unstructured":"Xianghui Xie, Bharat Lal Bhatnagar, Jan Eric Lenssen, and Gerard Pons-Moll. 2024. 
Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation. In IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00869"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3687759"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618160"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00684"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00961"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00245"},{"volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1234-1243","author":"Zhou X.","key":"e_1_3_2_2_49_1","unstructured":"X. Zhou, Y. Li, Z. Wang, and T. Others. 2023b. Amodal Instance Segmentation with Transformers. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 1234-1243."},{"key":"e_1_3_2_2_50_1","volume-title":"European conference on computer vision. Springer, 145-163","author":"Zhu Zehao","year":"2024","unstructured":"Zehao Zhu, Zhiwen Fan, Yifan Jiang, and Zhangyang Wang. 2024. Fsgs: Real-time few-shot view synthesis using gaussian splatting. In European conference on computer vision. 
Springer, 145-163."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754769","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T05:02:44Z","timestamp":1765342964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754769"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":50,"alternative-id":["10.1145\/3746027.3754769","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754769","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}