{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:07:14Z","timestamp":1765357634338,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,13]],"date-time":"2024-07-13T00:00:00Z","timestamp":1720828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,13]]},"DOI":"10.1145\/3641519.3657409","type":"proceedings-article","created":{"date-parts":[[2024,7,12]],"date-time":"2024-07-12T10:39:28Z","timestamp":1720780768000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Object-level Scene Deocclusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8434-3224","authenticated-orcid":false,"given":"Zhengzhe","family":"Liu","sequence":"first","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0879-7440","authenticated-orcid":false,"given":"Qing","family":"Liu","sequence":"additional","affiliation":[{"name":"Adobe Research, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0628-2851","authenticated-orcid":false,"given":"Chirui","family":"Chang","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9954-6294","authenticated-orcid":false,"given":"Jianming","family":"Zhang","sequence":"additional","affiliation":[{"name":"Adobe Research, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7677-2646","authenticated-orcid":false,"given":"Daniil","family":"Pakhomov","sequence":"additional","affiliation":[{"name":"Adobe Research, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0415-1765","authenticated-orcid":false,"given":"Haitian","family":"Zheng","sequence":"additional","affiliation":[{"name":"Adobe Research, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1154-9907","authenticated-orcid":false,"given":"Zhe","family":"Lin","sequence":"additional","affiliation":[{"name":"Adobe Research, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6777-7445","authenticated-orcid":false,"given":"Daniel","family":"Cohen-Or","sequence":"additional","affiliation":[{"name":"Tel Aviv University, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5238-593X","authenticated-orcid":false,"given":"Chi-Wing","family":"Fu","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong"}]}],"member":"320","published-online":{"date-parts":[[2024,7,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Jasmin Breitenstein and Tim Fingscheidt. 2022. Amodal cityscapes: a new dataset its generation and an amodal semantic segmentation challenge baseline. In IV.","DOI":"10.1109\/IV51971.2022.9827342"},{"key":"e_1_3_2_2_2_1","volume-title":"Monet: Unsupervised scene decomposition and representation. arXiv preprint arXiv:1901.11390","author":"Burgess P","year":"2019","unstructured":"Christopher\u00a0P Burgess, Loic Matthey, Nicholas Watters, Rishabh Kabra, Irina Higgins, Matt Botvinick, and Alexander Lerchner. 2019. Monet: Unsupervised scene decomposition and representation. arXiv preprint arXiv:1901.11390 (2019)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Helisa Dhamo Nassir Navab and Federico Tombari. 2019. Object-driven multi-layer scene decomposition from a single image. In ICCV.","DOI":"10.1109\/ICCV.2019.00547"},{"key":"e_1_3_2_2_4_1","volume-title":"Diffusion models beat GANs on image synthesis. NeurIPS","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat GANs on image synthesis. NeurIPS (2021)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Kiana Ehsani Roozbeh Mottaghi and Ali Farhadi. 2018. SeGAN: Segmenting and generating the invisible. In CVPR.","DOI":"10.1109\/CVPR.2018.00643"},{"key":"e_1_3_2_2_6_1","volume-title":"Genesis: Generative scene inference and sampling with object-centric latent representations. ICLR","author":"Engelcke Martin","year":"2020","unstructured":"Martin Engelcke, Adam\u00a0R Kosiorek, Oiwi\u00a0Parker Jones, and Ingmar Posner. 2020. Genesis: Generative scene inference and sampling with object-centric latent representations. ICLR (2020)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Patrick Follmann Rebecca K\u00f6nig Philipp H\u00e4rtinger Michael Klostermann and Tobias B\u00f6ttger. 2019. Learning to see the invisible: End-to-end trainable amodal instance segmentation. In WACV.","DOI":"10.1109\/WACV.2019.00146"},{"key":"e_1_3_2_2_8_1","volume-title":"Object-centric learning with slot attention. NeurIPS","author":"Francesco Locatello","year":"2020","unstructured":"Locatello Francesco, Weissenborn Dirk, Unterthiner Thomas, Mahendran Aravindh, Heigold Georg, Uszkoreit Jakob, Dosovitskiy Alexey, and Kipf Thomas. 2020. Object-centric learning with slot attention. NeurIPS (2020)."},{"key":"e_1_3_2_2_9_1","unstructured":"Klaus Greff Rapha\u00ebl\u00a0Lopez Kaufman Rishabh Kabra Nick Watters Christopher Burgess Daniel Zoran Loic Matthey Matthew Botvinick and Alexander Lerchner. 2019. Multi-object representation learning with iterative variational inference. In ICML."},{"key":"e_1_3_2_2_10_1","volume-title":"GANs trained by a two time-scale update rule converge to a local Nash equilibrium. NIPS","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. GANs trained by a two time-scale update rule converge to a local Nash equilibrium. NIPS (2017)."},{"key":"e_1_3_2_2_11_1","volume-title":"Denoising diffusion probabilistic models. NeurIPS","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. NeurIPS (2020)."},{"key":"e_1_3_2_2_12_1","volume-title":"NeurIPS Workshop","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. NeurIPS Workshop (2022)."},{"key":"e_1_3_2_2_13_1","volume-title":"Sail-vos: Semantic amodal instance level video object segmentation-a synthetic dataset and baselines. In CVPR.","author":"Hu Yuan-Ting","year":"2019","unstructured":"Yuan-Ting Hu, Hong-Shuo Chen, Kexin Hui, Jia-Bin Huang, and Alexander\u00a0G Schwing. 2019. Sail-vos: Semantic amodal instance level video object segmentation-a synthetic dataset and baselines. In CVPR."},{"key":"e_1_3_2_2_14_1","volume-title":"Li Fei-Fei, C Lawrence\u00a0Zitnick, and Ross Girshick.","author":"Johnson Justin","year":"2017","unstructured":"Justin Johnson, Bharath Hariharan, Laurens Van Der\u00a0Maaten, Li Fei-Fei, C Lawrence\u00a0Zitnick, and Ross Girshick. 2017. CLEVR: A diagnostic dataset for compositional language and elementary visual reasoning. In CVPR."},{"key":"e_1_3_2_2_15_1","unstructured":"Abhishek Kar Shubham Tulsiani Joao Carreira and Jitendra Malik. 2015. Amodal completion and size constancy in natural scenes. In ICCV."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Lei Ke Yu-Wing Tai and Chi-Keung Tang. 2021. Deep occlusion-aware instance segmentation with overlapping bilayers. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00401"},{"key":"e_1_3_2_2_17_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma P","year":"2013","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_2_18_1","volume-title":"Segment anything. arXiv preprint arXiv:2304.02643","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander\u00a0C Berg, Wan-Yen Lo, 2023. Segment anything. arXiv preprint arXiv:2304.02643 (2023)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Ke Li and Jitendra Malik. 2016. Amodal instance segmentation. In ECCV.","DOI":"10.1007\/978-3-319-46475-6_42"},{"key":"e_1_3_2_2_20_1","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C.\u00a0Lawrence Zitnick. 2014. Microsoft COCO: Common objects in context. In ECCV."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Buyu Liu Bingbing Zhuang and Manmohan Chandraker. 2022. Weakly But Deeply Supervised Occlusion-Reasoned Parametric Road Layouts. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01649"},{"key":"e_1_3_2_2_22_1","volume-title":"Repaint: Inpainting using denoising diffusion probabilistic models. In CVPR.","author":"Lugmayr Andreas","year":"2022","unstructured":"Andreas Lugmayr, Martin Danelljan, Andres Romero, Fisher Yu, Radu Timofte, and Luc Van\u00a0Gool. 2022. Repaint: Inpainting using denoising diffusion probabilistic models. In CVPR."},{"key":"e_1_3_2_2_23_1","volume-title":"Monolayout: Amodal scene layout from a single image. In WACV.","author":"Mani Kaustubh","year":"2020","unstructured":"Kaustubh Mani, Swapnil Daga, Shubhika Garg, Sai\u00a0Shankar Narasimhan, Madhava Krishna, and Krishna\u00a0Murthy Jatavallabhula. 2020. Monolayout: Amodal scene layout from a single image. In WACV."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"Rohit Mohan and Abhinav Valada. 2022a. Amodal panoptic segmentation. In CVPR.","DOI":"10.1109\/CVPR52688.2022.02035"},{"key":"e_1_3_2_2_25_1","volume-title":"Perceiving the invisible: Proposal-free amodal panoptic segmentation. RAL","author":"Mohan Rohit","year":"2022","unstructured":"Rohit Mohan and Abhinav Valada. 2022b. Perceiving the invisible: Proposal-free amodal panoptic segmentation. RAL (2022)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Tom Monnier Elliot Vincent Jean Ponce and Mathieu Aubry. 2021. Unsupervised layered image decomposition into object prototypes. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00852"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"crossref","unstructured":"Medhini Narasimhan Erik Wijmans Xinlei Chen Trevor Darrell Dhruv Batra Devi Parikh and Amanpreet Singh. 2020. Seeing the un-scene: Learning amodal semantic maps for room navigation. In ECCV.","DOI":"10.1007\/978-3-030-58523-5_30"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00730"},{"key":"e_1_3_2_2_29_1","unstructured":"OpenAI. 2023. GPT-4V(ision) System Card. (2023)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"crossref","unstructured":"Ege Ozguroglu Ruoshi Liu D\u00eddac Sur\u00eds Dian Chen Achal Dave Pavel Tokmakov and Carl Vondrick. 2024. pix2gestalt: Amodal Segmentation by Synthesizing Wholes. (2024).","DOI":"10.1109\/CVPR52733.2024.00377"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"Dim\u00a0P Papadopoulos Youssef Tamaazousti Ferda Ofli Ingmar Weber and Antonio Torralba. 2019. How to make a pizza: Learning a compositional layer-based GAN model. In CVPR.","DOI":"10.1109\/CVPR.2019.00819"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","unstructured":"Pulak Purkait Christopher Zach and Ian Reid. 2019. Seeing behind things: Extending semantic segmentation to occluded regions. In IROS.","DOI":"10.1109\/IROS40897.2019.8967582"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"crossref","unstructured":"Lu Qi Li Jiang Shu Liu Xiaoyong Shen and Jiaya Jia. 2019. Amodal instance segmentation with kins dataset. In CVPR.","DOI":"10.1109\/CVPR.2019.00313"},{"key":"e_1_3_2_2_34_1","volume-title":"Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer. TPAMI","author":"Ranftl Ren\u00e9","year":"2020","unstructured":"Ren\u00e9 Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, and Vladlen Koltun. 2020. Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer. TPAMI (2020)."},{"key":"e_1_3_2_2_35_1","unstructured":"Danilo\u00a0Jimenez Rezende Shakir Mohamed and Daan Wierstra. 2014. Stochastic backpropagation and approximate inference in deep generative models. In ICML."},{"key":"e_1_3_2_2_36_1","unstructured":"Kabra Rishabh Burgess Chris Matthey Loic Lopez\u00a0Kaufman Raphael Greff Klaus Reynolds Malcolm and Lerchner. Alexander. 2019. Multi-object datasets."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_38_1","unstructured":"Jascha Sohl-Dickstein Eric Weiss Niru Maheswaranathan and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In ICLM."},{"key":"e_1_3_2_2_39_1","volume-title":"Dreamcraft3D: Hierarchical 3D generation with bootstrapped diffusion prior. arXiv preprint arXiv:2310.16818","author":"Sun Jingxiang","year":"2023","unstructured":"Jingxiang Sun, Bo Zhang, Ruizhi Shao, Lizhen Wang, Wen Liu, Zhenda Xie, and Yebin Liu. 2023. Dreamcraft3D: Hierarchical 3D generation with bootstrapped diffusion prior. arXiv preprint arXiv:2310.16818 (2023)."},{"key":"e_1_3_2_2_40_1","unstructured":"Yihong Sun Adam Kortylewski and Alan Yuille. 2022. Amodal segmentation through out-of-task and out-of-distribution generalization with a Bayesian model. In CVPR."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"crossref","unstructured":"Roman Suvorov Elizaveta Logacheva Anton Mashikhin Anastasia Remizova Arsenii Ashukha Aleksei Silvestrov Naejin Kong Harshith Goka Kiwoong Park and Victor Lempitsky. 2022. Resolution-robust large mask inpainting with fourier convolutions. In WACV.","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"e_1_3_2_2_42_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In NIPS."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"crossref","unstructured":"Angtian Wang Yihong Sun Adam Kortylewski and Alan\u00a0L Yuille. 2020. Robust object detection under occlusion with context-aware compositionalnets. In CVPR.","DOI":"10.1109\/CVPR42600.2020.01266"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"crossref","unstructured":"Yuting Xiao Yanyu Xu Ziming Zhong Weixin Luo Jiawei Li and Shenghua Gao. 2021. Amodal segmentation based on visible region segmentation and shape prior. In AAAI.","DOI":"10.1609\/aaai.v35i4.16407"},{"key":"e_1_3_2_2_45_1","unstructured":"Chaohao Xie Shaohui Liu Chao Li Ming-Ming Cheng Wangmeng Zuo Xiao Liu Shilei Wen and Errui Ding. 2019. Image inpainting with learnable bidirectional attention maps. In ICCV."},{"key":"e_1_3_2_2_46_1","unstructured":"Xiaosheng Yan Feigege Wang Wenxi Liu Yuanlong Yu Shengfeng He and Jia Pan. 2019. Visualizing the invisible: Occluded vehicle segmentation and recovery. In ICCV."},{"key":"e_1_3_2_2_47_1","unstructured":"Jiahui Yu Zhe Lin Jimei Yang Xiaohui Shen Xin Lu and Thomas\u00a0S Huang. 2019. Free-form image inpainting with gated convolution. In ICCV."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"crossref","unstructured":"Xiaoding Yuan Adam Kortylewski Yihong Sun and Alan Yuille. 2021. Robust instance segmentation through reasoning about multi-object occlusion. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01099"},{"key":"e_1_3_2_2_49_1","volume-title":"Amodal Ground Truth and Completion in the Wild. arXiv preprint arXiv:2312.17247","author":"Zhan Guanqi","year":"2023","unstructured":"Guanqi Zhan, Chuanxia Zheng, Weidi Xie, and Andrew Zisserman. 2023. Amodal Ground Truth and Completion in the Wild. arXiv preprint arXiv:2312.17247 (2023)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Xiaohang Zhan Xingang Pan Bo Dai Ziwei Liu Dahua Lin and Chen\u00a0Change Loy. 2020. Self-supervised scene de-occlusion. In CVPR.","DOI":"10.1109\/CVPR42600.2020.00384"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"crossref","unstructured":"Richard Zhang Phillip Isola Alexei\u00a0A Efros Eli Shechtman and Oliver Wang. 2018. The unreasonable effectiveness of deep features as a perceptual metric. In CVPR.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"crossref","unstructured":"Ziheng Zhang Anpei Chen Ling Xie Jingyi Yu and Shenghua Gao. 2019. Learning semantics-aware distance map with semantics layering network for amodal instance segmentation. In ACM MM.","DOI":"10.1145\/3343031.3350911"},{"key":"e_1_3_2_2_54_1","volume-title":"Visiting the invisible: Layer-by-layer completed scene decomposition. IJCV","author":"Zheng Chuanxia","year":"2021","unstructured":"Chuanxia Zheng, Duy-Son Dao, Guoxian Song, Tat-Jen Cham, and Jianfei Cai. 2021. Visiting the invisible: Layer-by-layer completed scene decomposition. IJCV (2021)."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"crossref","unstructured":"Bolei Zhou Hang Zhao Xavier Puig Sanja Fidler Adela Barriuso and Antonio Torralba. 2017. Scene parsing through ADE20k dataset. In CVPR.","DOI":"10.1109\/CVPR.2017.544"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"crossref","unstructured":"Qiang Zhou Shiyin Wang Yitong Wang Zilong Huang and Xinggang Wang. 2021. Human de-occlusion: Invisible perception and recovery for humans. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00369"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"crossref","unstructured":"Yan Zhu Yuandong Tian Dimitris Metaxas and Piotr Doll\u00e1r. 2017. Semantic amodal segmentation. In CVPR.","DOI":"10.1109\/CVPR.2017.320"}],"event":{"name":"SIGGRAPH '24: Special Interest Group on Computer Graphics and Interactive Techniques Conference","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"],"location":"Denver CO USA","acronym":"SIGGRAPH '24"},"container-title":["Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657409","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3641519.3657409","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:35Z","timestamp":1750295375000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3641519.3657409"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,13]]},"references-count":57,"alternative-id":["10.1145\/3641519.3657409","10.1145\/3641519"],"URL":"https:\/\/doi.org\/10.1145\/3641519.3657409","relation":{},"subject":[],"published":{"date-parts":[[2024,7,13]]},"assertion":[{"value":"2024-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}