{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T05:22:41Z","timestamp":1781587361197,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62132001"],"award-info":[{"award-number":["62132001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Capital's Funds for Health Improvement and Research","award":["CFH 2024-2-40611"],"award-info":[{"award-number":["CFH 2024-2-40611"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755225","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:38Z","timestamp":1761377198000},"page":"9911-9919","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Multi-Agent Amodal Completion: Direct Synthesis with Fine-Grained Semantic Guidance"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-4525-8777","authenticated-orcid":false,"given":"Hongxing","family":"Fan","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9100-7899","authenticated-orcid":false,"given":"Lipeng","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4631-0172","authenticated-orcid":false,"given":"Haohua","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1883-0777","authenticated-orcid":false,"given":"Zehuan","family":"Huang","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9288-3823","authenticated-orcid":false,"given":"Jiangtao","family":"Wu","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8525-9163","authenticated-orcid":false,"given":"Lu","family":"Sheng","sequence":"additional","affiliation":[{"name":"School of Software, Beihang University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Alimama-Creative. [n.d.]. Alimama-creative\/flux-controlnet-inpainting. https:\/\/github.com\/alimama-creative\/FLUX-Controlnet-Inpainting"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00608"},{"key":"e_1_3_2_1_3_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00234"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02062"},{"key":"e_1_3_2_1_6_1","volume-title":"Attend-and-Excite: Attention-Based Semantic Guidance for Text-to-Image Diffusion Models. ACM transactions on Graphics (TOG)","author":"Chefer Hila","year":"2023","unstructured":"Hila Chefer, Yuval Alaluf, Yael Vinker, Lior Wolf, and Daniel Cohen-Or. 2023. Attend-and-Excite: Attention-Based Semantic Guidance for Text-to-Image Diffusion Models. ACM transactions on Graphics (TOG), Vol. 42, 4 (2023), 1-10."},{"key":"e_1_3_2_1_7_1","volume-title":"LayerFusion: Harmonized Multi-Layer Text-to-Image Generation with Generative Priors. arXiv preprint arXiv:2412.04460","author":"Dalva Yusuf","year":"2024","unstructured":"Yusuf Dalva, Yijun Li, Qing Liu, Nanxuan Zhao, Jianming Zhang, Zhe Lin, and Pinar Yanardag. 2024. LayerFusion: Harmonized Multi-Layer Text-to-Image Generation with Generative Priors. arXiv preprint arXiv:2412.04460 (2024)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00643"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_36"},{"key":"e_1_3_2_1_10_1","first-page":"43864","article-title":"Generating compositional scenes via Text-to-image RGBA Instance Generation","volume":"37","author":"Fontanella Alessandro","year":"2025","unstructured":"Alessandro Fontanella, Petru-Daniel Tudosiu, Yongxin Yang, Shifeng Zhang, and Sarah Parisot. 2025. Generating compositional scenes via Text-to-image RGBA Instance Generation. Advances in Neural Information Processing Systems, Vol. 37 (2025), 43864-43893.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.265"},{"key":"e_1_3_2_1_12_1","volume-title":"Generative adversarial nets. Advances in neural information processing systems","author":"Goodfellow Ian","year":"2014","unstructured":"Ian Goodfellow, Jean Pouget-Abadie, Mehdi Mirza, Bing Xu, David Warde-Farley, Sherjil Ozair, Aaron Courville, and Yoshua Bengio. 2014. Generative adversarial nets. Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_13_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840-6851."},{"key":"e_1_3_2_1_14_1","volume-title":"DiffuMatting: Synthesizing Arbitrary Objects with Matting-Level Annotation. In European Conference on Computer Vision. Springer, 396-413","author":"Hu Xiaobin","year":"2024","unstructured":"Xiaobin Hu, Xu Peng, Donghao Luo, Xiaozhong Ji, Jinlong Peng, Zhengkai Jiang, Jiangning Zhang, Taisong Jin, Chengjie Wang, and Rongrong Ji. 2024. DiffuMatting: Synthesizing Arbitrary Objects with Matting-Level Annotation. In European Conference on Computer Vision. Springer, 396-413."},{"key":"e_1_3_2_1_15_1","unstructured":"Diederik P Kingma Max Welling et al. 2013. Auto-encoding variational bayes."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00915"},{"key":"e_1_3_2_1_17_1","first-page":"16246","article-title":"Variational amodal object completion","volume":"33","author":"Ling Huan","year":"2020","unstructured":"Huan Ling, David Acuna, Karsten Kreis, Seung Wook Kim, and Sanja Fidler. 2020. Variational amodal object completion. Advances in Neural Information Processing Systems, Vol. 33 (2020), 16246-16257.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_18_1","unstructured":"OpenAI. 2024. GPT-4o System Card. arXiv:2410.21276 [cs.CL]"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00377"},{"key":"e_1_3_2_1_20_1","volume-title":"European Conference on Computer Vision. Springer, 38-55","author":"Quattrini Fabio","year":"2024","unstructured":"Fabio Quattrini, Vittorio Pippi, Silvia Cascianelli, and Rita Cucchiara. 2024. Alfie: Democratising RGBA Image Generation With No $$$. In European Conference on Computer Vision. Springer, 38-55."},{"key":"e_1_3_2_1_21_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_22_1","unstructured":"Tianhe Ren Shilong Liu Ailing Zeng Jing Lin Kunchang Li He Cao Jiayu Chen Xinyu Huang Yukang Chen Feng Yan et al. 2024. Grounded SAM: Assembling Open-World Models for Diverse Visual Tasks. arXiv preprint arXiv:2401.14159 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"GrabCut'' interactive foreground extraction using iterated graph cuts. ACM transactions on graphics (TOG)","author":"Rother Carsten","year":"2004","unstructured":"Carsten Rother, Vladimir Kolmogorov, and Andrew Blake. 2004. '' GrabCut'' interactive foreground extraction using iterated graph cuts. ACM transactions on graphics (TOG), Vol. 23, 3 (2004), 309-314."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658157"},{"key":"e_1_3_2_1_25_1","first-page":"2","article-title":"Investigating global effects in visual occlusion: From a partly occluded square to the back of a tree-trunk","volume":"102","author":"van Lier Rob","year":"1999","unstructured":"Rob van Lier. 1999. Investigating global effects in visual occlusion: From a partly occluded square to the back of a tree-trunk. Acta Psychologica, Vol. 102, 2-3 (1999), 203-220.","journal-title":"Acta Psychologica"},{"key":"e_1_3_2_1_26_1","volume-title":"Image quality assessment: from error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing, Vol. 13, 4 (2004), 600-612."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00869"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00771"},{"key":"e_1_3_2_1_29_1","volume-title":"LR-GAN: Layered Recursive Generative Adversarial Networks for Image Generation. In International Conference on Learning Representations.","author":"Yang Jianwei","year":"2017","unstructured":"Jianwei Yang, Anitha Kannan, Dhruv Batra, and Devi Parikh. 2017. LR-GAN: Layered Recursive Generative Adversarial Networks for Image Generation. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.visres.2018.02.011"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02645"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00384"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658150"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_35_1","volume-title":"Text2Layer: Layered image generation using latent diffusion model. arXiv preprint arXiv:2307.09781","author":"Zhang Xinyang","year":"2023","unstructured":"Xinyang Zhang, Wentian Zhao, Xin Lu, and Jeff Chien. 2023. Text2Layer: Layered image generation using latent diffusion model. arXiv preprint arXiv:2307.09781 (2023)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00369"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755225","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:49:12Z","timestamp":1765309752000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755225"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":36,"alternative-id":["10.1145\/3746027.3755225","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755225","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}