{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T15:34:42Z","timestamp":1758123282056,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681466","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"10592-10601","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["DiffHarmony++: Enhancing Image Harmonization with Harmony-VAE and Inverse Harmonization Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8469-1123","authenticated-orcid":false,"given":"Pengfei","family":"Zhou","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4798-4233","authenticated-orcid":false,"given":"Fangxiang","family":"Feng","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5259-7094","authenticated-orcid":false,"given":"Guang","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing Academy of Artificial Intelligence, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3543-6272","authenticated-orcid":false,"given":"Ruifan","family":"Li","sequence":"additional","affiliation":[{"name":"Beijing University of Post and Telecommunication, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2015-0674","authenticated-orcid":false,"given":"Xiaojie","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing University of Post and Telecommunication, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592450"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01799"},{"key":"e_1_3_2_1_3_1","unstructured":"Junyan Cao Wenyan Cong Li Niu Jianfu Zhang and Liqing Zhang. 2021. Deep image harmonization by bridging the reality gap. arXiv preprint arXiv:2103.17104."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25099"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611747"},{"key":"e_1_3_2_1_6_1","unstructured":"Jianqi Chen Zhengxia Zou Yilan Zhang Keyan Chen and Zhenwei Shi. 2023. Zero-shot image harmonization with generative model prior. arXiv preprint arXiv:2307.08182."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1179352.1141933"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428394"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01792"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00842"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2975979"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00377"},{"key":"e_1_3_2_1_13_1","article-title":"Transformer for image harmonization and beyond","author":"Guo Zonghui","year":"2022","unstructured":"Zonghui Guo, Zhaorui Gu, Bing Zheng, Junyu Dong, and Haiyong Zheng. 2022. Transformer for image harmonization and beyond. IEEE Transactions on Pattern Analysis and Machine Intelligence.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01460"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01610"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01909"},{"key":"e_1_3_2_1_17_1","first-page":"2","article-title":"Image harmonization with attention-based deep feature modulation","volume":"1","author":"Hao Guoqing","year":"2020","unstructured":"Guoqing Hao, Satoshi Iizuka, and Kazuhiro Fukui. 2020. Image harmonization with attention-based deep feature modulation. In BMVC. Vol. 1, 2.","journal-title":"BMVC."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, 33, 6840-- 6851.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision, 4832--4841","author":"Yifan","key":"e_1_3_2_1_20_1","unstructured":"Yifan Jiang et al. 2021. Ssh: a self-supervised framework for image harmonization. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, 4832--4841."},{"key":"e_1_3_2_1_21_1","first-page":"26565","article-title":"Elucidating the design space of diffusion-based generative models","volume":"35","author":"Karras Tero","year":"2022","unstructured":"Tero Karras, Miika Aittala, Timo Aila, and Samuli Laine. 2022. Elucidating the design space of diffusion-based generative models. Advances in Neural Information Processing Systems, 35, 26565--26577.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_40"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"e_1_3_2_1_24_1","unstructured":"Diederik P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Pierre-Yves Laffont Zhile Ren Xiaofeng Tao Chao Qian and James Hays. 2014. Transient attributes for high-level understanding and editing of outdoor scenes. ACM Transactions on graphics (TOG) 33 4 1--11.","DOI":"10.1145\/2601097.2601101"},{"key":"e_1_3_2_1_26_1","unstructured":"Jiajie Li Jian Wang Chen Wang and Jinjun Xiong. 2023. Image harmonization with diffusion model. arXiv preprint arXiv:2306.10441."},{"key":"e_1_3_2_1_27_1","unstructured":"Jiajie Li Jian Wang Chen Wang and Jinjun Xiong. 2023. Image harmonization with diffusion model. (2023). arXiv: 2306.10441 [cs.CV]."},{"key":"e_1_3_2_1_28_1","volume-title":"European Conference on Computer Vision. Springer, 334--349","author":"Liang Jingtang","year":"2022","unstructured":"Jingtang Liang, Xiaodong Cun, Chi-Man Pun, and Jue Wang. 2022. Spatialseparated curve rendering network for efficient and high-resolution image harmonization. In European Conference on Computer Vision. Springer, 334--349."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00924"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612451"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00169"},{"key":"e_1_3_2_1_33_1","unstructured":"Li Niu Wenyan Cong Liu Liu Yan Hong Bo Zhang Jing Liang and Liqing Zhang. 2021. Making images real again: a comprehensive survey on deep image composition. ArXiv abs\/2106.14490. https:\/\/api.semanticscholar.org\/CorpusID: 235658778."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00710"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28246"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_38_1","unstructured":"Hshmat Sahak Daniel Watson Chitwan Saharia and David Fleet. 2023. Denoising diffusion probabilistic models for robust image super-resolution in the wild. (2023). arXiv: 2302.07864 [cs.CV]."},{"key":"e_1_3_2_1_39_1","volume-title":"ACM SIGGRAPH 2022 Conference Proceedings.","author":"Saharia Chitwan","year":"2021","unstructured":"Chitwan Saharia, William Chan, Huiwen Chang, Chris A. Lee, Jonathan Ho, Tim Salimans, David J. Fleet, and Mohammad Norouzi. 2021. Palette: image-toimage diffusion models. ACM SIGGRAPH 2022 Conference Proceedings."},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 1620--1629","author":"Sofiiuk Konstantin","year":"2021","unstructured":"Konstantin Sofiiuk, Polina Popenova, and Anton Konushin. 2021. Foregroundaware semantic representations for image harmonization. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 1620--1629."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-61864-3_17"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1778765.1778862"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.299"},{"key":"e_1_3_2_1_44_1","unstructured":"Jeya Maria Jose Valanarasu et al. 2022. Interactive portrait harmonization. arXiv preprint arXiv:2203.08216."},{"key":"e_1_3_2_1_45_1","unstructured":"Yikai Wang Chenjie Cao and Yanwei Fu. 2023. Towards stable and faithful inpainting. arXiv preprint arXiv:2312.04831."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01204"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548031"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_18"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Su Xue Aseem Agarwala Julie Dorsey and Holly Rushmeier. 2012. Understanding and improving the realism of image composites. ACM Transactions on graphics (TOG) 31 4 1--10.","DOI":"10.1145\/2185520.2335435"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Pengfei Zhou Fangxiang Feng and Xiaojie Wang. 2024. Diffharmony: latent diffusion model meets image harmonization. (2024). arXiv: 2404.06139 [cs.CV].","DOI":"10.1145\/3652583.3657616"},{"key":"e_1_3_2_1_52_1","unstructured":"Zixin Zhu Xuelu Feng Dongdong Chen Jianmin Bao Le Wang Yinpeng Chen Lu Yuan and Gang Hua. 2023. Designing a better asymmetric vqgan for stablediffusion. arXiv preprint arXiv:2306.04632."},{"key":"e_1_3_2_1_53_1","unstructured":"Ziyue Zhu Zhao Zhang Zheng Lin Ruiqi Wu Zhi Chai and Chun-Le Guo. 2022. Image harmonization by matching regional references. arXiv preprint arXiv:2204.04715."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681466","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681466","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:57:47Z","timestamp":1750294667000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681466"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":53,"alternative-id":["10.1145\/3664647.3681466","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681466","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}