{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:21:10Z","timestamp":1765340470659,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100014219","name":"National Science Fund for Distinguished Young Scholars","doi-asserted-by":"publisher","award":["62025603"],"award-info":[{"award-number":["62025603"]}],"id":[{"id":"10.13039\/501100014219","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21B2037, U22B2051, U23A20383, U21A20472, 62176222, 62176223, 62176226, 62072386, 62072387, 62072389, 62002305, 62272401"],"award-info":[{"award-number":["U21B2037, U22B2051, U23A20383, U21A20472, 62176222, 62176223, 62176226, 62072386, 62072387, 62072389, 62002305, 62272401"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation of Fujian Province of China","award":["2021J06003, 2022J06001"],"award-info":[{"award-number":["2021J06003, 2022J06001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754920","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"11259-11268","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Generate Aligned Anomaly: Region-Guided Few-Shot Anomaly Image-Mask Pair Synthesis for Industrial Inspection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3266-6155","authenticated-orcid":false,"given":"Yilin","family":"Lu","sequence":"first","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3873-3860","authenticated-orcid":false,"given":"Jianghang","family":"Lin","sequence":"additional","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6026-5571","authenticated-orcid":false,"given":"Linhuang","family":"Xie","sequence":"additional","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5847-4749","authenticated-orcid":false,"given":"Kai","family":"Zhao","sequence":"additional","affiliation":[{"name":"vivo, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4325-6858","authenticated-orcid":false,"given":"Yansong","family":"Qu","sequence":"additional","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0800-0609","authenticated-orcid":false,"given":"Shengchuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7645-9606","authenticated-orcid":false,"given":"Liujuan","family":"Cao","sequence":"additional","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9163-2932","authenticated-orcid":false,"given":"Rongrong","family":"Ji","sequence":"additional","affiliation":[{"name":"Key Laboratory of Multimedia Trusted Perception and Efficient Computing, Ministry of Education of China, Xiamen University, Xiamen, Fujian, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00453"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01796"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01578-9"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00982"},{"volume-title":"Computer Graphics Forum","author":"Bermano Amit H","key":"e_1_3_2_1_5_1","unstructured":"Amit H Bermano, Rinon Gal, Yuval Alaluf, Ron Mokady, Yotam Nitzan, Omer Tov, Oren Patashnik, and Daniel Cohen-Or. 2022. State-of-the-Art in the Architecture, Methods and Applications of StyleGAN. In Computer Graphics Forum, Vol. 41. Wiley Online Library, 591-611."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1080\/03610927408827101"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_33"},{"key":"e_1_3_2_1_8_1","volume-title":"Sub-image anomaly detection with deep pyramid correspondences. arXiv preprint arXiv:2005.02357","author":"Cohen Niv","year":"2020","unstructured":"Niv Cohen and Yedid Hoshen. 2020. Sub-image anomaly detection with deep pyramid correspondences. arXiv preprint arXiv:2005.02357 (2020)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1979.4766909"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00951"},{"key":"e_1_3_2_1_11_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems, Vol. 34 (2021), 8780-8794."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00724"},{"key":"e_1_3_2_1_13_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25132"},{"key":"e_1_3_2_1_15_1","volume-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618","author":"Gal Rinon","year":"2022","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit H Bermano, Gal Chechik, and Daniel Cohen-Or. 2022. An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_17_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840-6851."},{"key":"e_1_3_2_1_18_1","volume-title":"Anomalyxfusion: Multi-modal anomaly synthesis with diffusion. arXiv preprint arXiv:2404.19444","author":"Hu Jie","year":"2024","unstructured":"Jie Hu, Yawen Huang, Yilin Lu, Guoyang Xie, Guannan Jiang, Yefeng Zheng, and Zhichao Lu. 2024a. Anomalyxfusion: Multi-modal anomaly synthesis with diffusion. arXiv preprint arXiv:2404.19444 (2024)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28696"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01878"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Ying Jin Jinlong Peng Qingdong He Teng Hu Hao Chen Jiafu Wu Wenbing Zhu Mingmin Chi Jun Liu Yabiao Wang et al. 2024. DualAnoDiff: Dual-Interrelated Diffusion Model for Few-Shot Anomaly Image Generation. arXiv preprint arXiv:2408.13509 (2024).","DOI":"10.1109\/CVPR52734.2025.02832"},{"key":"e_1_3_2_1_22_1","volume-title":"Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013","unstructured":"Diederik P Kingma. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_23_1","volume-title":"Segment Anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4015-4026","author":"Kirillov Alexander","year":"2023","unstructured":"Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alexander C. Berg, Wan-Yen Lo, Piotr Dollar, and Ross Girshick. 2023. Segment Anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). 4015-4026."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00954"},{"key":"e_1_3_2_1_25_1","volume-title":"Director3d: Real-world camera trajectory and 3d scene generation from text. Advances in neural information processing systems","author":"Li Xinyang","year":"2024","unstructured":"Xinyang Li, Zhangyu Lai, Linning Xu, Yansong Qu, Liujuan Cao, Shengchuan Zhang, Bo Dai, and Rongrong Ji. 2024. Director3d: Real-world camera trajectory and 3d scene generation from text. Advances in neural information processing systems, Vol. 37 (2024), 75125-75151."},{"key":"e_1_3_2_1_26_1","volume-title":"SynergyAmodal: Deocclude Anything with Text Control. arXiv preprint arXiv:2504.19506","author":"Li Xinyang","year":"2025","unstructured":"Xinyang Li, Chengjie Yi, Jiawei Lai, Mingbao Lin, Yansong Qu, Shengchuan Zhang, and Liujuan Cao. 2025. SynergyAmodal: Deocclude Anything with Text Control. arXiv preprint arXiv:2504.19506 (2025)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01954"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01481"},{"key":"e_1_3_2_1_29_1","first-page":"1611","article-title":"Defect image sample generation with GAN for improving defect recognition","volume":"17","author":"Niu Shuanlong","year":"2020","unstructured":"Shuanlong Niu, Bin Li, Xinggang Wang, and Hui Lin. 2020. Defect image sample generation with GAN for improving defect recognition. IEEE Transactions on Automation Science and Engineering, Vol. 17, 3 (2020), 1611-1622.","journal-title":"IEEE Transactions on Automation Science and Engineering"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2002.1017623"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01060"},{"key":"e_1_3_2_1_32_1","volume-title":"Explainable deep few-shot anomaly detection with deviation networks. arXiv preprint arXiv:2108.00462","author":"Pang Guansong","year":"2021","unstructured":"Guansong Pang, Choubo Ding, Chunhua Shen, and Anton van den Hengel. 2021. Explainable deep few-shot anomaly detection with deviation networks. arXiv preprint arXiv:2108.00462 (2021)."},{"key":"e_1_3_2_1_33_1","volume-title":"Drag Your Gaussian: Effective Drag-Based Editing with Score Distillation for 3D Gaussian Splatting. arXiv preprint arXiv:2501.18672","author":"Qu Yansong","year":"2025","unstructured":"Yansong Qu, Dian Chen, Xinyang Li, Xiaofan Li, Shengchuan Zhang, Liujuan Cao, and Rongrong Ji. 2025. Drag Your Gaussian: Effective Drag-Based Editing with Score Distillation for 3D Gaussian Splatting. arXiv preprint arXiv:2501.18672 (2025)."},{"key":"e_1_3_2_1_34_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01321"},{"key":"e_1_3_2_1_36_1","volume-title":"Pivotal tuning for latent-based editing of real images. ACM Transactions on graphics (TOG)","author":"Roich Daniel","year":"2022","unstructured":"Daniel Roich, Ron Mokady, Amit H Bermano, and Daniel Cohen-Or. 2022. Pivotal tuning for latent-based editing of real images. ACM Transactions on graphics (TOG), Vol. 42, 1 (2022), 1-13."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_38_1","first-page":"234","volume-title":"Munich","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015a. U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention-MICCAI 2015: 18th international conference, Munich, Germany, October 5-9, 2015, proceedings, part III 18. Springer, 234-241."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01392"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00195"},{"key":"e_1_3_2_1_42_1","volume-title":"Burcu Karagol Ayan, Tim Salimans, et al.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al., 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in neural information processing systems, Vol. 35 (2022), 36479-36494."},{"key":"e_1_3_2_1_43_1","volume-title":"Improved techniques for training gans. Advances in neural information processing systems","author":"Salimans Tim","year":"2016","unstructured":"Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and Xi Chen. 2016. Improved techniques for training gans. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1002\/9780470175637"},{"key":"e_1_3_2_1_45_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_1_46_1","volume-title":"Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems","author":"Song Yang","year":"2019","unstructured":"Yang Song and Stefano Ermon. 2019. Generative modeling by estimating gradients of the data distribution. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459838"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_29"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01109"},{"key":"e_1_3_2_1_50_1","volume-title":"SegGPT: Segmenting Everything In Context. arXiv preprint arXiv:2304.03284","author":"Wang Xinlong","year":"2023","unstructured":"Xinlong Wang, Xiaosong Zhang, Yue Cao, Wen Wang, Chunhua Shen, and Tiejun Huang. 2023. SegGPT: Segmenting Everything In Context. arXiv preprint arXiv:2304.03284 (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"Gan inversion: A survey","author":"Xia Weihao","year":"2022","unstructured":"Weihao Xia, Yulun Zhang, Yujiu Yang, Jing-Hao Xue, Bolei Zhou, and Ming-Hsuan Yang. 2022. Gan inversion: A survey. IEEE transactions on pattern analysis and machine intelligence, Vol. 45, 3 (2022), 3121-3138."},{"key":"e_1_3_2_1_52_1","volume-title":"ACM Multimedia","author":"Yue Pengfei","year":"2024","unstructured":"Pengfei Yue, Jianghang Lin, Shengchuan Zhang, Jie Hu, Yilin Lu, Hongwei Niu, Haixin Ding, Yan Zhang, GUANNAN JIANG, Liujuan Cao, et al., [n.d.]. Adaptive Selection based Referring Image Segmentation. In ACM Multimedia 2024."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00822"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00822"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00257"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01562"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00624"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754920","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:18:50Z","timestamp":1765340330000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754920"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":57,"alternative-id":["10.1145\/3746027.3754920","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754920","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}