{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T06:09:36Z","timestamp":1766124576176,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1145\/3769534.3769595","type":"proceedings-article","created":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T06:05:38Z","timestamp":1766124338000},"page":"1-5","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Unspoken Details: Inferring Hidden Causality and Retrieving Domain-Specific Knowledge for Image Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1344-8162","authenticated-orcid":false,"given":"Wen","family":"You","sequence":"first","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8091-3760","authenticated-orcid":false,"given":"Zhijun","family":"Ma","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1026-3317","authenticated-orcid":false,"given":"Zeteng","family":"Lin","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3473-5155","authenticated-orcid":false,"given":"Troy TianYu","family":"Lin","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,18]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Wenhu Chen Hexiang Hu Chitwan Saharia and William\u00a0W Cohen. 2022. Re-imagen: Retrieval-augmented text-to-image generator. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2209.14491 (2022)."},{"key":"e_1_3_3_1_3_2","first-page":"6840","volume-title":"Advances in Neural Information Processing Systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances in Neural Information Processing Systems , H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 6840\u20136851. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_3_1_4_2","unstructured":"Kaiyi Huang Kaiyue Sun Enze Xie Zhenguo Li and Xihui Liu. 2023. T2i-compbench: A comprehensive benchmark for open-world compositional text-to-image generation. Advances in Neural Information Processing Systems 36 (2023) 78723\u201378747."},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Jeff Johnson Matthijs Douze and Herv\u00e9 J\u00e9gou. 2019. Billion-scale similarity search with GPUs. IEEE Transactions on Big Data 7 3 (2019) 535\u2013547.","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"Fr\u00e9d\u00e9ric Kaplan. 2015. A Map for Big Data Research in Digital Humanities. Frontiers in Digital Humanities Volume 2 - 2015 (2015). 10.3389\/fdigh.2015.00001","DOI":"10.3389\/fdigh.2015.00001"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01317"},{"key":"e_1_3_3_1_8_2","first-page":"9459","volume-title":"Advances in Neural Information Processing Systems","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems , H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 9459\u20139474. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/6b493230205f780e1bc26945df7481e5-Paper.pdf"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","unstructured":"Xiaochuan Li Baoyu Fan Runze Zhang Liang Jin Di Wang Zhenhua Guo Yaqian Zhao and Rengang Li. 2024. Image Content Generation with Causal Reasoning. Proceedings of the AAAI Conference on Artificial Intelligence 38 12 (Mar. 2024) 13646\u201313654. 10.1609\/aaai.v38i12.29269","DOI":"10.1609\/aaai.v38i12.29269"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Troy\u00a0TianYu Lin James She Yu-Ao Wang and Kang Zhang. 2025. Future ink: The collision of AI and Chinese calligraphy. ACM Journal on Computing and Cultural Heritage 18 1 (2025) 1\u201317.","DOI":"10.1145\/3700882"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Troy\u00a0TianYu Lin Boyan Zheng Haichuan Lin Wen You Kang Zhang and Chen Liang. 2025. Floating Strokes: A Spatial Interpretation and Modeling Method of Chinese Calligraphy. Proceedings of the ACM on Computer Graphics and Interactive Techniques 8 3 (2025) 1\u201312.","DOI":"10.1145\/3736782"},{"key":"e_1_3_3_1_13_2","unstructured":"Aman Madaan Niket Tandon Prakhar Gupta Skyler Hallinan Luyu Gao Sarah Wiegreffe Uri Alon Nouha Dziri Shrimai Prabhumoye Yiming Yang et\u00a0al. 2023. Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36 (2023) 46534\u201346594."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00649"},{"key":"e_1_3_3_1_15_2","unstructured":"Dustin Podell Zion English Kyle Lacey Andreas Blattmann Tim Dockhorn Jonas M\u00fcller Joe Penna and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.01952 (2023)."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_3_1_17_2","unstructured":"Chitwan Saharia William Chan Saurabh Saxena Lala Li Jay Whang Emily\u00a0L Denton Kamyar Ghasemipour Raphael Gontijo\u00a0Lopes Burcu Karagol\u00a0Ayan Tim Salimans et\u00a0al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Advances in neural information processing systems 35 (2022) 36479\u201336494."},{"key":"e_1_3_3_1_18_2","unstructured":"Rotem Shalev-Arkushin Rinon Gal Amit\u00a0H Bermano and Ohad Fried. 2025. ImageRAG: Dynamic image retrieval for reference-guided image generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.09411 (2025)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Bingyuan Wang Qifeng Chen and Zeyu Wang. 2025. Diffusion-based visual art creation: A survey and new perspectives. Comput. Surveys 57 10 (2025) 1\u201337.","DOI":"10.1145\/3728459"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/VR59515.2025.00067"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3615522.3615557"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Yu-Ao Wang James She Troy\u00a0TianYu Lin and Kang Zhang. 2025. AI Visual Art History: An Art Movement with Expanded Artistic Horizon. ACM Transactions on Multimedia Computing Communications and Applications 21 7 (2025) 1\u201316.","DOI":"10.1145\/3726868"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"}],"event":{"name":"VINCI 2025: Proceedings of the 18th International Symposium on Visual Information Communication and Interaction","location":"Linz Austria","acronym":"VINCI 2025"},"container-title":["Proceedings of the 18th International Symposium on Visual Information Communication and Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3769534.3769595","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T06:06:43Z","timestamp":1766124403000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3769534.3769595"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":22,"alternative-id":["10.1145\/3769534.3769595","10.1145\/3769534"],"URL":"https:\/\/doi.org\/10.1145\/3769534.3769595","relation":{},"subject":[],"published":{"date-parts":[[2025,12]]},"assertion":[{"value":"2025-12-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}