{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T22:27:12Z","timestamp":1781216832464,"version":"3.54.1"},"reference-count":52,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["623B2094"],"award-info":[{"award-number":["623B2094"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.01736","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"18682-18692","source":"Crossref","is-referenced-by-count":4,"title":["Less-to-More Generalization: Unlocking More Controllability by In-Context Generation"],"prefix":"10.1109","author":[{"given":"Shaojin","family":"Wu","sequence":"first","affiliation":[{"name":"ByteDance,Intelligent Creation Team,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mengqi","family":"Huang","sequence":"additional","affiliation":[{"name":"ByteDance,Intelligent Creation Team,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenxu","family":"Wu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yufeng","family":"Cheng","sequence":"additional","affiliation":[{"name":"ByteDance,Intelligent Creation Team,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fei","family":"Ding","sequence":"additional","affiliation":[{"name":"ByteDance,Intelligent Creation Team,China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qian","family":"He","sequence":"additional","affiliation":[{"name":"ByteDance,Intelligent Creation Team,China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Training a Helpful and Harmless Assistant With Reinforcement Learning From Human Feedback","author":"Bai","year":"2022","journal-title":"arXiv preprint arXiv:2204.05862"},{"key":"ref2","article-title":"Imagen 3","author":"Baldridge","year":"2024","journal-title":"arXiv preprint arXiv:2408.07009"},{"key":"ref3","article-title":"Weak-to-Strong Generalization: Eliciting Strong Capabilities With Weak Supervision","author":"Burns","year":"2023","journal-title":"arXiv preprint arXiv:2312.09390"},{"key":"ref4","article-title":"Diffusion Self-Distillation for Zero-Shot Customized Image Generation","author":"Cai","year":"2024","journal-title":"arXiv preprint arXiv:2411.18616"},{"key":"ref5","article-title":"Re-Imagen: Retrieval-Augmented Text-to-Image Generator","author":"Chen","year":"2022","journal-title":"arXiv preprint arXiv:2209.14491"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00868"},{"key":"ref7","first-page":"19822","article-title":"Cogview: Mastering Text-to-Image Generation Via Transformers","volume":"34","author":"Ding","year":"2021","journal-title":"NIPS"},{"key":"ref8","article-title":"Scaling Rectified Flow Transformers for High-Resolution Image Synthesis","volume-title":"ICML","author":"Esser","year":"2024"},{"key":"ref9","article-title":"An Image Is Worth One Word: Personalizing Text-to-Image Generation Using Textual Inversion","author":"Gal","year":"2022","journal-title":"arXiv preprint arXiv:2208.01618"},{"key":"ref10","author":"Glaese","year":"2022","journal-title":"Improving Alignment of Dialogue Agents Via Targeted Human Judgements"},{"key":"ref11","article-title":"Pulid: Pure and Lightning ID Customization Via Contrastive Alignment","volume-title":"NIPS","author":"Guo","year":"2024"},{"key":"ref12","article-title":"Vico: Plug-and-Play Visual Condition for Personalized Text-to-Image Generation","author":"Hao","year":"2023","journal-title":"arXiv preprint arXiv:2306.00971"},{"key":"ref13","first-page":"6840","article-title":"Denoising Diffusion Probabilistic Models","volume":"33","author":"Ho","year":"2020","journal-title":"NIPS"},{"key":"ref14","article-title":"Lora: Low-Rank Adaptation of Large Language Models","author":"Hu","year":"2021","journal-title":"arXiv preprint arXiv:2106.09685"},{"key":"ref15","article-title":"In-Context Lora for Diffusion Transformers","author":"Huang","year":"2024","journal-title":"arXiv preprint arXiv:2410.23775"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00714"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i4.32386"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00848"},{"key":"ref20","volume-title":"Flux: Official Inference Repository for Flux. 1 Models","year":"2024"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1312"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657469"},{"key":"ref23","article-title":"Realcustom++: Representing Images as Real-Word for Real-Time Customization","author":"Mao","year":"2024","journal-title":"arXiv preprint arXiv:2408.09744"},{"key":"ref24","article-title":"Glide: Towards Photorealistic Image Generation and Editing With Text-Guided Diffusion Models","author":"Nichol","year":"2021","journal-title":"arXiv preprint arXiv:2112.10741"},{"key":"ref25","author":"Oquab","year":"2023","journal-title":"Dinov2: Learning Robust Visual Features Without Supervision"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2011"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref28","article-title":"SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis","volume-title":"ICLR","author":"Podell","year":"2024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91907-7_15"},{"key":"ref30","first-page":"8748","article-title":"Learning Transferable Visual Models From Natural Language Supervision","volume-title":"ICML","author":"Radford","year":"2021"},{"key":"ref31","first-page":"88218831","article-title":"Zero-Shot Text-to-Image Generation","volume-title":"ICML","author":"Ramesh","year":"2021"},{"issue":"2","key":"ref32","first-page":"3","article-title":"Hierarchical Text-Conditional Image Generation With Clip Latents","volume":"1","author":"Ramesh","year":"2022","journal-title":"arXiv preprint arXiv:2204.06125"},{"key":"ref33","first-page":"1060","article-title":"Generative Adversarial Text to Image Synthesis","volume-title":"ICML","author":"Reed","year":"2016"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref37","first-page":"36479","article-title":"Photorealistic Text-to-Image Diffusion Models With Deep Language Understanding","volume":"35","author":"Saharia","year":"2022","journal-title":"NIPS"},{"key":"ref38","article-title":"Self-Critiquing Models for Assisting Human Evaluators","author":"Saunders","year":"2022","journal-title":"arXiv preprint arXiv:2206.05802"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00852"},{"key":"ref40","first-page":"22562265","article-title":"Deep Unsupervised Learning Using Nonequilibrium Thermodynamics","volume-title":"ICML","author":"Sohl-Dickstein","year":"2015"},{"key":"ref41","article-title":"Ominicontrol: Minimal and Universal Control for Diffusion Transformer","volume":"3","author":"Tan","year":"2024","journal-title":"arXiv preprint arXiv:2411.15098"},{"key":"ref42","volume-title":"x-flux","year":"2025"},{"key":"ref43","article-title":"Instantid: Zero-Shot Identity-Preserving Generation in Seconds","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv:2401.07519"},{"key":"ref44","article-title":"MS-Diffusion: Multi-Subject Zero-Shot Image Personalization With Layout Guidance","volume-title":"ICLR","author":"Wang","year":"2025"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.52202\/068431-1800"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"ref47","article-title":"Vmix: Improving Text-to-Image Diffusion Model With Cross-Attention Mixing Control","author":"Wu","year":"2024","journal-title":"arXiv preprint arXiv:2412.20800"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.01241"},{"key":"ref49","article-title":"Ipadapter: Text Compatible Image Prompt Adapter for Text-to-Image Diffusion Models","author":"Ye","year":"2023","journal-title":"arXiv preprint arXiv:2308.06721"},{"issue":"3","key":"ref50","first-page":"5","article-title":"Scaling Autoregressive Models for Content-Rich Text-to-Image Generation","volume":"2","author":"Yu","year":"2022","journal-title":"arXiv preprint arXiv:2206.10789"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00883"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00771"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11444897.pdf?arnumber=11444897","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:19:36Z","timestamp":1777612776000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11444897\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.01736","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}