{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:15Z","timestamp":1750309515731,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the Key R&D Program of Shandong Porvince, China","award":["Grant no. 2023CXGC010801"],"award-info":[{"award-number":["Grant no. 2023CXGC010801"]}]},{"name":"the Oversea Innovation Team Project of the 20 Regulations for New Universities funding program of Jinan","award":["Grant no. 2021GXRC073"],"award-info":[{"award-number":["Grant no. 2021GXRC073"]}]},{"name":"the Shandong Province Excellent Young Scientists Fund Program (Overseas)","award":["Grant no. 2022HWYQ-048"],"award-info":[{"award-number":["Grant no. 2022HWYQ-048"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681185","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"3180-3188","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["InstantAS: Minimum Coverage Sampling for Arbitrary-Size Image Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9970-191X","authenticated-orcid":false,"given":"Changshuo","family":"Wang","sequence":"first","affiliation":[{"name":"Shandong University, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3793-0847","authenticated-orcid":false,"given":"Mingzhe","family":"Yu","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3872-9062","authenticated-orcid":false,"given":"Lei","family":"Wu","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0273-5946","authenticated-orcid":false,"given":"Lei","family":"Meng","sequence":"additional","affiliation":[{"name":"Shandong University &amp; Shandong Research Institute of Industrial Technology, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3828-9834","authenticated-orcid":false,"given":"Xiang","family":"Li","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7290-5659","authenticated-orcid":false,"given":"Xiangxu","family":"Meng","sequence":"additional","affiliation":[{"name":"Shandong University, Jinan, Shandong, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/0304--4149(82)90051--5"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01389"},{"key":"e_1_3_2_1_3_1","volume-title":"Multidiffusion: Fusing diffusion paths for controlled image generation.","author":"Bar-Tal Omer","year":"2023","unstructured":"Omer Bar-Tal, Lior Yariv, Yaron Lipman, and Tali Dekel. 2023. Multidiffusion: Fusing diffusion paths for controlled image generation. (2023)."},{"key":"e_1_3_2_1_4_1","unstructured":"DeepFloydLab. 2023. Deepfloyd if. (2023). https:\/\/github.com\/deep-floyd\/IF"},{"key":"e_1_3_2_1_5_1","volume-title":"Conference and Workshop on Neural Information Processing Systems","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Conference and Workshop on Neural Information Processing Systems, Vol. 34 (2021)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_8_1","volume-title":"ElasticDiffusion: Training-free Arbitrary Size Image Generation. arXiv preprint arXiv:2311.18822","author":"Haji-Ali Moayed","year":"2023","unstructured":"Moayed Haji-Ali, Guha Balakrishnan, and Vicente Ordonez. 2023. ElasticDiffusion: Training-free Arbitrary Size Image Generation. arXiv preprint arXiv:2311.18822 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"He Yingqing","year":"2024","unstructured":"Yingqing He, Shaoshu Yang, Haoxin Chen, Xiaodong Cun, Menghan Xia, Yong Zhang, Xintao Wang, Ran He, Qifeng Chen, and Ying Shan. 2024. Scalecrafter: Tuning-free higher-resolution visual generation with diffusion models. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_10_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, Vol. 33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_11_1","unstructured":"Jonathan Ho and Tim Salimans. 2021. Classifier-Free Diffusion Guidance. (2021). https:\/\/openreview.net\/forum?id=qw8AKxfYbI"},{"key":"e_1_3_2_1_12_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_1_14_1","volume-title":"Mixture of diffusers for scene composition and high resolution image generation. arXiv preprint arXiv:2302.02412","author":"Jim\u00e9nez \u00c1lvaro Barbero","year":"2023","unstructured":"\u00c1lvaro Barbero Jim\u00e9nez. 2023. Mixture of diffusers for scene composition and high resolution image generation. arXiv preprint arXiv:2302.02412 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"e_1_3_2_1_18_1","volume-title":"Kingma and Max Welling","author":"Diederik","year":"2013","unstructured":"Diederik P. Kingma and Max Welling. 2013. Auto-Encoding Variational Bayes. CoRR, Vol. abs\/1312.6114 (2013). https:\/\/api.semanticscholar.org\/CorpusID:216078090"},{"key":"e_1_3_2_1_19_1","volume-title":"SyncDiffusion: Coherent Montage via Synchronized Joint Diffusions. arXiv preprint arXiv:2306.05178","author":"Lee Yuseung","year":"2023","unstructured":"Yuseung Lee, Kunho Kim, Hyunjin Kim, and Minhyuk Sung. 2023. SyncDiffusion: Coherent Montage via Synchronized Joint Diffusions. arXiv preprint arXiv:2306.05178 (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453","author":"Mou Chong","year":"2023","unstructured":"Chong Mou, Xintao Wang, Liangbin Xie, Jian Zhang, Zhongang Qi, Ying Shan, and Xiaohu Qie. 2023. T2i-adapter: Learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Machine Learning. PMLR, 8162--8171","author":"Nichol Alexander Quinn","year":"2021","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved denoising diffusion probabilistic models. In International Conference on Machine Learning. PMLR, 8162--8171."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01112"},{"key":"e_1_3_2_1_23_1","volume-title":"SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. arXiv preprint arXiv:2307.01952","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis. arXiv preprint arXiv:2307.01952 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_25_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_27_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In MICCAI. Springer, 234--241."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_1_29_1","first-page":"25278","article-title":"Laion-5b: An open large-scale dataset for training next generation image-text models","volume":"35","author":"Schuhmann Christoph","year":"2022","unstructured":"Christoph Schuhmann, Romain Beaumont, Richard Vencu, Cade Gordon, Ross Wightman, Mehdi Cherti, Theo Coombes, Aarush Katta, Clayton Mullis, Mitchell Wortsman, et al. 2022. Laion-5b: An open large-scale dataset for training next generation image-text models. Advances in Neural Information Processing Systems, Vol. 35 (2022), 25278--25294.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_30_1","volume-title":"International conference on machine learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256--2265."},{"key":"e_1_3_2_1_31_1","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. (2021). https:\/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_33_1","volume-title":"Conference and Workshop on Neural Information Processing Systems","volume":"33","author":"Vahdat Arash","year":"2020","unstructured":"Arash Vahdat and Jan Kautz. 2020. NVAE: A deep hierarchical variational autoencoder. Conference and Workshop on Neural Information Processing Systems, Vol. 33 (2020)."},{"key":"e_1_3_2_1_34_1","volume-title":"Conference and Workshop on Neural Information Processing Systems","volume":"30","author":"Den Oord Aaron Van","year":"2017","unstructured":"Aaron Van Den Oord, Oriol Vinyals, et al. 2017. Neural discrete representation learning. Conference and Workshop on Neural Information Processing Systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_35_1","volume-title":"MagicScroll: Nontypical Aspect-Ratio Image Generation for Visual Storytelling via Multi-Layered Semantic-Aware Denoising. arXiv preprint arXiv:2312.10899","author":"Wang Bingyuan","year":"2023","unstructured":"Bingyuan Wang, Hengyu Meng, Zeyu Cai, Lanjiong Li, Yue Ma, Qifeng Chen, and Zeyu Wang. 2023. MagicScroll: Nontypical Aspect-Ratio Image Generation for Visual Storytelling via Multi-Layered Semantic-Aware Denoising. arXiv preprint arXiv:2312.10899 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Kelvin CK Chan, and Chen Change Loy","author":"Wang Jianyi","year":"2023","unstructured":"Jianyi Wang, Zongsheng Yue, Shangchen Zhou, Kelvin CK Chan, and Chen Change Loy. 2023. Exploiting diffusion prior for real-world image super-resolution. arXiv preprint arXiv:2305.07015 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Pretraining is all you need for image-to-image translation. arXiv preprint arXiv:2205.12952","author":"Wang Tengfei","year":"2022","unstructured":"Tengfei Wang, Ting Zhang, Bo Zhang, Hao Ouyang, Dong Chen, Qifeng Chen, and Fang Wen. 2022. Pretraining is all you need for image-to-image translation. arXiv preprint arXiv:2205.12952 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01763"},{"key":"e_1_3_2_1_40_1","volume-title":"Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543","author":"Zhang Lvmin","year":"2023","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2023. Adding conditional control to text-to-image diffusion models. arXiv preprint arXiv:2302.05543 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Any-size-diffusion: Toward efficient text-driven synthesis for any-size hd images. arXiv preprint arXiv:2308.16582","author":"Zheng Qingping","year":"2023","unstructured":"Qingping Zheng, Yuanfan Guo, Jiankang Deng, Jianhua Han, Ying Li, Songcen Xu, and Hang Xu. 2023. Any-size-diffusion: Toward efficient text-driven synthesis for any-size hd images. arXiv preprint arXiv:2308.16582 (2023)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681185","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681185","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:02Z","timestamp":1750295882000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681185"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":42,"alternative-id":["10.1145\/3664647.3681185","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681185","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}