{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T07:50:10Z","timestamp":1773820210340,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709404","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"2313-2324","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["YaART: Yet Another ART Rendering Technology"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9381-0541","authenticated-orcid":false,"given":"Sergey","family":"Kastryulin","sequence":"first","affiliation":[{"name":"Yandex, Moscow, Russian Federation &amp; Skoltech, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8863-0451","authenticated-orcid":false,"given":"Artem","family":"Konev","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5114-5039","authenticated-orcid":false,"given":"Alexander","family":"Shishenya","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2515-9478","authenticated-orcid":false,"given":"Eugene","family":"Lyapustin","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5968-9850","authenticated-orcid":false,"given":"Artem","family":"Khurshudov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3592-8271","authenticated-orcid":false,"given":"Alexander","family":"Tselousov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9202-4025","authenticated-orcid":false,"given":"Nikita","family":"Vinokurov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2420-9620","authenticated-orcid":false,"given":"Denis","family":"Kuznedelev","sequence":"additional","affiliation":[{"name":"Skoltech, Moscow, Russian Federation &amp; Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2435-2978","authenticated-orcid":false,"given":"Alexander","family":"Markovich","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-4024-1654","authenticated-orcid":false,"given":"Grigoriy","family":"Livshits","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0162-5616","authenticated-orcid":false,"given":"Alexey","family":"Kirillov","sequence":"additional","affiliation":[{"name":"Moscow State University, Moscow, Russian Federation &amp; Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-9465-7966","authenticated-orcid":false,"given":"Anastasiia","family":"Tabisheva","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9591-359X","authenticated-orcid":false,"given":"Liubov","family":"Chubarova","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8746-7643","authenticated-orcid":false,"given":"Marina","family":"Kaminskaia","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8367-0066","authenticated-orcid":false,"given":"Alexander","family":"Ustyuzhanin","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0676-5075","authenticated-orcid":false,"given":"Artemii","family":"Shvetsov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8217-7815","authenticated-orcid":false,"given":"Daniil","family":"Shlenskii","sequence":"additional","affiliation":[{"name":"Skoltech, Moscow, Russian Federation &amp; Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5750-8001","authenticated-orcid":false,"given":"Valerii","family":"Startsev","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1989-668X","authenticated-orcid":false,"given":"Dmitrii","family":"Kornilov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0397-6070","authenticated-orcid":false,"given":"Mikhail","family":"Romanov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7660-3666","authenticated-orcid":false,"given":"Dmitry","family":"Baranchuk","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1830-8252","authenticated-orcid":false,"given":"Artem","family":"Babenko","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3086-8858","authenticated-orcid":false,"given":"Sergei","family":"Ovcharenko","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6694-5398","authenticated-orcid":false,"given":"Valentin","family":"Khrulkov","sequence":"additional","affiliation":[{"name":"Yandex, Moscow, Russian Federation"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Dimitrov","author":"Arkhipkin V.","year":"2023","unstructured":"Arkhipkin, V., Filatov, A., Vasilev, V., Maltseva, A., Azizov, S., Pavlov, I., Agafonova, J., Kuznetsov, A., Dimitrov, D.: Kandinsky 3.0 technical report. arXiv preprint arXiv:2312.03511 (2023)"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02171"},{"key":"e_1_3_2_2_3_1","unstructured":"Betker J. Goh G. Jing L. Brooks T. Wang J. Li L. Ouyang L. Zhuang J. Lee J. Guo Y. et al.: Improving image generation with better captions. Computer Science. https:\/\/cdn. openai. com\/papers\/dall-e-3. pdf 2(3) 8 (2023)"},{"key":"e_1_3_2_2_4_1","volume-title":"Levine","author":"Black K.","year":"2023","unstructured":"Black, K., Janner, M., Du, Y., Kostrikov, I., Levine, S.: Training diffusion models with reinforcement learning. arXiv preprint arXiv:2305.13301 (2023)"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Borji A.: Pros and cons of gan evaluation measures. Computer vision and image understanding 179 41--65 (2019)","DOI":"10.1016\/j.cviu.2018.10.009"},{"key":"e_1_3_2_2_6_1","volume-title":"Krueger","author":"Caballero E.","year":"2022","unstructured":"Caballero, E., Gupta, K., Rish, I., Krueger, D.: Broken neural scaling laws. arXiv preprint arXiv:2210.14891 (2022)"},{"key":"e_1_3_2_2_7_1","unstructured":"Chen J. Yu J. Ge C. Yao L. Xie E. Wu Y. Wang Z. Kwok J. Luo P. Lu H. et al.: Pixart-alpha: Fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:2310.00426 (2023)"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"e_1_3_2_2_9_1","unstructured":"Chung H.W. Hou L. Longpre S. Zoph B. Tay Y. Fedus W. Li Y. Wang X. Dehghani M. Brahma S. et al.: Scaling instruction-finetuned language models. arXiv preprint arXiv:2210.11416 (2022)"},{"key":"e_1_3_2_2_10_1","volume-title":"Fleet","author":"Clark K.","year":"2023","unstructured":"Clark, K., Vicol, P., Swersky, K., Fleet, D.J.: Directly fine-tuning diffusion models on differentiable rewards. arXiv preprint arXiv:2309.17400 (2023)"},{"key":"e_1_3_2_2_11_1","unstructured":"Dai X. Hou J. Ma C.Y. Tsai S. Wang J. Wang R. Zhang P. Vandenhende S. Wang X. Dubey A. et al.: Emu: Enhancing image generation models using photogenic needles in a haystack. arXiv preprint arXiv:2309.15807 (2023)"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_13_1","volume-title":"Toutanova","author":"Devlin J.","year":"1810","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"e_1_3_2_2_14_1","volume-title":"Nichol","author":"Dhariwal P.","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. Advances in neural information processing systems 34, 8780--8794 (2021)"},{"key":"e_1_3_2_2_15_1","volume-title":"Lee","author":"Fan Y.","year":"2024","unstructured":"Fan, Y.,Watkins, O., Du, Y., Liu, H., Ryu, M., Boutilier, C., Abbeel, P., Ghavamzadeh, M., Lee, K., Lee, K.: Reinforcement learning for fine-tuning text-to-image diffusion models. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/132"},{"key":"e_1_3_2_2_17_1","volume-title":"Choi","author":"Hessel J.","year":"2021","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: Clipscore: A referencefree evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)."},{"key":"e_1_3_2_2_18_1","volume-title":"Hochreiter","author":"Heusel M.","year":"2017","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30 (2017)"},{"key":"e_1_3_2_2_19_1","first-page":"6840","volume-title":"Abbeel","author":"Ho J.","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems. vol. 33, pp. 6840--6851. Curran Associates, Inc. (2020), https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf"},{"key":"e_1_3_2_2_20_1","volume-title":"Training compute-optimal large language models. arXiv preprint arXiv:2203.15556","author":"Hoffmann J.","year":"2022","unstructured":"Hoffmann, J., Borgeaud, S., Mensch, A., Buchatskaya, E., Cai, T., Rutherford, E., Casas, D.d.L., Hendricks, L.A.,Welbl, J., Clark, A., et al.: Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)."},{"key":"e_1_3_2_2_21_1","volume-title":"Chen","author":"Hu E.J.","year":"2021","unstructured":"Hu, E.J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., Chen, W.: Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.5143773"},{"key":"e_1_3_2_2_23_1","volume-title":"Khrulkov","author":"Kastryulin S.","year":"2024","unstructured":"Kastryulin, S., Konev, A., Shishenya, A., Lyapustin, E., Khurshudov, A., Tselousov, A., Vinokurov, N., Kuznedelev, D., Markovich, A., Livshits, G., Kirillov, A., Tabisheva, A., Chubarova, L., Kaminskaia, M., Ustyuzhanin, A., Shvetsov, A., Shlenskii, D., Startsev, V., Kornilov, D., Romanov, M., Babenko, A., Ovcharenko, S., Khrulkov, V.: YaART: Yet Another ART Rendering Technology (2024), https:\/\/arxiv.org\/abs\/2404.05666"},{"key":"e_1_3_2_2_24_1","volume-title":"Ho","author":"Kingma D.","year":"2021","unstructured":"Kingma, D., Salimans, T., Poole, B., Ho, J.: Variational diffusion models. Advances in neural information processing systems 34, 21696--21707 (2021)"},{"key":"e_1_3_2_2_25_1","volume-title":"Levy","author":"Kirstain Y.","year":"2024","unstructured":"Kirstain, Y., Polyak, A., Singer, U., Matiana, S., Penna, J., Levy, O.: Pick-a-pic: An open dataset of user preferences for text-to-image generation. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"e_1_3_2_2_26_1","volume-title":"ICML deep learning workshop.","author":"Koch G.","year":"2015","unstructured":"Koch, G., Zemel, R., Salakhutdinov, R., et al.: Siamese neural networks for one-shot image recognition. In: ICML deep learning workshop. vol. 2. Lille (2015)"},{"key":"e_1_3_2_2_27_1","volume-title":"Gu","author":"Lee K.","year":"2023","unstructured":"Lee, K., Liu, H., Ryu, M.,Watkins,O., Du, Y., Boutilier, C., Abbeel, P., Ghavamzadeh, M., Gu, S.S.: Aligning text-to-image models using human feedback. arXiv preprint arXiv:2302.12192 (2023)"},{"key":"e_1_3_2_2_28_1","volume-title":"Yang","author":"Lee S.H.","year":"2024","unstructured":"Lee, S.H., Li, Y., Ke, J., Yoo, I., Zhang, H., Yu, J.,Wang, Q., Deng, F., Entis, G., He, J., Li, G., Kim, S., Essa, I., Yang, F.: Parrot: Pareto-optimal multi-reward reinforcement learning framework for text-to-image generation (2024)"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00898"},{"key":"e_1_3_2_2_30_1","volume-title":"Milanfar","author":"Mei K.","year":"2024","unstructured":"Mei, K., Tu, Z., Delbracio, M., Talebi, H., Patel, V.M., Milanfar, P.: Bigger is not always better: Scaling properties of latent diffusion models. arXiv preprint arXiv:2404.01367 (2024)"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247954"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_2_33_1","volume-title":"The Twelfth International Conference on Learning Representations (2023)","author":"Pernias P.","unstructured":"Pernias, P., Rampas, D., Richter, M.L., Pal, C., Aubreville, M.: W\u00fcrstchen: An efficient architecture for large-scale text-to-image diffusion models. In: The Twelfth International Conference on Learning Representations (2023)"},{"key":"e_1_3_2_2_34_1","volume-title":"Rombach","author":"Podell D.","year":"1952","unstructured":"Podell, D., English, Z., Lacey, K., Blattmann, A., Dockhorn, T., M\u00fcller, J., Penna, J., Rombach, R.: Sdxl: Improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"e_1_3_2_2_35_1","unstructured":"Pressman J.D. Crowson K. Contributors S.C.: Simulacra aesthetic captions. Tech. Rep. Version 1.0 Stability AI (2022) url https:\/\/github.com\/JD-P\/simulacraaesthetic- captions"},{"key":"e_1_3_2_2_36_1","volume-title":"Gulin","author":"Prokhorenkova L.","year":"2018","unstructured":"Prokhorenkova, L., Gusev, G., Vorobev, A., Dorogush, A.V., Gulin, A.: Catboost: unbiased boosting with categorical features. Advances in neural information processing systems 31 (2018)"},{"key":"e_1_3_2_2_37_1","first-page":"8748","volume-title":"International conference on machine learning.","author":"Radford A.","year":"2021","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et al.: Learning transferable visual models from natural language supervision. In: International conference on machine learning. pp. 8748--8763. PMLR (2021)"},{"key":"e_1_3_2_2_38_1","volume-title":"Chen","author":"Ramesh A.","year":"2022","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical textconditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1(2), 3 (2022)"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_41_1","first-page":"234","volume-title":"T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention--MICCAI 2015:  18th International Conference","author":"Ronneberger O.","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-Assisted Intervention--MICCAI 2015: 18th International Conference, Munich, Germany, October 5--9, 2015, Proceedings, Part III 18. pp. 234--241. Springer (2015)"},{"key":"e_1_3_2_2_42_1","first-page":"36479","volume":"35","author":"Saharia C.","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo Lopes, R., Karagol Ayan, B., Salimans, T., et al.: Photorealistic text-toimage diffusion models with deep language understanding. Advances in Neural Information Processing Systems 35, 36479--36494 (2022).","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_43_1","volume-title":"Gelly","author":"Sajjadi M.S.","year":"2018","unstructured":"Sajjadi, M.S., Bachem, O., Lucic, M., Bousquet, O., Gelly, S.: Assessing generative models via precision and recall. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_2_44_1","volume-title":"Klimov","author":"Schulman J.","year":"2017","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00517"},{"key":"e_1_3_2_2_46_1","volume-title":"Polosukhin","author":"Vaswani A.","year":"2017","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, ?., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_47_1","volume-title":"Naik","author":"Wallace B.","year":"2023","unstructured":"Wallace, B., Dang, M., Rafailov, R., Zhou, L., Lou, A., Purushwalkam, S., Ermon, S., Xiong, C., Joty, S., Naik, N.: Diffusion model alignment using direct preference optimization (2023)."},{"key":"e_1_3_2_2_48_1","volume-title":"Li","author":"Yang K.","year":"2023","unstructured":"Yang, K., Tao, J., Lyu, J., Ge, C., Chen, J., Li, Q., Shen, W., Zhu, X., Li, X.: Using human feedback to fine-tune diffusion models without any reward model. arXiv preprint arXiv:2311.13231 (2023)."},{"key":"e_1_3_2_2_49_1","volume-title":"Xiao","author":"Yu F.","unstructured":"Yu, F., Zhang, Y., Song, S., Seff, A., Xiao, J.: Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365 (2015)."},{"key":"e_1_3_2_2_50_1","volume-title":"Scaling autoregressive models for content-rich text-to-image generation. arXiv preprint arXiv:2206.10789 2(3), 5","author":"Yu J.","year":"2022","unstructured":"Yu, J., Xu, Y., Koh, J.Y., Luong, T., Baid, G., Wang, Z., Vasudevan, V., Ku, A., Yang, Y., Ayan, B.K., et al.: Scaling autoregressive models for content-rich text-to-image generation. arXiv preprint arXiv:2206.10789 2(3), 5 (2022)."},{"key":"e_1_3_2_2_51_1","volume-title":"Anandkumar","author":"Zheng H.","year":"2023","unstructured":"Zheng, H., Nie,W., Vahdat, A., Anandkumar, A.: Fast training of diffusion models with masked transformers. arXiv preprint arXiv:2306.09305 (2023)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709404","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709404","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:33:55Z","timestamp":1755358435000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709404"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":51,"alternative-id":["10.1145\/3690624.3709404","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709404","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}