{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T01:55:56Z","timestamp":1768096556691,"version":"3.49.0"},"reference-count":80,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T00:00:00Z","timestamp":1739318400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T00:00:00Z","timestamp":1739318400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62441604"],"award-info":[{"award-number":["62441604"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62476093"],"award-info":[{"award-number":["62476093"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s11263-025-02353-2","type":"journal-article","created":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T15:28:24Z","timestamp":1739374104000},"page":"3891-3917","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Smaller But Better: Unifying Layout Generation with Smaller Large Language Models"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1857-5473","authenticated-orcid":false,"given":"Peirong","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Jiaxin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Jiahuan","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Hongliang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5456-0957","authenticated-orcid":false,"given":"Lianwen","family":"Jin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,2,12]]},"reference":[{"key":"2353_CR1","doi-asserted-by":"crossref","unstructured":"Ainslie, J., Lee-Thorp, J., de\u00a0Jong, M., Zemlyanskiy, Y., Lebron, F., & Sanghai, S. (2023). GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints. In: EMNLP. pp. 4895\u20134901","DOI":"10.18653\/v1\/2023.emnlp-main.298"},{"key":"2353_CR2","unstructured":"Anil, R., Dai, A.M., et\u00a0al. (2023) PaLM 2 Technical Report. arXiv preprint arXiv:2305.10403"},{"key":"2353_CR3","doi-asserted-by":"crossref","unstructured":"Arroyo, D.M., Postels, J., & Tombari, F. (2021). Variational Transformer Networks for Layout Generation. In: CVPR. pp. 13642\u201313652","DOI":"10.1109\/CVPR46437.2021.01343"},{"issue":"322","key":"2353_CR4","doi-asserted-by":"publisher","first-page":"542","DOI":"10.1080\/01621459.1968.11009273","volume":"63","author":"S Blumenthal","year":"1968","unstructured":"Blumenthal, S. (1968). Multinomial sampling with partially categorized data. Journal of the American Statistical Association, 63(322), 542\u2013551.","journal-title":"Journal of the American Statistical Association"},{"key":"2353_CR5","unstructured":"Brown, T., Mann, B., et\u00a0al. (2020). Language Models are Few-Shot Learners. In: NeurIPS. vol.\u00a033, pp. 1877\u20131901"},{"key":"2353_CR6","doi-asserted-by":"crossref","unstructured":"Chai, S., Zhuang, L., Yan, F. (2023). LayoutDM: Transformer-Based Diffusion Model for Layout Generation. In: CVPR. pp. 18349\u201318358","DOI":"10.1109\/CVPR52729.2023.01760"},{"issue":"240","key":"2353_CR7","first-page":"1","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery, A., Narang, S., et al. (2023). PaLM: Scaling Language Modeling with Pathways. Journal of Machine Learning Research, 24(240), 1\u2013113.","journal-title":"Journal of Machine Learning Research"},{"key":"2353_CR8","unstructured":"Chung, H.W., Hou, L., et\u00a0al. (2022). Scaling Instruction-Finetuned Language Models. arXiv preprint arXiv:2210.11416"},{"key":"2353_CR9","doi-asserted-by":"crossref","unstructured":"Deka, B., Huang, Z., Franzen, C., Hibschman, J., Afergan, D., Li, Y., Nichols, J., & Kumar, R. (2017) Rico: A Mobile App Dataset for Building Data-Driven Design Applications. In: UIST. p. 845-854","DOI":"10.1145\/3126594.3126651"},{"key":"2353_CR10","unstructured":"Devlin, J., Chang, M.W., Lee, K., & Toutanova, K. (2019). BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In: NAACL. pp. 4171\u20134186"},{"key":"2353_CR11","unstructured":"Dubey, A., Jauhri, A., Pandey, A., Kadian, A., Al-Dahle, A., Letman, A., Mathur, A., Schelten, A., Yang, A., Fan, A., et\u00a0al. (2024). The LLaMA 3 Herd of Models. arXiv preprint arXiv:2407.21783"},{"key":"2353_CR12","doi-asserted-by":"crossref","unstructured":"Fan, A., Lewis, M., & Dauphin, Y. (2018). Hierarchical Neural Story Generation. In: ACL. pp. 889\u2013898","DOI":"10.18653\/v1\/P18-1082"},{"key":"2353_CR13","unstructured":"Feng, W., Zhu, W., Fu, T.j., Jampani, V., Akula, A., He, X., Basu, S., Wang, X.E., & Wang, W.Y. (2024) LayoutGPT: Compositional Visual Planning and Generation with Large Language Models. NeurIPS 36"},{"key":"2353_CR14","doi-asserted-by":"crossref","unstructured":"Fu, H., Cai, B., et\u00a0al. (2021). 3D-FRONT: 3D Furnished Rooms With Layouts and Semantics. In: ICCV. pp. 10933\u201310942","DOI":"10.1109\/ICCV48922.2021.01075"},{"key":"2353_CR15","unstructured":"Gunasekar, S., Zhang, Y., et\u00a0al. (2023). Textbooks Are All You Need. arXiv preprint arXiv:2306.11644"},{"key":"2353_CR16","doi-asserted-by":"crossref","unstructured":"Guo, S., Jin, Z., Sun, F., Li, J., Li, Z., Shi, Y., & Cao, N. (2021). Vinci: an intelligent graphic design system for generating advertising posters. In: CHI. pp. 1\u201317","DOI":"10.1145\/3411764.3445117"},{"key":"2353_CR17","doi-asserted-by":"crossref","unstructured":"Gupta, K., Lazarow, J., Achille, A., Davis, L.S., Mahadevan, V., & Shrivastava, A. (2021). LayoutTransformer: Layout Generation and Completion With Self-Attention. In: ICCV. pp. 1004\u20131014","DOI":"10.1109\/ICCV48922.2021.00104"},{"key":"2353_CR18","doi-asserted-by":"crossref","unstructured":"Haurilet, M., Roitberg, A., Martinez, M., & Stiefelhagen, R. (2019). WiSe - Slide Segmentation in the Wild. In: ICDAR","DOI":"10.1109\/ICDAR.2019.00062"},{"key":"2353_CR19","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., & Hochreiter, S.: GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium. In: NeurIPS. vol.\u00a030"},{"key":"2353_CR20","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., & Abbeel, P. (2020). Denoising Diffusion Probabilistic Models. In: NeurIPS., 33, 6840\u20136851.","journal-title":"Denoising Diffusion Probabilistic Models. In: NeurIPS."},{"key":"2353_CR21","unstructured":"Holtzman, A., Buys, J., Du, L., Forbes, M., & Choi, Y. (2020). The Curious Case of Neural Text Degeneration. In: ICLR"},{"key":"2353_CR22","doi-asserted-by":"crossref","unstructured":"Horita, D., Inoue, N., Kikuchi, K., Yamaguchi, K., & Aizawa, K. (2024) Retrieval-Augmented Layout Transformer for Content-Aware Layout Generation. In: CVPR. pp. 67\u201376","DOI":"10.1109\/CVPR52733.2024.00015"},{"key":"2353_CR23","unstructured":"Hu, E.J., yelong shen, Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., & Chen, W. (2022). LoRA: Low-Rank Adaptation of Large Language Models. In: ICLR"},{"key":"2353_CR24","doi-asserted-by":"crossref","unstructured":"Hui, M., Zhang, Z., Zhang, X., Xie, W., Wang, Y., & Lu, Y. (2023). Unifying Layout Generation With a Decoupled Diffusion Model. In: CVPR. pp. 1942\u20131951","DOI":"10.1109\/CVPR52729.2023.00193"},{"key":"2353_CR25","doi-asserted-by":"crossref","unstructured":"Inoue, N., Kikuchi, K., Simo-Serra, E., Otani, M., & Yamaguchi, K. (2023) LayoutDM: Discrete Diffusion Model for Controllable Layout Generation. In: CVPR. pp. 10167\u201310176","DOI":"10.1109\/CVPR52729.2023.00980"},{"key":"2353_CR26","doi-asserted-by":"crossref","unstructured":"Jiang, Z., Guo, J., et\u00a0al. (2023). LayoutFormer++: Conditional Graphic Layout Generation via Constraint Serialization and Decoding Space Restriction. In: CVPR. pp. 18403\u201318412","DOI":"10.1109\/CVPR52729.2023.01765"},{"key":"2353_CR27","doi-asserted-by":"publisher","first-page":"1096","DOI":"10.1609\/aaai.v36i1.19994","volume":"36","author":"Z Jiang","year":"2022","unstructured":"Jiang, Z., Sun, S., Zhu, J., Lou, J. G., & Zhang, D. (2022). Coarse-to-Fine Generative Modeling for Graphic Layouts. AAAI, 36, 1096\u20131103.","journal-title":"AAAI"},{"key":"2353_CR28","unstructured":"Jinze, B., Shuai, B., et\u00a0al. (2023). Qwen Technical Report. arXiv preprint arXiv:2309.16609"},{"key":"2353_CR29","doi-asserted-by":"crossref","unstructured":"Jyothi, A.A., Durand, T., He, J., Sigal, L., & Mori, G. (2019) LayoutVAE: Stochastic Scene Layout Generation From a Label Set. In: ICCV","DOI":"10.1109\/ICCV.2019.00999"},{"key":"2353_CR30","doi-asserted-by":"crossref","unstructured":"Kikuchi, K., Simo-Serra, E., Otani, M., & Yamaguchi, K. (2021). Constrained Graphic Layout Generation via Latent Optimization. In: ACM MM. pp. 88\u201496","DOI":"10.1145\/3474085.3475497"},{"key":"2353_CR31","doi-asserted-by":"crossref","unstructured":"Kong, X., Jiang, L., Chang, H., Zhang, H., Hao, Y., Gong, H., & Essa, I. (2022). BLT: Bidirectional Layout Transformer for Controllable Layout Generation. In: ECCV. pp. 474\u2013490","DOI":"10.1007\/978-3-031-19790-1_29"},{"key":"2353_CR32","doi-asserted-by":"crossref","unstructured":"Lee, H.Y., Jiang, L., Essa, I., Le, P.B., Gong, H., Yang, M.H., & Yang, W. (2020). Neural Design Network: Graphic Layout Generation with Constraints. In: ECCV. pp. 491\u2013506","DOI":"10.1007\/978-3-030-58580-8_29"},{"key":"2353_CR33","unstructured":"Levesque, H., Davis, E., & Morgenstern, L. (2012). The Winograd Schema Challenge. In: KR"},{"key":"2353_CR34","doi-asserted-by":"crossref","unstructured":"Levi, E., Brosh, E., Mykhailych, M., & Perez, M. (2023). DLT: Conditioned layout generation with Joint Discrete-Continuous Diffusion Layout Transformer. In: ICCV. pp. 2106\u20132115","DOI":"10.1109\/ICCV51070.2023.00201"},{"key":"2353_CR35","unstructured":"Li, J., Xu, T., Zhang, J., Hertzmann, A., & Yang, J. (2019). LayoutGAN: Generating Graphic Layouts with Wireframe Discriminator. In: ICLR"},{"issue":"10","key":"2353_CR36","doi-asserted-by":"publisher","first-page":"4039","DOI":"10.1109\/TVCG.2020.2999335","volume":"27","author":"J Li","year":"2021","unstructured":"Li, J., Yang, J., Zhang, J., Liu, C., Wang, C., & Xu, T. (2021). Attribute-Conditioned Layout GAN for Automatic Graphic Design. IEEE Transactions on Visualization and Computer Graphics, 27(10), 4039\u20134048.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2353_CR37","unstructured":"Li, Y., Bubeck, S., Eldan, R., Del\u00a0Giorno, A., Gunasekar, S., & Lee, Y.T. (2023). Textbooks Are All You Need II: Phi-1.5 Technical Report. arXiv preprint arXiv:2309.05463"},{"key":"2353_CR38","doi-asserted-by":"crossref","unstructured":"Lin, J., Guo, J., Sun, S., Xu, W., Liu, T., Lou, J.G., Zhang, D. (2023). A Parse-Then-Place Approach for Generating Graphic Layouts from Textual Descriptions. In: ICCV. pp. 23622\u201323631","DOI":"10.1109\/ICCV51070.2023.02159"},{"key":"2353_CR39","unstructured":"Lin, J., Guo, J., Sun, S., Yang, Z., Lou, J.G., & Zhang, D. (2024). LayoutPrompter: Awaken the Design Ability of Large Language Models. Advances in Neural Information Processing Systems (NeurIPS) 36"},{"key":"2353_CR40","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., et\u00a0al. (2014). Microsoft COCO: Common Objects in Context. In: ECCV. pp. 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2353_CR41","unstructured":"Loshchilov, I., Hutter, F. (2018). Decoupled Weight Decay Regularization. In: ICLR"},{"key":"2353_CR42","doi-asserted-by":"crossref","unstructured":"Monica\u00a0Haurilet, Z.A.H., Stiefelhagen, R. (2019) SPaSe - Multi-Label Page Segmentation for Presentation Slides. In: WACV","DOI":"10.1109\/WACV.2019.00082"},{"key":"2353_CR43","doi-asserted-by":"crossref","unstructured":"Nauata, N., Chang, K.H., Cheng, C.Y., Mori, G., & Furukawa, Y. (2020). House-GAN: Relational Generative Adversarial Networks for Graph-Constrained House Layout Generation. In: ECCV. pp. 162\u2013177. Springer","DOI":"10.1007\/978-3-030-58452-8_10"},{"key":"2353_CR44","unstructured":"Nijkamp, E., Pang, B., Hayashi, H., Tu, L., Wang, H., Zhou, Y., Savarese, S., & Xiong, C. (2023) CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis. In: ICLR"},{"issue":"8","key":"2353_CR45","doi-asserted-by":"publisher","first-page":"1200","DOI":"10.1109\/TVCG.2014.48","volume":"20","author":"P O\u2019Donovan","year":"2014","unstructured":"O\u2019Donovan, P., Agarwala, A., & Hertzmann, A. (2014). Learning layouts for single-pagegraphic designs. IEEE Transactions on Visualization and Computer Graphics, 20(8), 1200\u20131213.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2353_CR46","doi-asserted-by":"crossref","unstructured":"O\u2019Donovan, P., Agarwala, A., & Hertzmann, A. (2015). DesignScape: Design with Interactive Layout Suggestions. In: CHI. p. 1221-1224","DOI":"10.1145\/2702123.2702149"},{"key":"2353_CR47","unstructured":"OpenAI (2023). GPT-4 Technical Report. arXiv preprint arXiv:2303.08774"},{"key":"2353_CR48","unstructured":"Ouyang, L., Wu, J., et\u00a0al. (2022) Training Language Models to Follow Instructions with Human Feedback. In: NeurIPS. vol.\u00a035, pp. 27730\u201327744"},{"key":"2353_CR49","first-page":"8024","volume":"32","author":"A Paszke","year":"2019","unstructured":"Paszke, A., Gross, S., et al. (2019). PyTorch: An Imperative Style. High-Performance Deep Learning Library. In: NeurIPS., 32, 8024\u20138035.","journal-title":"High-Performance Deep Learning Library. In: NeurIPS."},{"key":"2353_CR50","doi-asserted-by":"crossref","unstructured":"Patil, A.G., Ben-Eliezer, O., Perel, O., & Averbuch-Elor, H. (2020). READ: Recursive Autoencoders for Document Layout Generation. In: CVPRW","DOI":"10.1109\/CVPRW50498.2020.00280"},{"key":"2353_CR51","unstructured":"Peng, B., Li, C., He, P., Galley, M., & Gao, J. (2023). Instruction Tuning with GPT-4. arXiv preprint arXiv:2304.03277"},{"issue":"2","key":"2353_CR52","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1109\/TVCG.2020.3030448","volume":"27","author":"C Qian","year":"2021","unstructured":"Qian, C., Sun, S., Cui, W., Lou, J. G., Zhang, H., & Zhang, D. (2021). Retrieve-Then-Adapt: Example-Based automatic generation for proportion-related infographics. IEEE Transactions on Visualization and Computer Graphics, 27(2), 443\u2013452.","journal-title":"IEEE Transactions on Visualization and Computer Graphics"},{"key":"2353_CR53","unstructured":"Radford, A., Narasimhan, K., et\u00a0al. (2018). Improving Language Understanding by Generative Pre-Training"},{"issue":"8","key":"2353_CR54","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., et al. (2019). Language Models are Unsupervised Multitask Learners. OpenAI blog, 1(8), 9.","journal-title":"OpenAI blog"},{"key":"2353_CR55","unstructured":"Raffel, C., Shazeer, N., Roberts, A., Lee, K., Narang, S., Matena, M., Zhou, Y., Li, W., & Liu, P.J. (2020). Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21(1)"},{"key":"2353_CR56","doi-asserted-by":"crossref","unstructured":"Rahman, S., Sermuga\u00a0Pandian, V.P., & Jarke, M. (2021). RUITE: Refining UI Layout Aesthetics Using Transformer Encoder. In: IUI. pp. 81\u201483","DOI":"10.1145\/3397482.3450716"},{"key":"2353_CR57","doi-asserted-by":"crossref","unstructured":"Rasley, J., Rajbhandari, S., Ruwase, O., & He, Y. (2020). DeepSpeed: System Optimizations Enable Training Deep Learning Models with Over 100 Billion Parameters. In: KDD. p. 3505-3506","DOI":"10.1145\/3394486.3406703"},{"key":"2353_CR58","unstructured":"Roemmele, M., Bejan, C.A., & Gordon, A.S. (2011). Choice of Plausible Alternatives: An Evaluation of Commonsense Causal Reasoning. In: AAAI Spring Symposium Series"},{"key":"2353_CR59","unstructured":"Roziere, B., Gehring, J., et\u00a0al. (2023). Code LLaMA: Open Foundation Models for Code. arXiv preprint arXiv:2308.12950"},{"key":"2353_CR60","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., & Birch, A. (2016). Neural Machine Translation of Rare Words with Subword Units. In: ACL. vol. 1: Long Papers, pp. 1715\u20131725","DOI":"10.18653\/v1\/P16-1162"},{"key":"2353_CR61","doi-asserted-by":"crossref","unstructured":"Steinbiss, V., Tran, B.H., Ney, H. (1994). Improvements in Beam Search. In: ICSLP","DOI":"10.21437\/ICSLP.1994-538"},{"key":"2353_CR62","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063","volume":"568","author":"J Su","year":"2024","unstructured":"Su, J., Ahmed, M., Lu, Y., Pan, S., Bo, W., & Liu, Y. (2024). RoFormer: Enhanced transformer with Rotary Position Embedding. Neurocomputing, 568, 127063.","journal-title":"Neurocomputing"},{"key":"2353_CR63","unstructured":"Sutskever, I., Vinyals, O., & Le, Q.V. (2014). Sequence to Sequence Learning with Neural Networks. In: NeurIPS. vol.\u00a027"},{"key":"2353_CR64","unstructured":"Tang, Z., Wu, C., Li, J., & Duan, N. (2024). LayoutNUWA: Revealing the Hidden Layout Expertise of Large Language Models. In: ICLR"},{"key":"2353_CR65","unstructured":"Touvron, H., Lavril, T., et\u00a0al. (2023a). LLaMA: Open and Efficient Foundation Language Models. arXiv preprint arXiv:2302.13971"},{"key":"2353_CR66","unstructured":"Touvron, H., Martin, L., et\u00a0al. (2023b). LLaMA 2: Open Foundation and Fine-Tuned Chat Models. arXiv preprint arXiv:2307.09288"},{"key":"2353_CR67","unstructured":"UnfilteredAI: NSFW-3B: A Dark, Unrestricted AI Model (2024), https:\/\/huggingface.co\/UnfilteredAI"},{"key":"2353_CR68","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L.u., Polosukhin, I. (2017). Attention is All you Need. In: NeurIPS. vol.\u00a030, pp. 6000\u20136010"},{"key":"2353_CR69","doi-asserted-by":"crossref","unstructured":"Wolf, T., Debut, L., et\u00a0al. (2020). Transformers: State-of-the-Art Natural Language Processing. In: EMNLP. pp. 38\u201345. Online","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"2353_CR70","unstructured":"Xie, J., Ye, K., Li, Y., Li, Y., Lin, K.Q., Zheng, Y., Shen, L., & Shou, M.Z. (2023). Learning Visual Prior via Generative Pre-Training. In: NeurIPS. vol.\u00a036, pp. 70562\u201370580"},{"key":"2353_CR71","unstructured":"Xue, H., Salim, F.D. (2023). PromptCast: A New Prompt-Based Learning Paradigm for Time Series Forecasting. IEEE Transactions on Knowledge and Data Engineering pp. 1\u201314"},{"key":"2353_CR72","unstructured":"Yang, A., Yang, B., et\u00a0al.: Qwen2 Technical Report. arXiv preprint arXiv:2407.10671 (2024)"},{"key":"2353_CR73","doi-asserted-by":"crossref","unstructured":"Yu, X., Chen, Z., Ling, Y., Dong, S., Liu, Z., Lu, Y. (2023). Temporal Data Meets LLM\u2013Explainable Financial Time Series Forecasting. arXiv preprint arXiv:2306.11025","DOI":"10.18653\/v1\/2023.emnlp-industry.69"},{"key":"2353_CR74","unstructured":"Zhang, B., Sennrich, R. (2019). Root Mean Square Layer Normalization. In: Advances in Neural Information Processing Systems. vol.\u00a032"},{"key":"2353_CR75","doi-asserted-by":"crossref","unstructured":"Zhang, J., Guo, J., Sun, S., Lou, J.G., Zhang, D. (2023). LayoutDiffusion: Improving Graphic Layout Generation by Discrete Diffusion Probabilistic Models. In: ICCV","DOI":"10.1109\/ICCV51070.2023.00664"},{"key":"2353_CR76","unstructured":"Zhang, P., Zeng, G., Wang, T., & Lu, W. (2024) TinyLLaMA: An Open-Source Small Language Model. arXiv preprint arXiv:2401.02385"},{"key":"2353_CR77","unstructured":"Zhang, S., Dong, L., et\u00a0al.: Instruction Tuning for Large Language Models: A Survey. arXiv preprint arXiv:2308.10792 (2023)"},{"key":"2353_CR78","unstructured":"Zhao, W.X., Zhou, K., et\u00a0al. (2023). A Survey of Large Language Models. arXiv preprint arXiv:2303.18223"},{"key":"2353_CR79","doi-asserted-by":"crossref","unstructured":"Zheng, X., Qiao, X., Cao, Y., Lau, R.W.H. (2019). Content-Aware Generative Modeling of Graphic Design Layouts. ACM Transactions on Graphics 38(4)","DOI":"10.1145\/3306346.3322971"},{"key":"2353_CR80","doi-asserted-by":"crossref","unstructured":"Zhong, X., Tang, J., & Jimeno\u00a0Yepes, A. (2019). PubLayNet: Largest Dataset Ever for Document Layout Analysis. In: ICDAR. pp. 1015\u20131022","DOI":"10.1109\/ICDAR.2019.00166"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02353-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02353-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02353-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T06:04:12Z","timestamp":1749276252000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02353-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,12]]},"references-count":80,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["2353"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02353-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,12]]},"assertion":[{"value":"31 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}