{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T01:50:03Z","timestamp":1772502603779,"version":"3.50.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T00:00:00Z","timestamp":1726444800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272436"],"award-info":[{"award-number":["62272436"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62121002"],"award-info":[{"award-number":["62121002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Alibaba Group"},{"DOI":"10.13039\/501100001809","name":"National Nature Science Foundation of China","doi-asserted-by":"crossref","award":["62232006"],"award-info":[{"award-number":["62232006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s13042-024-02355-5","type":"journal-article","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T13:02:30Z","timestamp":1726491750000},"page":"1607-1624","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Self-refined variational transformer for image-conditioned layout generation"],"prefix":"10.1007","volume":"16","author":[{"given":"Yunning","family":"Cao","sequence":"first","affiliation":[]},{"given":"Chuanbin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Ye","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Min","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Tiezheng","family":"Ge","sequence":"additional","affiliation":[]},{"given":"Yuning","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Hongtao","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,16]]},"reference":[{"issue":"4","key":"2355_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3447239","volume":"54","author":"W-H Cheng","year":"2021","unstructured":"Cheng W-H, Song S, Chen C-Y, Hidayati SC, Liu J (2021) Fashion meets computer vision: A survey. ACM Computing Surveys (CSUR) 54(4):1\u201341","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"2355_CR2","doi-asserted-by":"crossref","unstructured":"Hidayati SC, Hsu C-C, Chang Y-T, Hua K-L, Fu J, Cheng W-H (2018) What dress fits me best? fashion recommendation on the clothing style for personal body shape. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 438\u2013446","DOI":"10.1145\/3240508.3240546"},{"key":"2355_CR3","doi-asserted-by":"crossref","unstructured":"Wang J, Shuai H-H, Li Y-H, Cheng W-H (2023) Human-object interaction detection: An overview. IEEE Consumer Electronics Magazine","DOI":"10.1109\/MCE.2023.3343919"},{"key":"2355_CR4","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. Advances in neural information processing systems 27"},{"key":"2355_CR5","unstructured":"Wang C-H, Huang K-Y, Yao Y, Chen J-C, Shuai H-H, Cheng W-H (2022) Lightweight deep learning: An overview. IEEE consumer electronics magazine"},{"key":"2355_CR6","unstructured":"Lopez R, Regier J, Jordan MI, Yosef N (2018) Information constraints on auto-encoding variational bayes. Advances in Neural Information Processing Systems 31"},{"issue":"4","key":"2355_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3306346.3322971","volume":"38","author":"X Zheng","year":"2019","unstructured":"Zheng X, Qiao X, Cao Y, Lau RW (2019) Content-aware generative modeling of graphic design layouts. ACM Transactions on Graphics (TOG) 38(4):1\u201315","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"2355_CR8","unstructured":"Li J, Yang J, Hertzmann A, Zhang J, Xu T (2019) Layoutgan: Generating graphic layouts with wireframe discriminators. 7th International Conference on Learning Representations, ICLR 2019, 1\u201316"},{"key":"2355_CR9","doi-asserted-by":"crossref","unstructured":"Gupta K, Lazarow J, Achille A, Davis LS, Mahadevan V, Shrivastava A (2021) Layouttransformer: Layout generation and completion with self-attention. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1004\u20131014","DOI":"10.1109\/ICCV48922.2021.00104"},{"key":"2355_CR10","doi-asserted-by":"publisher","unstructured":"Li J, Yang J, Zhang J, Liu C, Wang C, Xu T (2021) Attribute-conditioned layout gan for automatic graphic design. IEEE Transactions on Visualization and Computer Graphics 27, 4039\u20134048 https:\/\/doi.org\/10.1109\/TVCG.2020.2999335","DOI":"10.1109\/TVCG.2020.2999335"},{"key":"2355_CR11","doi-asserted-by":"crossref","unstructured":"Lee H-Y, Jiang L, Essa I, Le PB, Gong H, Yang M-H, Yang W (2020) Neural design network: Graphic layout generation with constraints. In: European Conference on Computer Vision, pp. 491\u2013506. Springer","DOI":"10.1007\/978-3-030-58580-8_29"},{"key":"2355_CR12","doi-asserted-by":"crossref","unstructured":"Arroyo DM, Postels J, Tombari F (2021) Variational transformer networks for layout generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13642\u201313652","DOI":"10.1109\/CVPR46437.2021.01343"},{"key":"2355_CR13","doi-asserted-by":"crossref","unstructured":"Jyothi AA, Durand T, He J, Sigal L, Mori G (2019) Layoutvae: Stochastic scene layout generation from a label set. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9895\u20139904","DOI":"10.1109\/ICCV.2019.00999"},{"key":"2355_CR14","doi-asserted-by":"crossref","unstructured":"Hsiao Y-S, Sanchez-Riera J, Lim T, Hua K-L, Cheng W-H (2014) Lared: A large rgb-d extensible hand gesture dataset. In: Proceedings of the 5th ACM Multimedia Systems Conference, pp. 53\u201358","DOI":"10.1145\/2557642.2563669"},{"key":"2355_CR15","doi-asserted-by":"crossref","unstructured":"Cui Y, Ren W, Cao X, Knoll A (2023) Focal network for image restoration. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13001\u201313011","DOI":"10.1109\/ICCV51070.2023.01195"},{"key":"2355_CR16","doi-asserted-by":"crossref","unstructured":"Cui Y, Ren W, Yang S, Cao X, Knoll A (2023) Irnext: Rethinking convolutional network design for image restoration. In: International Conference on Machine Learning","DOI":"10.1109\/ICCV51070.2023.01195"},{"key":"2355_CR17","doi-asserted-by":"publisher","first-page":"1426","DOI":"10.1609\/aaai.v38i2.27907","volume":"38","author":"Y Cui","year":"2024","unstructured":"Cui Y, Ren W, Knoll A (2024) Omni-kernel network for image restoration. Proceedings of the AAAI Conference on Artificial Intelligence 38:1426\u20131434","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2355_CR18","doi-asserted-by":"crossref","unstructured":"Meng D, Chen X, Fan Z, Zeng G, Li H, Yuan Y, Sun L, Wang J (2021) Conditional detr for fast training convergence. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3651\u20133660","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"2355_CR19","unstructured":"Liu S, Li F, Zhang H, Yang X, Qi X, Su H, Zhu J, Zhang L (2021) Dab-detr: Dynamic anchor boxes are better queries for detr. In: International Conference on Learning Representations"},{"key":"2355_CR20","doi-asserted-by":"crossref","unstructured":"Hsu HY, He X, Peng Y, Kong H, Zhang Q (2023) Posterlayout: A new benchmark and approach for content-aware visual-textual presentation layout. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6018\u20136026","DOI":"10.1109\/CVPR52729.2023.00583"},{"key":"2355_CR21","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Advances in neural information processing systems 30"},{"issue":"1","key":"2355_CR22","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2008","unstructured":"Scarselli F, Gori M, Tsoi AC, Hagenbuchner M, Monfardini G (2008) The graph neural network model. IEEE Trans Neural Networks 20(1):61\u201380","journal-title":"IEEE Trans Neural Networks"},{"key":"2355_CR23","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2355_CR24","doi-asserted-by":"crossref","unstructured":"Guo L, Liu J, Zhu X, Yao P, Lu S, Lu H (2020) Normalized and geometry-aware self-attention network for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10327\u201310336","DOI":"10.1109\/CVPR42600.2020.01034"},{"key":"2355_CR25","doi-asserted-by":"crossref","unstructured":"Zhang X, Sun X, Luo Y, Ji J, Zhou Y, Wu Y, Huang F, Ji R (2021) Rstnet: Captioning with adaptive attention on visual and non-visual words. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15465\u201315474","DOI":"10.1109\/CVPR46437.2021.01521"},{"key":"2355_CR26","doi-asserted-by":"crossref","unstructured":"Sio CH, Ma Y-J, Shuai H-H, Chen J-C, Cheng W-H (2020) S2siamfc: Self-supervised fully convolutional siamese network for visual tracking. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1948\u20131957","DOI":"10.1145\/3394171.3413611"},{"issue":"9","key":"2355_CR27","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3560815","volume":"55","author":"P Liu","year":"2023","unstructured":"Liu P, Yuan W, Fu J, Jiang Z, Hayashi H, Neubig G (2023) Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing. ACM Comput Surv 55(9):1\u201335","journal-title":"ACM Comput Surv"},{"key":"2355_CR28","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst 33:1877\u20131901","journal-title":"Adv Neural Inf Process Syst"},{"key":"2355_CR29","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1162\/tacl_a_00324","volume":"8","author":"Z Jiang","year":"2020","unstructured":"Jiang Z, Xu FF, Araki J, Neubig G (2020) How can we know what language models know? Transactions of the Association for Computational Linguistics 8:423\u2013438","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2355_CR30","doi-asserted-by":"crossref","unstructured":"Li XL, Liang P (2021) Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"2355_CR31","doi-asserted-by":"crossref","unstructured":"Lester B, Al-Rfou R, Constant N (2021) The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"2355_CR32","unstructured":"Sohn K, Lee H, Yan X (2015) Learning structured output representation using deep conditional generative models. Advances in neural information processing systems 28"},{"key":"2355_CR33","doi-asserted-by":"crossref","unstructured":"Walker J, Doersch C, Gupta A, Hebert M (2016) An uncertain future: Forecasting from static images using variational autoencoders. In: European Conference on Computer Vision, pp. 835\u2013851. Springer","DOI":"10.1007\/978-3-319-46478-7_51"},{"key":"2355_CR34","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, et al. (2020) An image is worth 16x16 words: Transformers for image recognition at scale. In: International Conference on Learning Representations"},{"key":"2355_CR35","unstructured":"Kingma DP, Salimans T, Welling M (2015) Variational dropout and the local reparameterization trick. Advances in neural information processing systems 28"},{"key":"2355_CR36","doi-asserted-by":"crossref","unstructured":"Bowman S, Vilnis L, Vinyals O, Dai A, Jozefowicz R, Bengio S (2016) Generating sentences from a continuous space. In: Proceedings of The 20th SIGNLL Conference on Computational Natural Language Learning, pp. 10\u201321","DOI":"10.18653\/v1\/K16-1002"},{"key":"2355_CR37","unstructured":"Higgins I, Matthey L, Pal A, Burgess C, Glorot X, Botvinick M, Mohamed S, Lerchner A (2016) beta-vae: Learning basic visual concepts with a constrained variational framework"},{"key":"2355_CR38","doi-asserted-by":"publisher","unstructured":"Fu H, Li C, Liu X, Gao J, Celikyilmaz A, Carin L (2019) Cyclical annealing schedule: A simple approach to mitigating KL vanishing. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 240\u2013250. Association for Computational Linguistics, Minneapolis, Minnesota. https:\/\/doi.org\/10.18653\/v1\/N19-1021","DOI":"10.18653\/v1\/N19-1021"},{"key":"2355_CR39","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"2355_CR40","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, et al. (2019) Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32"},{"key":"2355_CR41","unstructured":"Heusel M, Ramsauer H, Unterthiner T, Nessler B, Hochreiter S (2017) Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02355-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02355-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02355-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T18:26:40Z","timestamp":1739989600000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02355-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,16]]},"references-count":41,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["2355"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02355-5","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,16]]},"assertion":[{"value":"3 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}