{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T15:56:06Z","timestamp":1778169366248,"version":"3.51.4"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"29","license":[{"start":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T00:00:00Z","timestamp":1757203200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T00:00:00Z","timestamp":1757203200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s00521-025-11565-z","type":"journal-article","created":{"date-parts":[[2025,9,7]],"date-time":"2025-09-07T19:58:13Z","timestamp":1757275093000},"page":"24243-24264","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["ViT-DtC: vision transformer-based design-to-code framework for code generation from generated UI designs and hand-drawn sketches"],"prefix":"10.1007","volume":"37","author":[{"given":"Areeg","family":"Ahmed","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6903-272X","authenticated-orcid":false,"given":"Shahira","family":"Azab","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9593-6909","authenticated-orcid":false,"given":"Sherin M.","family":"Moussa","sequence":"additional","affiliation":[]},{"given":"Yasser","family":"Abdelhamid","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,7]]},"reference":[{"key":"11565_CR1","unstructured":"Norman D (2013) The design of everyday things: revised and expanded edition. Basic Books"},{"key":"11565_CR2","unstructured":"Krug S (2014) Don't make me think, revisited: a common sense approach to web usability. New Riders"},{"key":"11565_CR3","unstructured":"P. Anderson S (2011) Seductive interaction design: creating playful, fun, and effective user experiences. New Riders"},{"key":"11565_CR4","unstructured":"Cooper A, Reimann R, Cronin D, Noessel C (2014) About face: the essentials of interaction design. John Wiley & Sons"},{"key":"11565_CR5","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.3897\/jucs.2020.058","volume":"26","author":"BD De Souza","year":"2020","unstructured":"De Souza BD, Von Wangenheim CG, Von Wangenheim A, Hauck JCR (2020) Recent progress in automated code generation from GUI images using machine learning techniques. J Univ Comput Sci 26:1095\u20131127. https:\/\/doi.org\/10.3897\/jucs.2020.058","journal-title":"J Univ Comput Sci"},{"key":"11565_CR6","doi-asserted-by":"publisher","unstructured":"Dave H, Sonje S, Pardeshi J, Chaudhari S, Raundale P (2021) A survey on artificial intelligence based techniques to convert user interface design mock-ups to code. In: International conference on artificial intelligence and smart systems (ICAIS), Coimbatore, India, 2021, pp. 28\u201333, https:\/\/doi.org\/10.1109\/ICAIS50930.2021.9395994","DOI":"10.1109\/ICAIS50930.2021.9395994"},{"key":"11565_CR7","doi-asserted-by":"crossref","unstructured":"Wasserman, A (2010) Software engineering issues for mobile application development. In Proceedings of the FSE\/SDP Workshop on Future of Software Engineering Research (pp. 397\u2013400). Association for Computing Machinery.","DOI":"10.1145\/1882362.1882443"},{"key":"11565_CR8","doi-asserted-by":"crossref","unstructured":"Beltramelli T (2017) pix2code: Generating Code from a Graphical User Interface Screenshot. In: arXiv.org. https:\/\/arxiv.org\/abs\/1705.07962","DOI":"10.1145\/3220134.3220135"},{"key":"11565_CR9","doi-asserted-by":"publisher","unstructured":"Sethi A, Sankaran A, Panwar N, Khare S, Mani S (2018) DLPaper2Code: Auto-Generation of Code from Deep Learning Research Papers. Proceedings of the AAAI Conference on Artificial Intelligence 32(1). https:\/\/doi.org\/10.1609\/aaai.v32i1.12326","DOI":"10.1609\/aaai.v32i1.12326"},{"key":"11565_CR10","doi-asserted-by":"crossref","unstructured":"Zhu Z, Xue Z, Yuan Z (2019) Automatic Graphics Program Generation using Attention-Based Hierarchical Decoder. In: Lecture Notes in Computer Science. pp 181\u2013196","DOI":"10.1007\/978-3-030-20876-9_12"},{"key":"11565_CR11","doi-asserted-by":"publisher","unstructured":"Asiroglu B, Mete BR, Y\u0131ld\u0131z E, Nal\u00e7akan Y, Sezen A, Da\u011ftekin M, ensari t (2019) automatic html code generation from mock-up images using machine learning techniques. In 2019 scientific meeting on electrical-electronics & biomedical engineering and computer science (EBBT), Istanbul, Turkey, 2019, pp. 1-4, https:\/\/doi.org\/10.1109\/EBBT.2019.8741736","DOI":"10.1109\/EBBT.2019.8741736"},{"key":"11565_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jnca.2020.102636","volume":"161","author":"XW Pang","year":"2020","unstructured":"Pang XW, Zhou Y, Li P, Lin W, Wu W, Wang J (2020) A novel syntax-aware automatic graphics code generation with attention-based deep neural network. J Netw Comput Appl 161:102636. https:\/\/doi.org\/10.1016\/j.jnca.2020.102636","journal-title":"J Netw Comput Appl"},{"key":"11565_CR13","doi-asserted-by":"publisher","unstructured":"Teng Z, Fu Q, White J, Schmidt DC (2021) Sketch2Vis: generating data visualizations from hand-drawn sketches with deep learning. In: 2021 20th IEEE international conference on machine learning and applications (ICMLA). https:\/\/doi.org\/10.1109\/icmla52953.2021.00141","DOI":"10.1109\/icmla52953.2021.00141"},{"key":"11565_CR14","unstructured":"De Souza Baul\u00e9 D, Von Wangenheim CG, Von Wangenheim A, Hauck JCR, J\u00fanior ECV (2021) Automatic code generation from sketches of mobile applications in end-user development using deep learning. In: arXiv.org. https:\/\/arxiv.org\/abs\/2103.05704"},{"key":"11565_CR15","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86:2278\u20132324. https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc IEEE"},{"key":"11565_CR16","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N (2021) An Image is Worth 16x16 words: transformers for image recognition at scale. In: arXiv.org. https:\/\/arxiv.org\/pdf\/2010.11929"},{"key":"11565_CR17","unstructured":"Brown TB, Mann B, Ryder N, et al. (2020) Language models are few-shot learners. In: Advances in Neural Information Processing Systems, vol. 33, pp. 1877\u20131901. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"11565_CR18","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9:1735\u20131780. https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Comput"},{"key":"11565_CR19","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors. Nature 323:533\u2013536. https:\/\/doi.org\/10.1038\/323533a0","journal-title":"Nature"},{"key":"11565_CR20","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: arXiv.org. https:\/\/arxiv.org\/abs\/1706.03762"},{"key":"11565_CR21","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60:84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun ACM"},{"key":"11565_CR22","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for Large-Scale image recognition. In: arXiv.org. https:\/\/arxiv.org\/abs\/1409.1556"},{"key":"11565_CR23","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Deep residual learning for image recognition. In: arXiv.org. https:\/\/arxiv.org\/abs\/1512.03385"},{"key":"11565_CR24","unstructured":"(2020) Arquivos@UFSC. https:\/\/arquivos.ufsc.br\/d\/4592545e03a34ae2836c\/. [Last accessed 1 Nov 2023]."},{"key":"11565_CR25","unstructured":"microsoft ailab\/Sketch2Code\/model\/images at master \u00b7 microsoft\/ailab. In: GitHub. https:\/\/github.com\/microsoft\/ailab\/tree\/master\/Sketch2Code\/model\/images. [Last accessed 1 Nov 2023]."},{"key":"11565_CR26","unstructured":"(2020) Sketch2Code. In: Kaggle. https:\/\/www.kaggle.com\/datasets\/biniamad\/sketch2code. [Last accessed 2 Nov 2023]."},{"key":"11565_CR27","unstructured":"(2022) Sketch2Code. In: Kaggle. https:\/\/www.kaggle.com\/datasets\/vshantam\/sketch2code\/. [Last accessed 2 Nov 2023]."},{"key":"11565_CR28","unstructured":"Balog M, Gaunt AL, Brockschmidt M, Nowozin S, Tarlow D (2017) DeepCoder: learning to write programs. In: arXiv.org. https:\/\/arxiv.org\/abs\/1611.01989"},{"key":"11565_CR29","unstructured":"Murali V, Qi L, Chaudhuri S, Jermaine C (2018) Neural sketch learning for conditional program generation. In: arXiv.org. https:\/\/arxiv.org\/abs\/1703.05698"},{"key":"11565_CR30","first-page":"10825","volume":"32","author":"EC Shin","year":"2019","unstructured":"Shin EC, Allamanis M, Brockschmidt M, Polozov A (2019) Program synthesis and semantic parsing with learned code idioms. Neural Inform Process Syst 32:10825\u201310835","journal-title":"Neural Inform Process Syst"},{"key":"11565_CR31","doi-asserted-by":"publisher","first-page":"8984","DOI":"10.1609\/aaai.v34i05.6430","volume":"34","author":"Z Sun","year":"2020","unstructured":"Sun Z, Zhu Q, Xiong Y, Sun Y, Mou L, Zhang L (2020) Treegen: a tree-based transformer architecture for code generation. Proc AAAI Conf Artif Intell 34:8984\u20138991. https:\/\/doi.org\/10.1609\/aaai.v34i05.6430","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"11565_CR32","unstructured":"Guo T, Gao H (2019) Content enhanced BERT-based Text-to-SQL generation. In: arXiv.org. https:\/\/arxiv.org\/abs\/1910.07179."},{"key":"11565_CR33","doi-asserted-by":"publisher","unstructured":"Gemmell C, Rossetto F, Dalton J (2020) Relevance transformer: generating concise code snippets with relevance feedback. In: Proceedings of the 43rd international conference on research and development in information retrieval. https:\/\/doi.org\/10.1145\/3397271.3401215","DOI":"10.1145\/3397271.3401215"},{"key":"11565_CR34","unstructured":"Perez L, Ottens L, Viswanathan S (2021) Automatic code generation using pre-trained language models. In: arXiv.org. https:\/\/arxiv.org\/abs\/2102.10535"},{"key":"11565_CR35","unstructured":"Chen M, Tworek J, Jun H, et al. (2021) Evaluating large language models trained on code. In: arXiv.org. https:\/\/arxiv.org\/abs\/2107.03374"},{"key":"11565_CR36","unstructured":"Redmon J, Farhadi A (2018) YOLOV3: an incremental improvement. In: arXiv.org. https:\/\/arxiv.org\/abs\/1804.02767"},{"key":"11565_CR37","unstructured":"ImageNet. https:\/\/www.image-net.org\/. [Last accessed 2 Nov 2023]."},{"key":"11565_CR38","unstructured":"Shen Y, Tan S, Sordoni A, Courville A (2018) Ordered neurons: integrating tree structures into recurrent neural networks. In: arXiv.org. https:\/\/arxiv.org\/abs\/1810.09536"},{"key":"11565_CR39","unstructured":"Li R, Zhang Y, Yang D (2024) Sketch2Code: evaluating vision-language models for interactive web design prototyping. In: arXiv.org. https:\/\/arxiv.org\/abs\/2410.16232"},{"key":"11565_CR40","unstructured":"Si C, Zhang Y, Li R, Yang Z, Liu R, Yang D (2024) Design2Code: Benchmarking multimodal code generation for automated front-end engineering. In: arXiv.org. https:\/\/arxiv.org\/abs\/2403.03163"},{"key":"11565_CR41","unstructured":"Wan Y, Wang C, Dong Y, Wang W, Li S, Huo Y, Lyu M R (2024) Automatically generating UI code from screenshot: a divide-and-conquer-based approach. In: arXiv.org. https:\/\/arxiv.org\/abs\/2406.16386"},{"key":"11565_CR42","unstructured":"Zhou T, Zhao Y, Hou X, Sun X, Chen K, Wang H (2024) Bridging design and development with automated declarative UI code generation. In: arXiv.org. https:\/\/arxiv.org\/abs\/2409.11667"},{"key":"11565_CR43","doi-asserted-by":"publisher","DOI":"10.3389\/fcomp.2024.1397805","author":"H Zafar","year":"2024","unstructured":"Zafar H, Khan SUR, Mashkoor A, Un Nisa H (2024) Mobicat: a model-driven engineering approach for automatic GUI code generation for Android applications. Front Comput Sci. https:\/\/doi.org\/10.3389\/fcomp.2024.1397805","journal-title":"Front Comput Sci"},{"key":"11565_CR44","doi-asserted-by":"crossref","unstructured":"Samir M, Elsayed A, Marie M I (2024) A model for automatic code generation from high fidelity graphical user interface mockups using deep learning techniques. In: International Journal of Advanced Computer Science and Applications. https:\/\/thesai.org\/Publications\/ViewPaper?Code=IJACSA&Issue=3&SerialNo=69&Volume=15","DOI":"10.14569\/IJACSA.2024.0150369"},{"key":"11565_CR45","unstructured":"Anchen (2023) Enhancing ChatGPT\u2019s capabilities: reading images and generating HTML code from sketches using an image-to-DSL Model with ChatGPT. In: Medium. https:\/\/medium.com\/@anchen.li\/enhancing-chatgpts-capabilities-reading-images-and-generating-html-code-from-sketches-using-an-c3d30cf07b5c. [Last accessed 1 Nov 2023]"},{"key":"11565_CR46","unstructured":"Loshchilov I, Hutter F (2019) Decoupled weight decay regularization. In: arXiv.org. https:\/\/arxiv.org\/abs\/1711.05101"},{"issue":"8","key":"11565_CR47","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I (2019) Language models are unsupervised multitask learners. Open AI Blog 1(8):9","journal-title":"Open AI Blog"},{"key":"11565_CR48","unstructured":"Fine-Tune ViT for image classification with transformers. https:\/\/huggingface.co\/blog\/fine-tune-vit. [Last accessed 1 Nov 2023]."},{"key":"11565_CR49","unstructured":"google\/vit-base-patch16\u2013224-in21k. https:\/\/huggingface.co\/google\/vit-base-patch16-224-in21k. [Last accessed 8 Jun 2025]."},{"key":"11565_CR50","unstructured":"google\/vit-base-patch16\u2013224. https:\/\/huggingface.co\/google\/vit-base-patch16-224. [Last accessed 8 Jun 2025]."},{"key":"11565_CR51","unstructured":"(n.d.) CrossEntropyLoss. In: PyTorch. https:\/\/pytorch.org\/docs\/stable\/generated\/torch.nn.CrossEntropyLoss.html. [Last accessed 11 Jun 2025]."},{"key":"11565_CR52","unstructured":"[Modified dataset link]. https:\/\/1drv.ms\/f\/s!AjuHTycMwde3khzSSP1zWFdmvyNI?e=ox2afT. [Last accessed 5 Feb 2024]."}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11565-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11565-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11565-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T05:23:39Z","timestamp":1759209819000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11565-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,7]]},"references-count":52,"journal-issue":{"issue":"29","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["11565"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11565-z","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,7]]},"assertion":[{"value":"29 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}