{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:35:38Z","timestamp":1778081738425,"version":"3.51.4"},"publisher-location":"Cham","reference-count":63,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031928079","type":"print"},{"value":"9783031928086","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92808-6_18","type":"book-chapter","created":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T15:59:21Z","timestamp":1748361561000},"page":"278-295","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Khattat: Enhancing Readability and\u00a0Concept Representation of\u00a0Semantic Typography"],"prefix":"10.1007","author":[{"given":"Ahmed","family":"Hussein","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alaa","family":"Elsetohy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sama","family":"Hadhoud","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tameem","family":"Bakr","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yasser","family":"Rohaim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Badr","family":"AlKhamissi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"18_CR1","unstructured":"Adobe Systems Inc.: PostScript Language Reference Manual. Addison-Wesley (1990)"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Bai, Y., Huang, Z., Gao, W., Yang, S., Liu, J.: Intelligent artistic typography: a comprehensive review of artistic text design and generation (2024). https:\/\/arxiv.org\/abs\/2407.14774","DOI":"10.1561\/116.20240037"},{"key":"18_CR3","unstructured":"Betker, J., et al.: Improving image generation with better captions. arXiv preprint (2023). openAI"},{"key":"18_CR4","doi-asserted-by":"publisher","unstructured":"Blattmann, A., Rombach, R., Oktay, K., Ommer, B.: Retrieval-augmented diffusion models (2022). https:\/\/doi.org\/10.48550\/ARXIV.2204.11824, https:\/\/arxiv.org\/abs\/2204.11824","DOI":"10.48550\/ARXIV.2204.11824"},{"key":"18_CR5","unstructured":"Contributors, H.: Harfbuzz. https:\/\/github.com\/harfbuzz\/harfbuzz (2024). version 8.1.1"},{"key":"18_CR6","doi-asserted-by":"publisher","unstructured":"Corneanu, C., Gadde, R., Martinez, A.M.: LatentPaint: image inpainting in latent space with diffusion models. In: 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 4322\u20134331 (2024). https:\/\/doi.org\/10.1109\/WACV57701.2024.00428","DOI":"10.1109\/WACV57701.2024.00428"},{"key":"18_CR7","unstructured":"Corporation, M., Incorporated, A.S.: OpenType Specification (2020). https:\/\/docs.microsoft.com\/en-us\/typography\/opentype\/spec\/, version 1.8.4"},{"key":"18_CR8","unstructured":"Delaunay, B.e.a.: Sur la sph\u00e8re vide. Izvestiya Akademii Nauk SSSR, Seriya Matematicheskaya 7, 793\u2013800 (1934). in French"},{"key":"18_CR9","unstructured":"Esser, P., et al.: Scaling rectified flow transformers for high-resolution image synthesis (2024). https:\/\/arxiv.org\/abs\/2403.03206"},{"key":"18_CR10","unstructured":"Frans, K., Soros, L.B., Witkowski, O.: ClipDraw: exploring text-to-drawing synthesis through language-image encoders (2021). https:\/\/arxiv.org\/abs\/2106.14843"},{"key":"18_CR11","doi-asserted-by":"publisher","unstructured":"Gao, S., et al.: Implicit diffusion models for continuous super-resolution. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10021\u201310030 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.00966","DOI":"10.1109\/CVPR52729.2023.00966"},{"key":"18_CR12","doi-asserted-by":"publisher","unstructured":"Gatys, L.A., Ecker, A.S., Bethge, M.: Image style transfer using convolutional neural networks. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2414\u20132423 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.265","DOI":"10.1109\/CVPR.2016.265"},{"key":"18_CR13","unstructured":"Gemini\u00a0Team, year=2024, e.a.p.u.: Gemini: A family of highly capable multimodal models"},{"key":"18_CR14","doi-asserted-by":"publisher","unstructured":"Gong, Y., et al.: Interactive story visualization with multiple characters. In: SIGGRAPH Asia 2023 Conference Papers. SA 2023, Association for Computing Machinery, New York, NY, USA (2023). https:\/\/doi.org\/10.1145\/3610548.3618184","DOI":"10.1145\/3610548.3618184"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"He, J.Y., et al.: WordArt Designer: user-driven artistic typography synthesis using large language models. In: Conference on Empirical Methods in Natural Language Processing (2023). https:\/\/api.semanticscholar.org\/CorpusID:264590526","DOI":"10.18653\/v1\/2023.emnlp-industry.23"},{"key":"18_CR16","doi-asserted-by":"publisher","unstructured":"Hermanto, Y.: Semantic interpretation in experimental typography creation. KnE Soc. Sci. (2023). https:\/\/doi.org\/10.18502\/kss.v8i15.13939","DOI":"10.18502\/kss.v8i15.13939"},{"key":"18_CR17","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models (2020). https:\/\/arxiv.org\/abs\/2006.11239"},{"key":"18_CR18","doi-asserted-by":"publisher","unstructured":"Iluz, S., Vinker, Y., Hertz, A., Berio, D., Cohen-Or, D., Shamir, A.: Word-as-image for semantic typography. ACM Trans. Graph. 42(4) (2023). https:\/\/doi.org\/10.1145\/3592123","DOI":"10.1145\/3592123"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Jain, A., Xie, A., Abbeel, P.: VectorFusion: Text-to-SVG by abstracting pixel-based diffusion models (2022). https:\/\/arxiv.org\/abs\/2211.11319","DOI":"10.1109\/CVPR52729.2023.00190"},{"key":"18_CR20","unstructured":"Jeong, H., Kwon, G., Ye, J.C.: Zero-shot generation of coherent storybook from plain text story using diffusion models (2023). https:\/\/arxiv.org\/abs\/2302.03900"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution (2016). https:\/\/arxiv.org\/abs\/1603.08155","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"18_CR22","unstructured":"Katzir, O., Patashnik, O., Cohen-Or, D., Lischinski, D.: Noise-free score distillation (2023)"},{"key":"18_CR23","unstructured":"Kawar, B., Elad, M., Ermon, S., Song, J.: Denoising diffusion restoration models. In: Advances in Neural Information Processing Systems (2022)"},{"issue":"1\u20132","key":"18_CR24","first-page":"81","volume":"30","author":"MG Kendall","year":"1939","unstructured":"Kendall, M.G.: The problem of m rankings. Biometrika 30(1\u20132), 81\u201389 (1939)","journal-title":"Biometrika"},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"khattak, M.U., Ferjad, M., Muzzamal, N., Gool, L.V., Tombari, F.: Learning to prompt with text only supervision for vision-language models. arXiv:2401.02418 (2024)","DOI":"10.1609\/aaai.v39i4.32444"},{"key":"18_CR26","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. NIPS 2022, Curran Associates Inc., Red Hook, NY, USA (2024)"},{"key":"18_CR27","unstructured":"Larsen, A.B.L., S\u00f8nderby, S.K., Larochelle, H., Winther, O.: Autoencoding beyond pixels using a learned similarity metric. In: Balcan, M.F., Weinberger, K.Q. (eds.) Proceedings of The 33rd International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a048, pp. 1558\u20131566. PMLR, New York, New York, USA (2016). https:\/\/proceedings.mlr.press\/v48\/larsen16.html"},{"key":"18_CR28","doi-asserted-by":"publisher","unstructured":"Li, T.M., Luk\u00e1\u010d, M., Gharbi, M., Ragan-Kelley, J.: Differentiable vector graphics rasterization for editing and learning. ACM Trans. Graph. 39, 1\u201315 (2020). https:\/\/doi.org\/10.1145\/3414685.3417871","DOI":"10.1145\/3414685.3417871"},{"key":"18_CR29","doi-asserted-by":"publisher","unstructured":"Liu, P., Yuan, W., Fu, J., Jiang, Z., Hayashi, H., Neubig, G.: Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. ACM Comput. Surv. 55(9) (2023). https:\/\/doi.org\/10.1145\/3560815","DOI":"10.1145\/3560815"},{"key":"18_CR30","doi-asserted-by":"publisher","unstructured":"Liu, Y., Iter, D., Xu, Y., Wang, S., Xu, R., Zhu, C.: G-EVAL: NLG evaluation using GPT-4 with better human alignment. In: Bouamor, H., Pino, J., Bali, K. (eds.) Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 2511\u20132522. Association for Computational Linguistics, Singapore (2023). https:\/\/doi.org\/10.18653\/v1\/2023.emnlp-main.153, https:\/\/aclanthology.org\/2023.emnlp-main.153","DOI":"10.18653\/v1\/2023.emnlp-main.153"},{"key":"18_CR31","unstructured":"Liu, Z., et al.: Dynamic typography: bringing text to life via video diffusion prior (2024). https:\/\/arxiv.org\/abs\/2404.11614"},{"key":"18_CR32","unstructured":"Minaee, S., et al.: Large language models: a survey (2024). https:\/\/arxiv.org\/abs\/2402.06196"},{"key":"18_CR33","doi-asserted-by":"publisher","unstructured":"O\u2019Donovan, P., Lundefinedbeks, J., Agarwala, A., Hertzmann, A.: Exploratory font selection using crowdsourced attributes. ACM Trans. Graph. 33(4) (2014). https:\/\/doi.org\/10.1145\/2601097.2601110","DOI":"10.1145\/2601097.2601110"},{"key":"18_CR34","unstructured":"OpenAI: GPT-4 technical report (2024). https:\/\/arxiv.org\/abs\/2303.08774"},{"key":"18_CR35","unstructured":"Paruchuri, V.: Surya (2024). https:\/\/github.com\/VikParuchuri\/surya, commit: 3e0669b11bb79923f7644d843fcd3fe34c86608e"},{"key":"18_CR36","unstructured":"Penney, R.: TrueType Fundamentals. Apple Computer, Inc. (1996). https:\/\/www.truetype-typography.com"},{"key":"18_CR37","unstructured":"Pihlgren, G.G., et al.: A systematic performance analysis of deep perceptual loss networks: breaking transfer learning conventions (2024). https:\/\/arxiv.org\/abs\/2302.04032"},{"key":"18_CR38","unstructured":"Poole, B., Jain, A., Barron, J.T., Mildenhall, B.: DreamFusion: Text-to-3D using 2D diffusion (2022). https:\/\/arxiv.org\/abs\/2209.14988"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Qiu, J., et al.: SnapNTell: enhancing entity-centric visual question answering with retrieval augmented multimodal LLM (2024). https:\/\/arxiv.org\/abs\/2403.04735","DOI":"10.18653\/v1\/2024.findings-emnlp.14"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models (2021)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"18_CR41","doi-asserted-by":"publisher","unstructured":"Saharia, C., et al.: Palette: image-to-image diffusion models. In: ACM SIGGRAPH 2022 Conference Proceedings. SIGGRAPH 2022, Association for Computing Machinery, New York, NY, USA (2022). https:\/\/doi.org\/10.1145\/3528233.3530757","DOI":"10.1145\/3528233.3530757"},{"key":"18_CR42","unstructured":"Shen, Y., Song, K., Tan, X., Li, D., Lu, W., Zhuang, Y.: HuggIngGPT: solving AI tasks with ChatGPT and its friends in hugging face. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. NIPS 2023, Curran Associates Inc., Red Hook, NY, USA (2024)"},{"key":"18_CR43","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"18_CR44","unstructured":"at\u00a0StabilityAI, D.L.: DeepFloyd IF: a novel state-of-the-art open-source text-to-image model with a high degree of photorealism and language understanding (2023). https:\/\/www.deepfloyd.ai\/deepfloyd-if. Accessed 8 Nov 2023"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Tanveer, M., Wang, Y., Mahdavi-Amiri, A., Zhang, H.: Ds-Fusion: artistic typography via discriminated and stylized diffusion (2023)","DOI":"10.1109\/ICCV51070.2023.00041"},{"key":"18_CR46","doi-asserted-by":"publisher","unstructured":"Tatsukawa, Y., Shen, I.C., Qi, A., Koyama, Y., Igarashi, T., Shamir, A.: FontClip: a semantic typography visual-language model for multilingual font applications. Comput. Graph. Forum 43(2), e15043 (2024). https:\/\/doi.org\/10.1111\/cgf.15043, https:\/\/onlinelibrary.wiley.com\/doi\/abs\/10.1111\/cgf.15043","DOI":"10.1111\/cgf.15043"},{"key":"18_CR47","unstructured":"Tendulkar, P., Krishna, K., Selvaraju, R.R., Parikh, D.: Trick or treat : thematic reinforcement for artistic typography. ArXiv abs\/1903.07820 (2019). https:\/\/api.semanticscholar.org\/CorpusID:83458775"},{"key":"18_CR48","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: SinSR: diffusion-based image super-resolution in a single step. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 25796\u201325805 (2024)","DOI":"10.1109\/CVPR52733.2024.02437"},{"key":"18_CR49","unstructured":"Wang, Z., et al.: Prolificdreamer: High-fidelity and diverse text-to-3D generation with variational score distillation. In: Advances in Neural Information Processing Systems (NeurIPS) (2023)"},{"key":"18_CR50","unstructured":"Wu, C., Yin, S., Qi, W., Wang, X., Tang, Z., Duan, N.: Visual ChatGPT: talking, drawing and editing with visual foundation models (2023). https:\/\/arxiv.org\/abs\/2303.04671"},{"key":"18_CR51","doi-asserted-by":"publisher","unstructured":"Xia, B., et al.: Diffir: Efficient diffusion model for image restoration. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 13049\u201313059 (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.01204","DOI":"10.1109\/ICCV51070.2023.01204"},{"key":"18_CR52","doi-asserted-by":"publisher","unstructured":"Xiao, S., Wang, L., Ma, X., Zeng, W.: TypeDance: creating semantic typographic logos from image through personalized generation. In: Proceedings of the CHI Conference on Human Factors in Computing Systems. CHI 2024, ACM (2024). https:\/\/doi.org\/10.1145\/3613904.3642185","DOI":"10.1145\/3613904.3642185"},{"key":"18_CR53","doi-asserted-by":"publisher","unstructured":"Xie, S., Zhang, Z., Lin, Z., Hinz, T., Zhang, K.: SmartBrush: text and shape guided object inpainting with diffusion model. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 22428\u201322437 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.02148","DOI":"10.1109\/CVPR52729.2023.02148"},{"key":"18_CR54","doi-asserted-by":"crossref","unstructured":"Xing, X., Zhou, H., Wang, C., Zhang, J., Xu, D., Yu, Q.: SvgDreamer: text guided SVG generation with diffusion model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4546\u20134555 (2024)","DOI":"10.1109\/CVPR52733.2024.00435"},{"issue":"6","key":"18_CR55","doi-asserted-by":"publisher","first-page":"1348","DOI":"10.1109\/TMI.2018.2827462","volume":"37","author":"Q Yang","year":"2018","unstructured":"Yang, Q., et al.: Low-dose CT image denoising using a generative adversarial network with Wasserstein distance and perceptual loss. IEEE Trans. Med. Imag. 37(6), 1348\u20131357 (2018). https:\/\/doi.org\/10.1109\/TMI.2018.2827462","journal-title":"IEEE Trans. Med. Imag."},{"key":"18_CR56","doi-asserted-by":"crossref","unstructured":"Yang, S., Liu, J., Yang, W., Guo, Z.: Context-aware unsupervised text stylization. In: Proceedings of the 26th ACM International Conference on Multimedia (2018). https:\/\/api.semanticscholar.org\/CorpusID:53034237","DOI":"10.1145\/3240508.3240580"},{"key":"18_CR57","unstructured":"Yang, X., Chen, B., Tam, Y.C.: Arithmetic reasoning with LLM: prolog generation & permutation (2024). https:\/\/arxiv.org\/abs\/2405.17893"},{"key":"18_CR58","unstructured":"Yue, Z., Wang, J., Loy, C.C.: ResShift: efficient diffusion model for image super-resolution by residual shifting. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. NIPS 2023, Curran Associates Inc., Red Hook, NY, USA (2024)"},{"key":"18_CR59","unstructured":"Zhang, J., Wang, Y., Xiao, W., Luo, Z.: Synthesizing ornamental typefaces. Comput. Graph. Forum 36 (2017). https:\/\/api.semanticscholar.org\/CorpusID:33770033"},{"key":"18_CR60","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models"},{"key":"18_CR61","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric (2018). https:\/\/arxiv.org\/abs\/1801.03924","DOI":"10.1109\/CVPR.2018.00068"},{"key":"18_CR62","doi-asserted-by":"publisher","unstructured":"Zhu, Y., et al.: Denoising diffusion models for plug-and-play image restoration. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 1219\u20131229 (2023). https:\/\/doi.org\/10.1109\/CVPRW59228.2023.00129","DOI":"10.1109\/CVPRW59228.2023.00129"},{"key":"18_CR63","doi-asserted-by":"crossref","unstructured":"Zou, C., et al.: Legible compact calligrams. ACM Trans. Graph. (TOG) 35, 1\u201312 (2016). https:\/\/api.semanticscholar.org\/CorpusID:5536512","DOI":"10.1145\/2897824.2925887"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92808-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T15:59:35Z","timestamp":1748361575000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92808-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031928079","9783031928086"],"references-count":63,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92808-6_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}