{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T08:15:52Z","timestamp":1774685752367,"version":"3.50.1"},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031734960","type":"print"},{"value":"9783031734977","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T00:00:00Z","timestamp":1731715200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,16]],"date-time":"2024-11-16T00:00:00Z","timestamp":1731715200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73497-7_3","type":"book-chapter","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T04:01:20Z","timestamp":1731643280000},"page":"28-40","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Leveraging LLMs for\u00a0On-the-Fly Instruction Guided Image Editing"],"prefix":"10.1007","author":[{"given":"Rodrigo","family":"Santos","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jo\u00e3o","family":"Silva","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ant\u00f3nio","family":"Branco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,16]]},"reference":[{"key":"3_CR1","unstructured":"Almazrouei, E., Alobeidli, H., Alshamsi, A., et\u00a0al.: The Falcon series of language models: towards open frontier models (2023)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: InstructPix2Pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"3_CR3","unstructured":"Brown, T., Mann, B., et\u00a0al.: Language models are few-shot learners. arXiv:2005.14165 (2020)"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Gan, Z., Li, Y., et\u00a0al.: Sequential attention GAN for interactive image editing. In: Proceedings of the 28th ACM, pp. 4383\u20134391 (2020)","DOI":"10.1145\/3394171.3413551"},{"key":"3_CR5","unstructured":"Gunasekar, S., et\u00a0al.: Textbooks are all you need. arXiv:2306.11644 (2023)"},{"key":"3_CR6","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., et\u00a0al.: Prompt-to-prompt image editing with cross attention control. arXiv:2208.01626 (2022)"},{"key":"3_CR7","unstructured":"Jiang, A.Q., Sablayrolles, A., Mensch, A., et\u00a0al.: Mistral 7B (2023)"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Jiang, W., Xu, N., et\u00a0al.: Language-guided global image editing via cross-modal cyclic mechanism. In: Proceedings of the IEEE\/CVF, pp. 2115\u20132124 (2021)","DOI":"10.1109\/ICCV48922.2021.00212"},{"key":"3_CR9","unstructured":"Li, J., Li, D., et\u00a0al.: Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML, pp. 12888\u201312900 (2022)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., et\u00a0al.: Null-text inversion for editing real images using guided diffusion models. In: Proceedings of the IEEE\/CVF, pp. 6038\u20136047 (2023)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"3_CR11","unstructured":"Os\u00f3rio, et\u00a0al.: Portulan extraglue datasets and models: kick-starting a benchmark for the neural processing of Portuguese. In: BUCC Workshop, pp. 24\u201334 (2024)"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Parmar, G., Kumar\u00a0Singh, K., Zhang, R., et\u00a0al.: Zero-shot image-to-image translation. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591513"},{"key":"3_CR13","unstructured":"Radford, A., Kim, J.W., Hallacy, C., et\u00a0al.: Learning transferable visual models from natural language supervision. arXiv:2103.00020 (2021)"},{"key":"3_CR14","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., et\u00a0al.: Hierarchical text-conditional image generation with CLIP latents. arXiv:2204.06125 (2022)"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Rodrigues, J., Gomes, L., Silva, J., Branco, A., Santos, R., et\u00a0al.: Advancing neural encoding of Portuguese with transformer Albertina pt. In: EPIA, pp. 441\u2013453 (2023)","DOI":"10.1007\/978-3-031-49008-8_35"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., et\u00a0al.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Santos, R., Branco, A., Silva, J.: Language driven image editing via transformers. In: 2022 IEEE 34th ICTAI, pp. 909\u2013914 (2022)","DOI":"10.1109\/ICTAI56018.2022.00139"},{"key":"3_CR18","unstructured":"Santos, R., Branco, A., Silva, J.R.: Cost-effective language driven image editing with LX-DRIM. In: Proceedings of the 1st MMMPIE Workshop, pp. 31\u201343 (2022)"},{"key":"3_CR19","unstructured":"Santos, R., Rodrigues, J., et\u00a0al.: Fostering the ecosystem of open neural encoders for Portuguese with Albertina PT* family. In: SIGUL workshop, pp. 105\u2013114 (2024)"},{"key":"3_CR20","unstructured":"Santos, R., Silva, J., et\u00a0al.: Advancing generative AI for Portuguese with open decoder Gerv\u00e1sio PT*. In: SIGUL Workshop, pp. 16\u201326 (2024)"},{"key":"3_CR21","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv:2010.02502 (2020)"},{"key":"3_CR22","unstructured":"Touvron, H., Martin, L., Stone, K., et\u00a0al.: LLaMA\u00a02: open foundation and fine-tuned chat models. arXiv:2307.09288 (2023)"},{"key":"3_CR23","unstructured":"Yang, C., Wang, X., Lu, Y., et\u00a0al.: Large language models as optimizers. arXiv:2309.03409 (2023)"},{"key":"3_CR24","unstructured":"Zhang, K., Mo, L., Chen, W., et\u00a0al.: MagicBrush: a manually annotated dataset for instruction-guided image editing. NeurIPS 36 (2024)"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, S., Yang, X., Feng, Y., et\u00a0al.: HIVE: harnessing human feedback for instructional visual editing. arXiv:2303.09618 (2023)","DOI":"10.1109\/CVPR52733.2024.00862"},{"key":"3_CR26","unstructured":"Zhuang, P., Koyejo, O., Schwing, A.G.: Enjoy your editing: controllable GANs for image editing via latent space navigation. arXiv:2102.01187 (2021)"}],"container-title":["Lecture Notes in Computer Science","Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73497-7_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T05:07:18Z","timestamp":1731647238000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73497-7_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,16]]},"ISBN":["9783031734960","9783031734977"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73497-7_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,16]]},"assertion":[{"value":"16 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EPIA Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Viana do Castelo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epia2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/epia2024.pt","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}