{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T14:03:48Z","timestamp":1772719428947,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,10]],"date-time":"2023-12-10T00:00:00Z","timestamp":1702166400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,10]]},"DOI":"10.1145\/3610548.3618180","type":"proceedings-article","created":{"date-parts":[[2023,12,11]],"date-time":"2023-12-11T12:28:40Z","timestamp":1702297720000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["Diffusing Colors: Image Colorization with Text Guided Diffusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5414-0680","authenticated-orcid":false,"given":"Nir","family":"Zabari","sequence":"first","affiliation":[{"name":"Lightricks, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4421-8477","authenticated-orcid":false,"given":"Aharon","family":"Azulay","sequence":"additional","affiliation":[{"name":"Lightricks, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4134-3472","authenticated-orcid":false,"given":"Alexey","family":"Gorkor","sequence":"additional","affiliation":[{"name":"Lightricks, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9288-5392","authenticated-orcid":false,"given":"Tavi","family":"Halperin","sequence":"additional","affiliation":[{"name":"Lightricks, Israel"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7109-4006","authenticated-orcid":false,"given":"Ohad","family":"Fried","sequence":"additional","affiliation":[{"name":"Reichman University, Israel"}]}],"member":"320","published-online":{"date-parts":[[2023,12,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Jason Antic. 2019. DeOldify: A open-source project for colorizing old images (and video). (2019)."},{"key":"e_1_3_2_2_2_1","volume-title":"Blended Latent Diffusion. arXiv preprint arXiv:2206.02779","author":"Avrahami Omri","year":"2022","unstructured":"Omri Avrahami, Ohad Fried, and Dani Lischinski. 2022a. Blended Latent Diffusion. arXiv preprint arXiv:2206.02779 (2022)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01762"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_2_2_5_1","volume-title":"Cold Diffusion: Inverting Arbitrary Image Transforms Without Noise. ArXiv abs\/2208.09392","author":"Bansal Arpit","year":"2022","unstructured":"Arpit Bansal, Eitan Borgnia, Hong-Min Chu, Jie Li, Hamideh Kazemi, Furong Huang, Micah Goldblum, Jonas Geiping, and Tom Goldstein. 2022. Cold Diffusion: Inverting Arbitrary Image Transforms Without Noise. ArXiv abs\/2208.09392 (2022)."},{"key":"e_1_3_2_2_6_1","volume-title":"Language Models are Few-Shot Learners. ArXiv abs\/2005.14165","author":"Brown B.","year":"2020","unstructured":"Tom\u00a0B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, T.\u00a0J. Henighan, Rewon Child, Aditya Ramesh, Daniel\u00a0M. Ziegler, Jeff Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. ArXiv abs\/2005.14165 (2020)."},{"key":"e_1_3_2_2_7_1","volume-title":"COCO-Stuff: Thing and Stuff Classes in Context. 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Caesar Holger","year":"2016","unstructured":"Holger Caesar, Jasper R.\u00a0R. Uijlings, and Vittorio Ferrari. 2016. COCO-Stuff: Thing and Stuff Classes in Context. 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2016), 1209\u20131218."},{"key":"e_1_3_2_2_8_1","volume-title":"L-CoDer: Language-Based Colorization with Color-Object Decoupling Transformer. In European Conference on Computer Vision.","author":"Chang Zheng","year":"2022","unstructured":"Zheng Chang, Shuchen Weng, Yu Li, Si Li, and Boxin Shi. 2022. L-CoDer: Language-Based Colorization with Color-Object Decoupling Transformer. In European Conference on Computer Vision."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_10_1","volume-title":"Diffusion Models Beat GANs on Image Synthesis. ArXiv abs\/2105.05233","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alex Nichol. 2021. Diffusion Models Beat GANs on Image Synthesis. ArXiv abs\/2105.05233 (2021)."},{"key":"e_1_3_2_2_11_1","volume-title":"StyleGAN-NADA: CLIP-Guided Domain Adaptation of Image Generators. ArXiv abs\/2108.00946","author":"Gal Rinon","year":"2021","unstructured":"Rinon Gal, Or Patashnik, Haggai Maron, Gal Chechik, and Daniel Cohen-Or. 2021. StyleGAN-NADA: CLIP-Guided Domain Adaptation of Image Generators. ArXiv abs\/2108.00946 (2021)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1117\/12.477378"},{"key":"e_1_3_2_2_13_1","volume-title":"Prompt-to-Prompt Image Editing with Cross Attention Control. ArXiv abs\/2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay\u00a0M. Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-Prompt Image Editing with Cross Attention Control. ArXiv abs\/2208.01626 (2022)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295408"},{"key":"e_1_3_2_2_15_1","volume-title":"Imagen Video: High Definition Video Generation with Diffusion Models. ArXiv abs\/2210.02303","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, William Chan, Chitwan Saharia, Jay Whang, Ruiqi Gao, Alexey\u00a0A. Gritsenko, Diederik\u00a0P. Kingma, Ben Poole, Mohammad Norouzi, David\u00a0J. Fleet, and Tim Salimans. 2022a. Imagen Video: High Definition Video Generation with Diffusion Models. ArXiv abs\/2210.02303 (2022)."},{"key":"e_1_3_2_2_16_1","unstructured":"Jonathan Ho Ajay Jain and P. Abbeel. 2020. Denoising Diffusion Probabilistic Models. ArXiv abs\/2006.11239 (2020)."},{"key":"e_1_3_2_2_17_1","article-title":"Cascaded Diffusion Models for High Fidelity Image Generation","volume":"23","author":"Ho Jonathan","year":"2021","unstructured":"Jonathan Ho, Chitwan Saharia, William Chan, David\u00a0J. Fleet, Mohammad Norouzi, and Tim Salimans. 2021. Cascaded Diffusion Models for High Fidelity Image Generation. J. Mach. Learn. Res. 23 (2021), 47:1\u201347:33.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_2_18_1","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-Free Diffusion Guidance. arxiv:2207.12598\u00a0[cs.LG]"},{"key":"e_1_3_2_2_19_1","volume-title":"Video Diffusion Models. ArXiv abs\/2204.03458","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho, Tim Salimans, Alexey Gritsenko, William Chan, Mohammad Norouzi, and David\u00a0J. Fleet. 2022b. Video Diffusion Models. ArXiv abs\/2204.03458 (2022)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555457"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_2"},{"key":"e_1_3_2_2_22_1","volume-title":"Imagic: Text-Based Real Image Editing with Diffusion Models. ArXiv abs\/2210.09276","author":"Kawar Bahjat","year":"2022","unstructured":"Bahjat Kawar, Shiran Zada, Oran Lang, Omer Tov, Hui-Tang Chang, Tali Dekel, Inbar Mosseri, and Michal Irani. 2022. Imagic: Text-Based Real Image Editing with Diffusion Models. ArXiv abs\/2210.09276 (2022)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_21"},{"key":"e_1_3_2_2_24_1","volume-title":"DiffWave: A Versatile Diffusion Model for Audio Synthesis. ArXiv abs\/2009.09761","author":"Kong Zhifeng","year":"2020","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2020. DiffWave: A Versatile Diffusion Model for Audio Synthesis. ArXiv abs\/2009.09761 (2020)."},{"key":"e_1_3_2_2_25_1","volume-title":"ArXiv abs\/2102.04432","author":"Kumar Manoj","year":"2021","unstructured":"Manoj Kumar, Dirk Weissenborn, and Nal Kalchbrenner. 2021. Colorization Transformer. ArXiv abs\/2102.04432 (2021)."},{"key":"e_1_3_2_2_26_1","volume-title":"Learning Representations for Automatic Colorization. In European Conference on Computer Vision.","author":"Larsson Gustav","year":"2016","unstructured":"Gustav Larsson, Michael Maire, and Gregory Shakhnarovich. 2016a. Learning Representations for Automatic Colorization. In European Conference on Computer Vision."},{"key":"e_1_3_2_2_27_1","volume-title":"Learning Representations for Automatic Colorization. In European Conference on Computer Vision.","author":"Larsson Gustav","year":"2016","unstructured":"Gustav Larsson, Michael Maire, and Gregory Shakhnarovich. 2016b. Learning Representations for Automatic Colorization. In European Conference on Computer Vision."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1186562.1015780"},{"key":"e_1_3_2_2_29_1","volume-title":"Improved Diffusion-based Image Colorization via Piggybacked Models. ArXiv abs\/2304.11105","author":"Liu Hanyuan","year":"2023","unstructured":"Hanyuan Liu, Jinbo Xing, Minshan Xie, Chengze Li, and Tien-Tsin Wong. 2023. Improved Diffusion-based Image Colorization via Piggybacked Models. ArXiv abs\/2304.11105 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258291599"},{"key":"e_1_3_2_2_30_1","volume-title":"Learning to Color from Language. ArXiv abs\/1804.06026","author":"Manjunatha Varun","year":"2018","unstructured":"Varun Manjunatha, Mohit Iyyer, Jordan\u00a0L. Boyd-Graber, and Larry\u00a0S. Davis. 2018. Learning to Color from Language. ArXiv abs\/1804.06026 (2018)."},{"key":"e_1_3_2_2_31_1","volume-title":"Grad-TTS: A Diffusion Probabilistic Model for Text-to-Speech. In International Conference on Machine Learning.","author":"Popov Vadim","year":"2021","unstructured":"Vadim Popov, Ivan Vovk, Vladimir Gogoryan, Tasnima Sadekova, and Mikhail Kudinov. 2021. Grad-TTS: A Diffusion Probabilistic Model for Text-to-Speech. In International Conference on Machine Learning."},{"key":"e_1_3_2_2_32_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_2_33_1","volume-title":"Hierarchical Text-Conditional Image Generation with CLIP Latents. ArXiv abs\/2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. ArXiv abs\/2204.06125 (2022)."},{"key":"e_1_3_2_2_34_1","volume-title":"Zero-Shot Text-to-Image Generation. ArXiv abs\/2102.12092","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. ArXiv abs\/2102.12092 (2021)."},{"key":"e_1_3_2_2_35_1","volume-title":"High-Resolution Image Synthesis with Latent Diffusion Models. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Rombach Robin","year":"2021","unstructured":"Robin Rombach, A. Blattmann, Dominik Lorenz, Patrick Esser, and Bj\u00f6rn Ommer. 2021. High-Resolution Image Synthesis with Latent Diffusion Models. 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021), 10674\u201310685."},{"key":"e_1_3_2_2_36_1","volume-title":"Palette: Image-to-Image Diffusion Models. arxiv:2111.05826\u00a0[cs.CV]","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Huiwen Chang, Chris\u00a0A. Lee, Jonathan Ho, Tim Salimans, David\u00a0J. Fleet, and Mohammad Norouzi. 2022a. Palette: Image-to-Image Diffusion Models. arxiv:2111.05826\u00a0[cs.CV]"},{"key":"e_1_3_2_2_37_1","volume-title":"Burcu\u00a0Karagol Ayan, Seyedeh\u00a0Sara Mahdavi, Raphael\u00a0Gontijo Lopes, Tim Salimans, Jonathan Ho, David\u00a0J. Fleet, and Mohammad Norouzi.","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily\u00a0L. Denton, Seyed Kamyar\u00a0Seyed Ghasemipour, Burcu\u00a0Karagol Ayan, Seyedeh\u00a0Sara Mahdavi, Raphael\u00a0Gontijo Lopes, Tim Salimans, Jonathan Ho, David\u00a0J. Fleet, and Mohammad Norouzi. 2022b. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. ArXiv abs\/2205.11487 (2022)."},{"key":"e_1_3_2_2_38_1","volume-title":"Instance-Aware Image Colorization. 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Su Jheng-Wei","year":"2020","unstructured":"Jheng-Wei Su, Hung kuo Chu, and Jia-Bin Huang. 2020. Instance-Aware Image Colorization. 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020), 7965\u20137974."},{"key":"e_1_3_2_2_39_1","volume-title":"ChromaGAN: Adversarial Picture Colorization with Semantic Class Distribution. 2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","author":"Vitoria Patricia","year":"2019","unstructured":"Patricia Vitoria, Lara Raad, and Coloma Ballester. 2019. ChromaGAN: Adversarial Picture Colorization with Semantic Class Distribution. 2020 IEEE Winter Conference on Applications of Computer Vision (WACV) (2019), 2434\u20132443."},{"key":"e_1_3_2_2_40_1","volume-title":"Towards Real-World Blind Face Restoration with Generative Facial Prior. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Wang Xintao","year":"2021","unstructured":"Xintao Wang, Yu Li, Honglun Zhang, and Ying Shan. 2021. Towards Real-World Blind Face Restoration with Generative Facial Prior. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_41_1","volume-title":"Zero-Shot Image Restoration Using Denoising Diffusion Null-Space Model. ArXiv abs\/2212.00490","author":"Wang Yinhuai","year":"2022","unstructured":"Yinhuai Wang, Jiwen Yu, and Jian Zhang. 2022. Zero-Shot Image Restoration Using Denoising Diffusion Null-Space Model. ArXiv abs\/2212.00490 (2022)."},{"key":"e_1_3_2_2_42_1","volume-title":"L-CoDe: Language-Based Colorization Using Color-Object Decoupled Conditions. In AAAI Conference on Artificial Intelligence.","author":"Weng Shuchen","year":"2022","unstructured":"Shuchen Weng, Hao Wu, Zheng Chang, Jiajun Tang, Si Li, and Boxin Shi. 2022. L-CoDe: Language-Based Colorization Using Color-Object Decoupled Conditions. In AAAI Conference on Artificial Intelligence."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01411"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3550454.3555432"},{"key":"e_1_3_2_2_45_1","volume-title":"Adding Conditional Control to Text-to-Image Diffusion Models. ArXiv abs\/2302.05543","author":"Zhang Lvmin","year":"2023","unstructured":"Lvmin Zhang and Maneesh Agrawala. 2023. Adding Conditional Control to Text-to-Image Diffusion Models. ArXiv abs\/2302.05543 (2023)."},{"key":"e_1_3_2_2_46_1","volume-title":"Colorful Image Colorization. In European Conference on Computer Vision.","author":"Zhang Richard","year":"2016","unstructured":"Richard Zhang, Phillip Isola, and Alexei\u00a0A. Efros. 2016. Colorful Image Colorization. In European Conference on Computer Vision."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073710"}],"event":{"name":"SA '23: SIGGRAPH Asia 2023","location":"Sydney NSW Australia","acronym":"SA '23","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["SIGGRAPH Asia 2023 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610548.3618180","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610548.3618180","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T09:34:19Z","timestamp":1755768859000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610548.3618180"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,10]]},"references-count":47,"alternative-id":["10.1145\/3610548.3618180","10.1145\/3610548"],"URL":"https:\/\/doi.org\/10.1145\/3610548.3618180","relation":{},"subject":[],"published":{"date-parts":[[2023,12,10]]},"assertion":[{"value":"2023-12-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}