{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:45:12Z","timestamp":1767339912231,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":86,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"name":"JSPS KAKENHI","award":["23H00497,20K19822"],"award-info":[{"award-number":["23H00497,20K19822"]}]},{"name":"JST FOREST","award":["JPMJFR216O"],"award-info":[{"award-number":["JPMJFR216O"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592262","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"199-208","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":24,"title":["Not Only Generative Art: Stable Diffusion for Content-Style Disentanglement in Art Analysis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7175-8307","authenticated-orcid":false,"given":"Yankun","family":"Wu","sequence":"first","affiliation":[{"name":"Osaka University, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8000-3567","authenticated-orcid":false,"given":"Yuta","family":"Nakashima","sequence":"additional","affiliation":[{"name":"Osaka University, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9200-6359","authenticated-orcid":false,"given":"Noa","family":"Garcia","sequence":"additional","affiliation":[{"name":"Osaka University, Japan"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Zechen Bai Yuta Nakashima and Noa Garcia. 2021. Explain me the painting: Multi-topic knowledgeable art description generation. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00537"},{"key":"e_1_3_2_2_2_1","volume-title":"Alessio\u00a0Del Bue, and Jo\u00e3o\u00a0Paulo Costeira.","author":"Carneiro Gustavo","year":"2012","unstructured":"Gustavo Carneiro, Nuno Pinho\u00a0da Silva, Alessio\u00a0Del Bue, and Jo\u00e3o\u00a0Paulo Costeira. 2012. Artistic image classification: An analysis on the printart database. In ECCV."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Mathilde Caron Hugo Touvron Ishan Misra Herv\u00e9 J\u00e9gou Julien Mairal Piotr Bojanowski and Armand Joulin. 2021. Emerging properties in self-supervised vision transformers. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_2_4_1","volume-title":"Towards generating and evaluating iconographic image captions of artworks. Journal of Imaging","author":"Cetinic Eva","year":"2021","unstructured":"Eva Cetinic. 2021. Towards generating and evaluating iconographic image captions of artworks. Journal of Imaging (2021)."},{"key":"e_1_3_2_2_5_1","volume-title":"Fine-tuning convolutional neural networks for fine art classification. Expert Systems with Applications","author":"Cetinic Eva","year":"2018","unstructured":"Eva Cetinic, Tomislav Lipic, and Sonja Grgic. 2018. Fine-tuning convolutional neural networks for fine art classification. Expert Systems with Applications (2018)."},{"key":"e_1_3_2_2_6_1","volume-title":"Dualast: Dual style-learning networks for artistic style transfer. In CVPR.","author":"Chen Haibo","year":"2021","unstructured":"Haibo Chen, Lei Zhao, Zhizhong Wang, Huiming Zhang, Zhiwen Zuo, Ailin Li, Wei Xing, and Dongming Lu. 2021. Dualast: Dual style-learning networks for artistic style transfer. In CVPR."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Haibo Chen Lei Zhao Huiming Zhang Zhizhong Wang Zhiwen Zuo Ailin Li Wei Xing and Dongming Lu. 2021. Diverse image style transfer via invertible cross-space mapping. In ICCV.","DOI":"10.1109\/ICCV48922.2021.01461"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Liyi Chen and Jufeng Yang. 2019. Recognizing the style of visual arts via adaptive cross-layer correlation. In ACM MM.","DOI":"10.1145\/3343031.3350977"},{"key":"e_1_3_2_2_9_1","unstructured":"Ting Chen Simon Kornblith Mohammad Norouzi and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In ICML."},{"key":"e_1_3_2_2_10_1","volume-title":"InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets. NeurIPS","author":"Chen Xi","year":"2016","unstructured":"Xi Chen, Yan Duan, Rein Houthooft, John Schulman, Ilya Sutskever, and Pieter Abbeel. 2016. InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets. NeurIPS (2016)."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Xinlei Chen and Kaiming He. 2021. Exploring simple siamese representation learning. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"Ruizhe Cheng Bichen Wu Peizhao Zhang Peter Vajda and Joseph\u00a0E Gonzalez. 2021. Data-efficient language-supervised zero-shot learning with self-distillation. In CVPR.","DOI":"10.1109\/CVPRW53098.2021.00348"},{"key":"e_1_3_2_2_13_1","volume-title":"Image style classification based on learnt deep correlation features. Transactions on Multimedia","author":"Chu Wei-Ta","year":"2018","unstructured":"Wei-Ta Chu and Yi-Ling Wu. 2018. Image style classification based on learnt deep correlation features. Transactions on Multimedia (2018)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"John Collomosse Tu Bui Michael\u00a0J Wilber Chen Fang and Hailin Jin. 2017. Sketching with style: Visual search with sketches and aesthetic context. In ICCV.","DOI":"10.1109\/ICCV.2017.290"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"crossref","unstructured":"Elliot\u00a0J Crowley and Andrew Zisserman. 2014. The state of the art: Object retrieval in paintings using discriminative regions. In BMVC.","DOI":"10.5244\/C.28.38"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Yingying Deng Fan Tang Weiming Dong Wen Sun Feiyue Huang and Changsheng Xu. 2020. Arbitrary style transfer via multi-adaptation network. In ACM MM.","DOI":"10.1145\/3394171.3414015"},{"key":"e_1_3_2_2_17_1","volume-title":"Unsupervised learning of disentangled representations from video. NeurIPS","author":"L Denton","year":"2017","unstructured":"Emily\u00a0L Denton 2017. Unsupervised learning of disentangled representations from video. NeurIPS (2017)."},{"key":"e_1_3_2_2_18_1","volume-title":"CogView: Mastering text-to-image generation via transformers. NeurIPS","author":"Ding Ming","year":"2021","unstructured":"Ming Ding, Zhuoyi Yang, Wenyi Hong, Wendi Zheng, Chang Zhou, Da Yin, Junyang Lin, Xu Zou, Zhou Shao, Hongxia Yang, 2021. CogView: Mastering text-to-image generation via transformers. NeurIPS (2021)."},{"key":"e_1_3_2_2_19_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly 2021. An image is worth 16x16 words: Transformers for image recognition at scale. In ICLR."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Cheikh\u00a0Brahim El\u00a0Vaigh Noa Garcia Benjamin Renoust Chenhui Chu Yuta Nakashima and Hajime Nagahara. 2021. GCNBoost: Artwork classification by label propagation through a knowledge graph. In ICMR.","DOI":"10.1145\/3460426.3463636"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Ahmed Elgammal Bingchen Liu Diana Kim Mohamed Elhoseiny and Marian Mazzone. 2018. The shape of art history in the eyes of the machine. In AAAI.","DOI":"10.1609\/aaai.v32i1.11894"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2016.7760382"},{"key":"e_1_3_2_2_23_1","volume-title":"Improving style-content disentanglement in image-to-image translation. arXiv preprint arXiv:2007.04964","author":"Gabbay Aviv","year":"2020","unstructured":"Aviv Gabbay and Yedid Hoshen. 2020. Improving style-content disentanglement in image-to-image translation. arXiv preprint arXiv:2007.04964 (2020)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"Noa Garcia Benjamin Renoust and Yuta Nakashima. 2019. Context-aware embeddings for automatic art analysis. In ICMR.","DOI":"10.1145\/3323873.3325028"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-019-00189-4"},{"key":"e_1_3_2_2_26_1","volume-title":"ECCV Workshops.","author":"Garcia Noa","year":"2018","unstructured":"Noa Garcia and George Vogiatzis. 2018. How to read paintings: semantic art understanding with multi-modal retrieval. In ECCV Workshops."},{"key":"e_1_3_2_2_27_1","volume-title":"A Neural Algorithm of Artistic Style. Nature Communications","author":"Gatys LA","year":"2015","unstructured":"LA Gatys, AS Ecker, and M Bethge. 2015. A Neural Algorithm of Artistic Style. Nature Communications (2015)."},{"key":"e_1_3_2_2_28_1","volume-title":"Texture synthesis using convolutional neural networks. NeurIPS","author":"Gatys Leon","year":"2015","unstructured":"Leon Gatys, Alexander\u00a0S Ecker, and Matthias Bethge. 2015. Texture synthesis using convolutional neural networks. NeurIPS (2015)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"crossref","unstructured":"Leon\u00a0A Gatys Alexander\u00a0S Ecker and Matthias Bethge. 2016. Image style transfer using convolutional neural networks. In CVPR.","DOI":"10.1109\/CVPR.2016.265"},{"key":"e_1_3_2_2_30_1","volume-title":"Weakly Supervised Object Detection in Artworks. In ECCV Workshops.","author":"Gonthier Nicolas","year":"2018","unstructured":"Nicolas Gonthier, Yann Gousseau, Said Ladjal, and Olivier Bonfait. 2018. Weakly Supervised Object Detection in Artworks. In ECCV Workshops."},{"key":"e_1_3_2_2_31_1","volume-title":"Joost Van De\u00a0Weijer, and Yoshua Bengio","author":"Gonzalez-Garcia Abel","year":"2018","unstructured":"Abel Gonzalez-Garcia, Joost Van De\u00a0Weijer, and Yoshua Bengio. 2018. Image-to-image translation for cross-domain disentanglement. NeurIPS (2018)."},{"key":"e_1_3_2_2_32_1","unstructured":"Kaiming He X. Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR."},{"key":"e_1_3_2_2_33_1","unstructured":"Irina Higgins Loic Matthey Arka Pal Christopher Burgess Xavier Glorot Matthew Botvinick Shakir Mohamed and Alexander Lerchner. 2017. \u03b2 -VAE: Learning basic visual concepts with a constrained variational framework. In ICLR."},{"key":"e_1_3_2_2_34_1","volume-title":"Denoising diffusion probabilistic models. NeurIPS","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. NeurIPS (2020)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"crossref","unstructured":"Xun Huang and Serge Belongie. 2017. Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization. In ICCV.","DOI":"10.1109\/ICCV.2017.167"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"crossref","unstructured":"Xun Huang Ming-Yu Liu Serge Belongie and Jan Kautz. 2018. Multimodal unsupervised image-to-image translation. In ECCV.","DOI":"10.1007\/978-3-030-01219-9_11"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"crossref","unstructured":"Sergey Karayev Matthew Trentacoste Helen Han Aseem Agarwala Trevor Darrell Aaron Hertzmann and Holger Winnemoeller. 2014. Recognizing image style. In BMVC.","DOI":"10.5244\/C.28.122"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Hadi Kazemi Seyed\u00a0Mehdi Iranmanesh and Nasser Nasrabadi. 2019. Style and content disentanglement in generative adversarial networks. In WACV.","DOI":"10.1109\/WACV.2019.00095"},{"key":"e_1_3_2_2_39_1","volume-title":"Khan and Nanne van Noord","author":"J.","year":"2021","unstructured":"Selina\u00a0J. Khan and Nanne van Noord. 2021. Stylistic Multi-Task Analysis of Ukiyo-e Woodblock Prints. In BMVC."},{"key":"e_1_3_2_2_40_1","unstructured":"Valentin Khrulkov Leyla Mirvakhabova Ivan Oseledets and Artem Babenko. 2022. Disentangled representations from non-disentangled models. In ICLR."},{"key":"e_1_3_2_2_41_1","unstructured":"Gwanghyun Kim Taesung Kwon and Jong\u00a0Chul Ye. 2022. DiffusionCLIP: Text-Guided Diffusion Models for Robust Image Manipulation. In CVPR."},{"key":"e_1_3_2_2_42_1","volume-title":"Adam: A method for stochastic optimization. In ICLR.","author":"Kingma P","year":"2015","unstructured":"Diederick\u00a0P Kingma and Jimmy Ba. 2015. Adam: A method for stochastic optimization. In ICLR."},{"key":"e_1_3_2_2_43_1","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2014. Auto-encoding variational bayes. In ICLR."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"crossref","unstructured":"Dmytro Kotovenko Artsiom Sanakoyeu Sabine Lang and Bjorn Ommer. 2019. Content and style disentanglement for artistic style transfer. In ICCV.","DOI":"10.1109\/ICCV.2019.00452"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"crossref","unstructured":"Gihyun Kwon and Jong\u00a0Chul Ye. 2022. CLIPstyler: Image style transfer with a single text condition. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01753"},{"key":"e_1_3_2_2_46_1","unstructured":"Gihyun Kwon and Jong\u00a0Chul Ye. 2023. Diffusion-based image translation using disentangled style and content representation. In ICLR."},{"key":"e_1_3_2_2_47_1","volume-title":"ECCV Workshops.","author":"Lang Sabine","year":"2018","unstructured":"Sabine Lang and Bjorn Ommer. 2018. Reflecting on how artworks are processed and analyzed by computer vision. In ECCV Workshops."},{"key":"e_1_3_2_2_48_1","volume-title":"Asian Conference on Machine Learning.","author":"Lecoutre Adrian","year":"2017","unstructured":"Adrian Lecoutre, Benjamin Negrevergne, and Florian Yger. 2017. Recognizing art style automatically in painting with deep learning. In Asian Conference on Machine Learning."},{"key":"e_1_3_2_2_49_1","unstructured":"Zhiheng Li Martin\u00a0Renqiang Min Kai Li and Chenliang Xu. 2022. StyleT2I: Toward Compositional and High-Fidelity Text-to-Image Synthesis. In CVPR."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Wentong Liao Kai Hu Michael\u00a0Ying Yang and Bodo Rosenhahn. 2022. Text to image generation with semantic-spatial aware GAN. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01765"},{"key":"e_1_3_2_2_51_1","unstructured":"Luping Liu Yi Ren Zhijie Lin and Zhou Zhao. 2022. Pseudo numerical methods for diffusion models on manifolds. In ICLR."},{"key":"e_1_3_2_2_52_1","unstructured":"Xiao Liu Spyridon Thermos Gabriele Valvano Agisilaos Chartsias Alison O\u2019Neil and Sotirios\u00a0A Tsaftaris. 2021. Measuring the Biases and Effectiveness of Content-Style Disentanglement. In BMVC."},{"key":"e_1_3_2_2_53_1","unstructured":"Daiqian Ma Feng Gao Yan Bai Yihang Lou Shiqi Wang Tiejun Huang and Ling-Yu Duan. 2017. From part to whole: who is behind the painting?. In ACM MM."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Hui Mao Ming Cheung and James She. 2017. DeepArt: Learning joint representations of visual arts. In ACM MM.","DOI":"10.1145\/3123266.3123405"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"crossref","unstructured":"Thomas Mensink and Jan Van\u00a0Gemert. 2014. The rijksmuseum challenge: Museum-centered visual recognition. In ICMR.","DOI":"10.1145\/2578726.2578791"},{"key":"e_1_3_2_2_56_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark 2021. Learning transferable visual models from natural language supervision. In ICML."},{"key":"e_1_3_2_2_57_1","volume-title":"Hierarchical text-conditional image generation with CLIP latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with CLIP latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"crossref","unstructured":"Dan Ruta Saeid Motiian Baldo Faieta Zhe Lin Hailin Jin Alex Filipkowski Andrew Gilbert and John Collomosse. 2021. ALADIN: all layer adaptive instance normalization for fine-grained style similarity. In ICCV.","DOI":"10.1109\/ICCV48922.2021.01171"},{"key":"e_1_3_2_2_60_1","volume-title":"ECCV Workshops.","author":"Sabatelli Matthia","year":"2018","unstructured":"Matthia Sabatelli, Mike Kestemont, Walter Daelemans, and Pierre Geurts. 2018. Deep transfer learning for art classification problems. In ECCV Workshops."},{"key":"e_1_3_2_2_61_1","volume-title":"Photorealistic text-to-image diffusion models with deep language understanding. NeurIPS","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily\u00a0L Denton, Kamyar Ghasemipour, Raphael Gontijo\u00a0Lopes, Burcu Karagol\u00a0Ayan, Tim Salimans, 2022. Photorealistic text-to-image diffusion models with deep language understanding. NeurIPS (2022)."},{"key":"e_1_3_2_2_62_1","volume-title":"International Journal for Digital Art History","author":"Saleh Babak","year":"2016","unstructured":"Babak Saleh and Ahmed Elgammal. 2016. Large-scale Classification of Fine-Art Paintings: Learning The Right Metric on The Right Feature. International Journal for Digital Art History (2016)."},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2907986"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"crossref","unstructured":"Mert\u00a0Bulent Sariyildiz Karteek Alahari Diane Larlus and Yannis Kalantidis. 2023. Fake it till you make it: Learning transferable representations from synthetic ImageNet clones. In CVPR.","DOI":"10.1109\/CVPR52729.2023.00774"},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"crossref","unstructured":"Florian Schroff Dmitry Kalenichenko and James Philbin. 2015. FaceNet: A unified embedding for face recognition and clustering. In CVPR.","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"e_1_3_2_2_66_1","volume-title":"LAION-5B: An open large-scale dataset for training next generation image-text models. NeurIPS","author":"Schuhmann Christoph","year":"2022","unstructured":"Christoph Schuhmann, Romain Beaumont, Richard Vencu, Cade Gordon, Ross Wightman, Mehdi Cherti, Theo Coombes, Aarush Katta, Clayton Mullis, Mitchell Wortsman, 2022. LAION-5B: An open large-scale dataset for training next generation image-text models. NeurIPS (2022)."},{"key":"e_1_3_2_2_67_1","unstructured":"Huajie Shao Shuochao Yao Dachun Sun Aston Zhang Shengzhong Liu Dongxin Liu Jun Wang and Tarek Abdelzaher. 2020. ControlVAE: Controllable variational autoencoder. In ICML."},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"crossref","unstructured":"Xi Shen Alexei\u00a0A Efros and Mathieu Aubry. 2019. Discovering visual patterns in art collections with spatially-consistent feature learning. In CVPR.","DOI":"10.1109\/CVPR.2019.00950"},{"key":"e_1_3_2_2_69_1","unstructured":"Yichun Shi Xiao Yang Yangyue Wan and Xiaohui Shen. 2022. SemanticStyleGAN: Learning Compositional Generative Priors for Controllable Image Synthesis and Editing. In CVPR."},{"key":"e_1_3_2_2_70_1","volume-title":"Improved deep metric learning with multi-class n-pair loss objective. Advances in neural information processing systems","author":"Sohn Kihyuk","year":"2016","unstructured":"Kihyuk Sohn. 2016. Improved deep metric learning with multi-class n-pair loss objective. Advances in neural information processing systems (2016)."},{"key":"e_1_3_2_2_71_1","doi-asserted-by":"crossref","unstructured":"Gjorgji Strezoski and Marcel Worring. 2018. OmniArt: a large-scale artistic benchmark. TOMM.","DOI":"10.1145\/3273022"},{"key":"e_1_3_2_2_72_1","volume-title":"KT-GAN: knowledge-transfer generative adversarial network for text-to-image synthesis. Transactions on Image Processing","author":"Tan Hongchen","year":"2020","unstructured":"Hongchen Tan, Xiuping Liu, Meng Liu, Baocai Yin, and Xin Li. 2020. KT-GAN: knowledge-transfer generative adversarial network for text-to-image synthesis. Transactions on Image Processing (2020)."},{"key":"e_1_3_2_2_73_1","volume-title":"Improved ArtGAN for Conditional Synthesis of Natural Image and Artwork. Transactions on Image Processing","author":"Tan Wei\u00a0Ren","year":"2019","unstructured":"Wei\u00a0Ren Tan, Chee\u00a0Seng Chan, Hernan Aguirre, and Kiyoshi Tanaka. 2019. Improved ArtGAN for Conditional Synthesis of Natural Image and Artwork. Transactions on Image Processing (2019)."},{"key":"e_1_3_2_2_74_1","unstructured":"Wei\u00a0Ren Tan Chee\u00a0Seng Chan Hern\u00e1n\u00a0E Aguirre and Kiyoshi Tanaka. 2016. Ceci n\u2019est pas une pipe: A deep convolutional network for fine-art paintings classification. In ICIP."},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"crossref","unstructured":"Ming Tao Hao Tang Fei Wu Xiao-Yuan Jing Bing-Kun Bao and Changsheng Xu. 2022. DF-GAN: A Simple and Effective Baseline for Text-to-Image Synthesis. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"crossref","unstructured":"Luan Tran Xi Yin and Xiaoming Liu. 2017. Disentangled representation learning GAN for pose-invariant face recognition. In CVPR.","DOI":"10.1109\/CVPR.2017.141"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"crossref","unstructured":"Narek Tumanyan Omer Bar-Tal Shai Bagon and Tali Dekel. 2022. Splicing ViT Features for Semantic Appearance Transfer. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01048"},{"key":"e_1_3_2_2_78_1","volume-title":"Toward discovery of the artist\u2019s style: Learning to recognize artists by their artworks","author":"Van\u00a0Noord Nanne","year":"2015","unstructured":"Nanne Van\u00a0Noord, Ella Hendriks, and Eric Postma. 2015. Toward discovery of the artist\u2019s style: Learning to recognize artists by their artworks. IEEE Signal Processing Magazine (2015)."},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"crossref","unstructured":"Michael\u00a0J Wilber Chen Fang Hailin Jin Aaron Hertzmann John Collomosse and Serge Belongie. 2017. BAM! The behance artistic media dataset for recognition beyond photography. In ICCV.","DOI":"10.1109\/ICCV.2017.136"},{"key":"e_1_3_2_2_80_1","volume-title":"Uncovering the Disentanglement Capability in Text-to-Image Diffusion Models. arXiv preprint arXiv:2212.08698","author":"Wu Qiucheng","year":"2022","unstructured":"Qiucheng Wu, Yujian Liu, Handong Zhao, Ajinkya Kale, Trung Bui, Tong Yu, Zhe Lin, Yang Zhang, and Shiyu Chang. 2022. Uncovering the Disentanglement Capability in Text-to-Image Diffusion Models. arXiv preprint arXiv:2212.08698 (2022)."},{"key":"e_1_3_2_2_81_1","doi-asserted-by":"crossref","unstructured":"Xin Xie Yi Li Huaibo Huang Haiyan Fu Wanwan Wang and Yanqing Guo. 2022. Artistic Style Discovery With Independent Components. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01925"},{"key":"e_1_3_2_2_82_1","unstructured":"Zipeng Xu Tianwei Lin Hao Tang Fu Li Dongliang He Nicu Sebe Radu Timofte Luc Van\u00a0Gool and Errui Ding. 2022. Predict Prevent and Evaluate: Disentangled Text-Driven Image Manipulation Empowered by Pre-Trained Vision-Language Model. In CVPR."},{"key":"e_1_3_2_2_83_1","unstructured":"Nikolaos-Antonios Ypsilantis Noa Garcia Guangxing Han Sarah Ibrahimi Nanne Van\u00a0Noord and Giorgos Tolias. 2021. The Met dataset: Instance-level recognition for artworks. In NeurIPS Datasets and Benchmarks Track."},{"key":"e_1_3_2_2_84_1","volume-title":"Multi-mapping image-to-image translation via learning disentanglement. NeurIPS","author":"Yu Xiaoming","year":"2019","unstructured":"Xiaoming Yu, Yuanqi Chen, Shan Liu, Thomas Li, and Ge Li. 2019. Multi-mapping image-to-image translation via learning disentanglement. NeurIPS (2019)."},{"key":"e_1_3_2_2_85_1","doi-asserted-by":"crossref","unstructured":"Renrui Zhang Ziyu Guo Wei Zhang Kunchang Li Xupeng Miao Bin Cui Yu Qiao Peng Gao and Hongsheng Li. 2022. PointCLIP: Point cloud understanding by CLIP. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00836"},{"key":"e_1_3_2_2_86_1","doi-asserted-by":"crossref","unstructured":"Yufan Zhou Ruiyi Zhang Changyou Chen Chunyuan Li Chris Tensmeyer Tong Yu Jiuxiang Gu Jinhui Xu and Tong Sun. 2022. Towards Language-Free Training for Text-to-Image Generation. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01738"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592262","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:30Z","timestamp":1750178250000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592262"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":86,"alternative-id":["10.1145\/3591106.3592262","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592262","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}