{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T11:26:38Z","timestamp":1769340398620,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":117,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,12,10]],"date-time":"2023-12-10T00:00:00Z","timestamp":1702166400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Adobe Inc."},{"name":"Sony Corporation"},{"name":"NSF","award":["IIS-2239076"],"award-info":[{"award-number":["IIS-2239076"]}]},{"name":"Naver Corporation"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,12,10]]},"DOI":"10.1145\/3610548.3618189","type":"proceedings-article","created":{"date-parts":[[2023,12,11]],"date-time":"2023-12-11T12:28:40Z","timestamp":1702297720000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Content-based Search for Deep Generative Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8733-6177","authenticated-orcid":false,"given":"Daohan","family":"Lu","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4000-2046","authenticated-orcid":false,"given":"Sheng-Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1799-1069","authenticated-orcid":false,"given":"Nupur","family":"Kumari","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States of 
America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3525-5765","authenticated-orcid":false,"given":"Rohan","family":"Agarwal","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3553-3732","authenticated-orcid":false,"given":"Mia","family":"Tang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1744-6765","authenticated-orcid":false,"given":"David","family":"Bau","sequence":"additional","affiliation":[{"name":"Northeastern University, United States of America"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8504-3410","authenticated-orcid":false,"given":"Jun-Yan","family":"Zhu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,12,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2022. Civit AI. https:\/\/civitai.com."},{"key":"e_1_3_2_2_2_1","unstructured":"2022. Stable Diffusion Dreambooth Concepts Library. https:\/\/huggingface.co\/sd-dreambooth-library."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480559"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Ivan Anokhin Kirill Demochkin Taras Khakhulin Gleb Sterkin Victor Lempitsky and Denis Korzhenkov. 2021. Image Generators with Conditionally-Independent Pixel Synthesis. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01405"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"crossref","unstructured":"Relja Arandjelovi\u0107 and Andrew Zisserman. 2012. Three things everyone should know to improve object retrieval. In CVPR.","DOI":"10.1109\/CVPR.2012.6248018"},{"key":"e_1_3_2_2_6_1","unstructured":"Derek\u00a0Philip Au. 2019. 
This vessel does not exist.https:\/\/thisvesseldoesnotexist.com\/."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Omri Avrahami Dani Lischinski and Ohad Fried. 2022. Blended diffusion for text-driven editing of natural images. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Artem Babenko Anton Slesarev Alexandr Chigorin and Victor Lempitsky. 2014. Neural codes for image retrieval. In ECCV.","DOI":"10.1007\/978-3-319-10590-1_38"},{"key":"e_1_3_2_2_9_1","volume-title":"Modern information retrieval. Vol.\u00a0463","author":"Baeza-Yates Ricardo","unstructured":"Ricardo Baeza-Yates, Berthier Ribeiro-Neto, 1999. Modern information retrieval. Vol.\u00a0463. ACM press New York."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"crossref","unstructured":"David Bau Steven Liu Tongzhou Wang Jun-Yan Zhu and Antonio Torralba. 2020. Rewriting a deep generative model. In ECCV.","DOI":"10.1007\/978-3-030-58452-8_21"},{"key":"e_1_3_2_2_11_1","volume-title":"State-of-the-Art in the Architecture, Methods and Applications of StyleGAN. arXiv preprint arXiv:2202.14020","author":"Bermano H","year":"2022","unstructured":"Amit\u00a0H Bermano, Rinon Gal, Yuval Alaluf, Ron Mokady, Yotam Nitzan, Omer Tov, Or Patashnik, and Daniel Cohen-Or. 2022. State-of-the-Art in the Architecture, Methods and Applications of StyleGAN. arXiv preprint arXiv:2202.14020 (2022)."},{"key":"e_1_3_2_2_12_1","first-page":"15309","article-title":"Retrieval-augmented diffusion models","volume":"35","author":"Blattmann Andreas","year":"2022","unstructured":"Andreas Blattmann, Robin Rombach, Kaan Oktay, Jonas M\u00fcller, and Bj\u00f6rn Ommer. 2022. Retrieval-augmented diffusion models. Advances in Neural Information Processing Systems 35 (2022), 15309\u201315324.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_13_1","unstructured":"Andrew Brock Jeff Donahue and Karen Simonyan. 2019. 
Large scale gan training for high fidelity natural image synthesis. In ICLR."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_2_15_1","first-page":"27517","article-title":"Instance-conditioned gan","volume":"34","author":"Casanova Arantxa","year":"2021","unstructured":"Arantxa Casanova, Marlene Careil, Jakob Verbeek, Michal Drozdzal, and Adriana Romero\u00a0Soriano. 2021. Instance-conditioned gan. Advances in Neural Information Processing Systems 34 (2021), 27517\u201327529.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Caroline Chan Fredo Durand and Phillip Isola. 2022. Learning to generate line drawings that convey geometry and semantics. In CVPR.","DOI":"10.1109\/CVPR52688.2022.00776"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392386"},{"key":"e_1_3_2_2_18_1","volume-title":"Re-imagen: Retrieval-augmented text-to-image generator. In ICLR.","author":"Chen Wenhu","year":"2023","unstructured":"Wenhu Chen, Hexiang Hu, Chitwan Saharia, and William\u00a0W Cohen. 2023. Re-imagen: Retrieval-augmented text-to-image generator. In ICLR."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Yunjey Choi Youngjung Uh Jaejun Yoo and Jung-Woo Ha. 2020. StarGAN v2: Diverse Image Synthesis for Multiple Domains. In CVPR.","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"e_1_3_2_2_20_1","unstructured":"Navneet Dalal and Bill Triggs. 2005. Histograms of oriented gradients for human detection. In CVPR."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1348246.1348248"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"J. Deng W. Dong R. Socher L.-J. Li K. Li and L. Fei-Fei. 2009. ImageNet: A Large-Scale Hierarchical Image Database. 
In CVPR.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_23_1","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. In NeurIPS."},{"key":"e_1_3_2_2_24_1","volume-title":"Sketch-based image retrieval: Benchmark and bag-of-features descriptors","author":"Eitz Mathias","year":"2010","unstructured":"Mathias Eitz, Kristian Hildebrand, Tamy Boubekeur, and Marc Alexa. 2010. Sketch-based image retrieval: Benchmark and bag-of-features descriptors. IEEE transactions on visualization and computer graphics 17, 11 (2010), 1624\u20131636."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1511\/2019.107.1.18"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Patrick Esser Robin Rombach and Bjorn Ommer. 2021. Taming transformers for high-resolution image synthesis. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"e_1_3_2_2_27_1","unstructured":"Fartash Faghri David\u00a0J Fleet Jamie\u00a0Ryan Kiros and Sanja Fidler. 2017. Vse++: Improving visual-semantic embeddings with hard negatives. In BMVC."},{"key":"e_1_3_2_2_28_1","volume-title":"Devise: A deep visual-semantic embedding model. In NeurIPS.","author":"Frome Andrea","year":"2013","unstructured":"Andrea Frome, Greg\u00a0S Corrado, Jon Shlens, Samy Bengio, Jeff Dean, Marc\u2019Aurelio Ranzato, and Tomas Mikolov. 2013. Devise: A deep visual-semantic embedding model. In NeurIPS."},{"key":"e_1_3_2_2_29_1","volume-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618","author":"Gal Rinon","year":"2022","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit\u00a0H Bermano, Gal Chechik, and Daniel Cohen-Or. 2022a. An image is worth one word: Personalizing text-to-image generation using textual inversion. 
arXiv preprint arXiv:2208.01618 (2022)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592133"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530164"},{"key":"e_1_3_2_2_32_1","volume-title":"Iterative quantization: A procrustean approach to learning binary codes for large-scale image retrieval","author":"Gong Yunchao","year":"2012","unstructured":"Yunchao Gong, Svetlana Lazebnik, Albert Gordo, and Florent Perronnin. 2012. Iterative quantization: A procrustean approach to learning binary codes for large-scale image retrieval. IEEE transactions on pattern analysis and machine intelligence 35, 12 (2012), 2916\u20132929."},{"key":"e_1_3_2_2_33_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. In NeurIPS."},{"key":"e_1_3_2_2_34_1","unstructured":"Timofey Grigoryev Andrey Voynov and Artem Babenko. 2022. When Why and Which Pretrained GANs Are Useful?. In ICLR."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/2.410145"},{"key":"e_1_3_2_2_36_1","volume-title":"World models. arXiv preprint arXiv:1803.10122","author":"Ha David","year":"2018","unstructured":"David Ha and J\u00fcrgen Schmidhuber. 2018. World models. arXiv preprint arXiv:1803.10122 (2018)."},{"key":"e_1_3_2_2_37_1","volume-title":"Svdiff: Compact parameter space for diffusion fine-tuning. arXiv preprint arXiv:2303.11305","author":"Han Ligong","year":"2023","unstructured":"Ligong Han, Yinxiao Li, Han Zhang, Peyman Milanfar, Dimitris Metaxas, and Feng Yang. 2023. Svdiff: Compact parameter space for diffusion fine-tuning. arXiv preprint arXiv:2303.11305 (2023)."},{"key":"e_1_3_2_2_38_1","volume-title":"Prompt-to-prompt image editing with cross attention control. 
arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3347092"},{"key":"e_1_3_2_2_40_1","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In NeurIPS."},{"key":"e_1_3_2_2_41_1","volume-title":"Training products of experts by minimizing contrastive divergence. Neural computation 14, 8","author":"Hinton E","year":"2002","unstructured":"Geoffrey\u00a0E Hinton. 2002. Training products of experts by minimizing contrastive divergence. Neural computation 14, 8 (2002), 1771\u20131800."},{"key":"e_1_3_2_2_42_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In NeurIPS."},{"key":"e_1_3_2_2_43_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu J","year":"2021","unstructured":"Edward\u00a0J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2011.2109710"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"crossref","unstructured":"Xun Huang Arun Mallya Ting-Chun Wang and Ming-Yu Liu. 2022. Multimodal Conditional Image Synthesis with Product-of-Experts GANs. In ECCV.","DOI":"10.1007\/978-3-031-19787-1_6"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459860"},{"key":"e_1_3_2_2_47_1","volume-title":"Aggregating local descriptors into a compact image representation. 
In 2010 IEEE computer society conference on computer vision and pattern recognition","author":"J\u00e9gou Herv\u00e9","unstructured":"Herv\u00e9 J\u00e9gou, Matthijs Douze, Cordelia Schmid, and Patrick P\u00e9rez. 2010. Aggregating local descriptors into a compact image representation. In 2010 IEEE computer society conference on computer vision and pattern recognition. IEEE, 3304\u20133311."},{"key":"e_1_3_2_2_48_1","unstructured":"Chao Jia Yinfei Yang Ye Xia Yi-Ting Chen Zarana Parekh Hieu Pham Quoc Le Yun-Hsuan Sung Zhen Li and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In ICML."},{"key":"e_1_3_2_2_49_1","unstructured":"Andrej Karpathy Armand Joulin and Li\u00a0F Fei-Fei. 2014. Deep fragment embeddings for bidirectional image sentence mapping. In NeurIPS."},{"key":"e_1_3_2_2_50_1","unstructured":"Tero Karras Timo Aila Samuli Laine and Jaakko Lehtinen. 2018. Progressive growing of gans for improved quality stability and variation. In ICLR."},{"key":"e_1_3_2_2_51_1","unstructured":"Tero Karras Miika Aittala Janne Hellsten Samuli Laine Jaakko Lehtinen and Timo Aila. 2020a. Training Generative Adversarial Networks with Limited Data. In NeurIPS."},{"key":"e_1_3_2_2_52_1","unstructured":"Tero Karras Miika Aittala Samuli Laine Erik H\u00e4rk\u00f6nen Janne Hellsten Jaakko Lehtinen and Timo Aila. 2021. Alias-Free Generative Adversarial Networks. In NeurIPS."},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"crossref","unstructured":"Tero Karras Samuli Laine and Timo Aila. 2019. A style-based generator architecture for generative adversarial networks. In CVPR.","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"crossref","unstructured":"Tero Karras Samuli Laine Miika Aittala Janne Hellsten Jaakko Lehtinen and Timo Aila. 2020b. Analyzing and improving the image quality of stylegan. 
In CVPR.","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_2_55_1","volume-title":"Imagic: Text-based real image editing with diffusion models.","author":"Kawar Bahjat","year":"2023","unstructured":"Bahjat Kawar, Shiran Zada, Oran Lang, Omer Tov, Huiwen Chang, Tali Dekel, Inbar Mosseri, and Michal Irani. 2023. Imagic: Text-based real image editing with diffusion models. (2023), 6007\u20136017."},{"key":"e_1_3_2_2_56_1","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2014. Auto-encoding variational bayes. In ICLR."},{"key":"e_1_3_2_2_57_1","unstructured":"Alex Krizhevsky and Geoffrey\u00a0E Hinton. 2011. Using very deep autoencoders for content-based image retrieval.. In ESANN Vol.\u00a01. Citeseer 2."},{"key":"e_1_3_2_2_58_1","volume-title":"Multi-concept customization of text-to-image diffusion. (2023)","author":"Kumari Nupur","year":"2023","unstructured":"Nupur Kumari, Bingliang Zhang, Richard Zhang, Eli Shechtman, and Jun-Yan Zhu. 2023. Multi-concept customization of text-to-image diffusion. (2023), 1931\u20131941."},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"crossref","unstructured":"Nupur Kumari Richard Zhang Eli Shechtman and Jun-Yan Zhu. 2022. Ensembling Off-the-shelf Models for GAN Training. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01039"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459884"},{"key":"e_1_3_2_2_61_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arxiv:2201.12086\u00a0[cs.CV]","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. arxiv:2201.12086\u00a0[cs.CV]"},{"key":"e_1_3_2_2_62_1","unstructured":"Yijun Li Richard Zhang Jingwan Lu and Eli Shechtman. 2020. Few-shot image generation with elastic weight consolidation. 
In NeurIPS."},{"key":"e_1_3_2_2_63_1","unstructured":"Yen-Liang Lin Cheng-Yu Huang Hao-Jeng Wang and Winston Hsu. 2013. 3D sub-query expansion for improving sketch-based multi-view image retrieval. In ICCV."},{"key":"e_1_3_2_2_64_1","unstructured":"Bingchen Liu Yizhe Zhu Kunpeng Song and Ahmed Elgammal. 2021. Towards faster and stabilized gan training for high-fidelity few-shot image synthesis. In ICLR."},{"key":"e_1_3_2_2_65_1","unstructured":"Li Liu Fumin Shen Yuming Shen Xianglong Liu and Ling Shao. 2017. Deep sketch hashing: Fast free-hand sketch-based image retrieval. In CVPR."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_2_67_1","unstructured":"lucid layers. 2022. Datasets and pretrained Models for StyleGAN3. https:\/\/github.com\/edstoica\/lucid_stylegan3_datasets_models\/."},{"key":"e_1_3_2_2_68_1","volume-title":"Unified multi-modal latent diffusion for joint subject and text conditional image generation. arXiv preprint arXiv:2303.09319","author":"Ma Yiyang","year":"2023","unstructured":"Yiyang Ma, Huan Yang, Wenjing Wang, Jianlong Fu, and Jiaying Liu. 2023. Unified multi-modal latent diffusion for joint subject and text conditional image generation. arXiv preprint arXiv:2303.09319 (2023)."},{"key":"e_1_3_2_2_69_1","first-page":"100","article-title":"Introduction to information retrieval","volume":"16","author":"Manning Christopher","year":"2010","unstructured":"Christopher Manning, Prabhakar Raghavan, and Hinrich Sch\u00fctze. 2010. Introduction to information retrieval. Natural Language Engineering 16, 1 (2010), 100\u2013103.","journal-title":"Natural Language Engineering"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"e_1_3_2_2_71_1","volume-title":"CVPR Workshop.","author":"Mo Sangwoo","year":"2020","unstructured":"Sangwoo Mo, Minsu Cho, and Jinwoo Shin. 2020. Freeze the Discriminator: a Simple Baseline for Fine-Tuning GANs. 
In CVPR Workshop."},{"key":"e_1_3_2_2_72_1","doi-asserted-by":"crossref","unstructured":"Ron Mokady Michal Yarom Omer Tov Oran Lang Daniel Cohen-Or Tali Dekel Michal Irani and Inbar Mosseri. 2022. Self-Distilled StyleGAN: Towards Generation from Internet Photos. In ACM SIGGRAPH.","DOI":"10.1145\/3528233.3530708"},{"key":"e_1_3_2_2_73_1","volume-title":"MyStyle: A Personalized Generative Prior. arXiv preprint arXiv:2203.17272","author":"Nitzan Yotam","year":"2022","unstructured":"Yotam Nitzan, Kfir Aberman, Qiurui He, Orly Liba, Michal Yarom, Yossi Gandelsman, Inbar Mosseri, Yael Pritch, and Daniel Cohen-Or. 2022. MyStyle: A Personalized Generative Prior. arXiv preprint arXiv:2203.17272 (2022)."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"crossref","unstructured":"Atsuhiro Noguchi and Tatsuya Harada. 2019. Image generation from small datasets via batch statistics adaptation. In ICCV.","DOI":"10.1109\/ICCV.2019.00284"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"crossref","unstructured":"Utkarsh Ojha Yijun Li Cynthia Lu Alexei\u00a0A. Efros Yong\u00a0Jae Lee Eli Shechtman and Richard Zhang. 2021. Few-shot Image Generation via Cross-domain Correspondence. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01060"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011139631724"},{"key":"e_1_3_2_2_77_1","unstructured":"Aaron van\u00a0den Oord Nal Kalchbrenner Oriol Vinyals Lasse Espeholt Alex Graves and Koray Kavukcuoglu. 2016. Conditional Image Generation with PixelCNN Decoders. In NeurIPS."},{"key":"e_1_3_2_2_78_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van\u00a0den Oord Aaron","year":"2018","unstructured":"Aaron van\u00a0den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"crossref","unstructured":"Gaurav Parmar Richard Zhang and Jun-Yan Zhu. 
2022. On Buggy Resizing Libraries and Surprising Subtleties in FID Calculation. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01112"},{"key":"e_1_3_2_2_80_1","volume-title":"Styleclip: Text-driven manipulation of stylegan imagery. In ICCV. 2085\u20132094.","author":"Patashnik Or","year":"2021","unstructured":"Or Patashnik, Zongze Wu, Eli Shechtman, Daniel Cohen-Or, and Dani Lischinski. 2021. Styleclip: Text-driven manipulation of stylegan imagery. In ICCV. 2085\u20132094."},{"key":"e_1_3_2_2_81_1","unstructured":"Justin Pinkney. 2020. Awesome Pretrained StyleGAN. https:\/\/www.justinpinkney.com\/pretrained-stylegan\/."},{"key":"e_1_3_2_2_82_1","volume-title":"Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988","author":"Poole Ben","year":"2022","unstructured":"Ben Poole, Ajay Jain, Jonathan\u00a0T Barron, and Ben Mildenhall. 2022. Dreamfusion: Text-to-3d using 2d diffusion. arXiv preprint arXiv:2209.14988 (2022)."},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"crossref","unstructured":"Filip Radenovic Giorgos Tolias and Ondrej Chum. 2018. Deep shape matching. In ECCV.","DOI":"10.1007\/978-3-030-01228-1_46"},{"key":"e_1_3_2_2_84_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning transferable visual models from natural language supervision. In ICML."},{"key":"e_1_3_2_2_85_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. 
arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_2_86_1","volume-title":"Aaron van\u00a0den Oord, and Oriol Vinyals","author":"Razavi Ali","year":"2019","unstructured":"Ali Razavi, Aaron van\u00a0den Oord, and Oriol Vinyals. 2019. Generating diverse high-fidelity images with vq-vae-2. In NeurIPS."},{"key":"e_1_3_2_2_87_1","volume-title":"Sketchformer: Transformer-based representation for sketched structure. In CVPR.","author":"Sampaio\u00a0Ferraz Ribeiro Leo","year":"2020","unstructured":"Leo Sampaio\u00a0Ferraz Ribeiro, Tu Bui, John Collomosse, and Moacir Ponti. 2020. Sketchformer: Transformer-based representation for sketched structure. In CVPR."},{"key":"e_1_3_2_2_88_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"crossref","unstructured":"Nataniel Ruiz Yuanzhen Li Varun Jampani Yael Pritch Michael Rubinstein and Kfir Aberman. 2022. DreamBooth: Fine Tuning Text-to-image Diffusion Models for Subject-Driven Generation. In arXiv preprint arxiv:2208.12242.","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_2_90_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily\u00a0L Denton, Kamyar Ghasemipour, Raphael Gontijo\u00a0Lopes, Burcu Karagol\u00a0Ayan, Tim Salimans, 2022. Photorealistic text-to-image diffusion models with deep language understanding. 
Advances in Neural Information Processing Systems 35 (2022), 36479\u201336494.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_91_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925954"},{"key":"e_1_3_2_2_92_1","unstructured":"Axel Sauer Kashyap Chitta Jens M\u00fcller and Andreas Geiger. 2021. Projected GANs Converge Faster. In NeurIPS."},{"key":"e_1_3_2_2_93_1","volume-title":"Stylegan-xl: Scaling stylegan to large diverse datasets. In ACM SIGGRAPH.","author":"Sauer Axel","year":"2022","unstructured":"Axel Sauer, Katja Schwarz, and Andreas Geiger. 2022. Stylegan-xl: Scaling stylegan to large diverse datasets. In ACM SIGGRAPH."},{"key":"e_1_3_2_2_94_1","unstructured":"Derrick Schultz. 2020. FreaGAN undertrained GAN trained on Frea Buckler\u2019s artwork. https:\/\/twitter.com\/dvsch\/status\/1255885874560225284."},{"key":"e_1_3_2_2_95_1","volume-title":"Hugginggpt: Solving ai tasks with chatgpt and its friends in huggingface. arXiv preprint arXiv:2303.17580","author":"Shen Yongliang","year":"2023","unstructured":"Yongliang Shen, Kaitao Song, Xu Tan, Dongsheng Li, Weiming Lu, and Yueting Zhuang. 2023. Hugginggpt: Solving ai tasks with chatgpt and its friends in huggingface. arXiv preprint arXiv:2303.17580 (2023)."},{"key":"e_1_3_2_2_96_1","volume-title":"Video Google: A text retrieval approach to object matching in videos. In ICCV.","author":"Sivic Josef","year":"2003","unstructured":"Josef Sivic and Andrew Zisserman. 2003. Video Google: A text retrieval approach to object matching in videos. In ICCV."},{"key":"e_1_3_2_2_97_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.895972"},{"key":"e_1_3_2_2_98_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00177"},{"key":"e_1_3_2_2_99_1","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2021a. Denoising diffusion implicit models. 
In ICLR."},{"key":"e_1_3_2_2_100_1","unstructured":"Yang Song Jascha Sohl-Dickstein Diederik\u00a0P Kingma Abhishek Kumar Stefano Ermon and Ben Poole. 2021b. Score-based generative modeling through stochastic differential equations. In ICLR."},{"key":"e_1_3_2_2_101_1","volume-title":"State of the Art on Neural Rendering. Computer Graphics Forum (EG STAR 2020)","author":"Tewari A.","year":"2020","unstructured":"A. Tewari, O. Fried, J. Thies, V. Sitzmann, S. Lombardi, K. Sunkavalli, R. Martin-Brualla, T. Simon, J. Saragih, M. Nie\u00dfner, R. Pandey, S. Fanello, G. Wetzstein, J.-Y. Zhu, C. Theobalt, M. Agrawala, E. Shechtman, D.\u00a0B Goldman, and M. Zollh\u00f6fer. 2020. State of the Art on Neural Rendering. Computer Graphics Forum (EG STAR 2020) (2020)."},{"key":"e_1_3_2_2_102_1","doi-asserted-by":"crossref","unstructured":"Antonio Torralba Rob Fergus and Yair Weiss. 2008. Small codes and large image databases for recognition. In CVPR.","DOI":"10.1109\/CVPR.2008.4587633"},{"key":"e_1_3_2_2_103_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"e_1_3_2_2_104_1","unstructured":"Unsplash. 2022. Unsplash. https:\/\/unsplash.com."},{"key":"e_1_3_2_2_105_1","doi-asserted-by":"crossref","unstructured":"Sheng-Yu Wang David Bau and Jun-Yan Zhu. 2021. Sketch Your Own GAN. In ICCV.","DOI":"10.1109\/ICCV48922.2021.01379"},{"key":"e_1_3_2_2_106_1","volume-title":"Rewriting Geometric Rules of a GAN. ACM TOG","author":"Wang Sheng-Yu","year":"2022","unstructured":"Sheng-Yu Wang, David Bau, and Jun-Yan Zhu. 2022. Rewriting Geometric Rules of a GAN. ACM TOG (2022)."},{"key":"e_1_3_2_2_107_1","doi-asserted-by":"crossref","unstructured":"Yaxing Wang Abel Gonzalez-Garcia David Berga Luis Herranz Fahad\u00a0Shahbaz Khan and Joost van\u00a0de Weijer. 2020. Minegan: effective knowledge transfer from gans to target domains with few images. 
In CVPR.","DOI":"10.1109\/CVPR42600.2020.00935"},{"key":"e_1_3_2_2_108_1","volume-title":"Abel Gonzalez-Garcia, and Bogdan Raducanu.","author":"Wang Yaxing","year":"2018","unstructured":"Yaxing Wang, Chenshen Wu, Luis Herranz, Joost van\u00a0de Weijer, Abel Gonzalez-Garcia, and Bogdan Raducanu. 2018. Transferring gans: generating images from limited data. In ECCV."},{"key":"e_1_3_2_2_109_1","unstructured":"Yair Weiss Antonio Torralba and Rob Fergus. 2008. Spectral hashing. In NeurIPS."},{"key":"e_1_3_2_2_110_1","volume-title":"Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365","author":"Yu Fisher","year":"2015","unstructured":"Fisher Yu, Ari Seff, Yinda Zhang, Shuran Song, Thomas Funkhouser, and Jianxiong Xiao. 2015. Lsun: Construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365 (2015)."},{"key":"e_1_3_2_2_111_1","unstructured":"Qian Yu Feng Liu Yi-Zhe Song Tao Xiang Timothy\u00a0M Hospedales and Chen-Change Loy. 2016. Sketch me that shoe. In CVPR."},{"key":"e_1_3_2_2_112_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_2_113_1","volume-title":"Datasetgan: Efficient labeled data factory with minimal human effort. In CVPR.","author":"Zhang Yuxuan","year":"2021","unstructured":"Yuxuan Zhang, Huan Ling, Jun Gao, Kangxue Yin, Jean-Francois Lafleche, Adela Barriuso, Antonio Torralba, and Sanja Fidler. 2021. Datasetgan: Efficient labeled data factory with minimal human effort. In CVPR."},{"key":"e_1_3_2_2_114_1","unstructured":"Miaoyun Zhao Yulai Cong and Lawrence Carin. 2020a. On leveraging pretrained GANs for generation with limited data. In ICML."},{"key":"e_1_3_2_2_115_1","unstructured":"Shengyu Zhao Zhijian Liu Ji Lin Jun-Yan Zhu and Song Han. 2020b. Differentiable Augmentation for Data-Efficient GAN Training. 
In NeurIPS."},{"key":"e_1_3_2_2_116_1","volume-title":"SIFT meets CNN: A decade survey of instance retrieval","author":"Zheng Liang","year":"2017","unstructured":"Liang Zheng, Yi Yang, and Qi Tian. 2017. SIFT meets CNN: A decade survey of instance retrieval. IEEE TPAMI (2017)."},{"key":"e_1_3_2_2_117_1","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480537"}],"event":{"name":"SA '23: SIGGRAPH Asia 2023","location":"Sydney NSW Australia","acronym":"SA '23","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["SIGGRAPH Asia 2023 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610548.3618189","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3610548.3618189","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T09:33:51Z","timestamp":1755768831000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3610548.3618189"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,10]]},"references-count":117,"alternative-id":["10.1145\/3610548.3618189","10.1145\/3610548"],"URL":"https:\/\/doi.org\/10.1145\/3610548.3618189","relation":{},"subject":[],"published":{"date-parts":[[2023,12,10]]},"assertion":[{"value":"2023-12-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}