{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T07:24:32Z","timestamp":1763018672350,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":84,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"the National Science Fund for Distinguished Young Scholars","award":["No.62025603"],"award-info":[{"award-number":["No.62025603"]}]},{"name":"the National Natural Science Foundation of China","award":["No. U21B2037, No. U22B2051, No. 62176222, No. 62176223, No. 62176226, No. 62072386, No. 62072387, No. 62072389, No. 62002305, No. 62272401"],"award-info":[{"award-number":["No. U21B2037, No. U22B2051, No. 62176222, No. 62176223, No. 62176226, No. 62072386, No. 62072387, No. 62072389, No. 62002305, No. 62272401"]}]},{"name":"National Key R&D Program of China","award":["No.2022ZD0118201"],"award-info":[{"award-number":["No.2022ZD0118201"]}]},{"name":"the Natural Science Foundation of Fujian Province of China","award":["No.2021J01002, No.2022J06001"],"award-info":[{"award-number":["No.2021J01002, No.2022J06001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612067","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"4666-4677","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["PixelFace+: Towards Controllable Face Generation and Manipulation with Text Descriptions and Segmentation Masks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7743-7301","authenticated-orcid":false,"given":"Xiaoxiong","family":"Du","sequence":"first","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0655-1594","authenticated-orcid":false,"given":"Jun","family":"Peng","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5110-4526","authenticated-orcid":false,"given":"Yiyi","family":"Zhou","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8731-7099","authenticated-orcid":false,"given":"Jinlu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3129-2308","authenticated-orcid":false,"given":"Siting","family":"Chen","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4355-5711","authenticated-orcid":false,"given":"Guannan","family":"Jiang","sequence":"additional","affiliation":[{"name":"Contemporary Amperex Technology Co., Limited, Ningde, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3912-9306","authenticated-orcid":false,"given":"Xiaoshuai","family":"Sun","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9163-2932","authenticated-orcid":false,"given":"Rongrong","family":"Ji","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00785"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01405"},{"key":"e_1_3_2_1_3_1","volume-title":"International conference on machine learning. PMLR, 214--223","author":"Arjovsky Martin","year":"2017","unstructured":"Martin Arjovsky, Soumith Chintala, and L\u00e9on Bottou. 2017. Wasserstein generative adversarial networks. In International conference on machine learning. PMLR, 214--223."},{"key":"e_1_3_2_1_4_1","volume-title":"Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096","author":"Brock Andrew","year":"2018","unstructured":"Andrew Brock, Jeff Donahue, and Karen Simonyan. 2018. Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470848"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.168"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392386"},{"key":"e_1_3_2_1_8_1","volume-title":"FTGAN: A fully-trained generative adversarial networks for text to face generation. arXiv preprint arXiv:1904.05729","author":"Chen Xiang","year":"2019","unstructured":"Xiang Chen, Lingbo Qing, Xiaohai He, Xiaodong Luo, and Yining Xu. 2019. FTGAN: A fully-trained generative adversarial networks for text to face generation. arXiv preprint arXiv:1904.05729 (2019)."},{"key":"e_1_3_2_1_9_1","volume-title":"Generative adversarial networks: An overview","author":"Creswell Antonia","year":"2018","unstructured":"Antonia Creswell, Tom White, Vincent Dumoulin, Kai Arulkumaran, Biswa Sengupta, and Anil A Bharath. 2018. Generative adversarial networks: An overview. IEEE signal processing magazine, Vol. 35, 1 (2018), 53--65."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00526"},{"key":"e_1_3_2_1_11_1","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in Neural Information Processing Systems, Vol. 34 (2021), 8780--8794.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.608"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_14_1","volume-title":"Tel Aviv","author":"He Jing","year":"2022","unstructured":"Jing He, Yiyi Zhou, Qi Zhang, Jun Peng, Yunhang Shen, Xiaoshuai Sun, Chao Chen, and Rongrong Ji. 2022. PixelFolder: An Efficient Progressive Pixel Synthesis Network for Image Generation. In Computer Vision--ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XIV. Springer, 643--660."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01414"},{"key":"e_1_3_2_1_16_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems, Vol. 33 (2020), 6840--6851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","volume-title":"TextFace: Text-to-Style Mapping based Face Generation and Manipulation","author":"Hou Xianxu","year":"2022","unstructured":"Xianxu Hou, Xiaokang Zhang, Yudong Li, and Linlin Shen. 2022. TextFace: Text-to-Style Mapping based Face Generation and Manipulation. IEEE Transactions on Multimedia (2022)."},{"key":"e_1_3_2_1_18_1","volume-title":"Istr: End-to-end instance segmentation with transformers. arXiv preprint arXiv:2105.00637","author":"Hu Jie","year":"2021","unstructured":"Jie Hu, Liujuan Cao, Yao Lu, ShengChuan Zhang, Yan Wang, Ke Li, Feiyue Huang, Ling Shao, and Rongrong Ji. 2021. Istr: End-to-end instance segmentation with transformers. arXiv preprint arXiv:2105.00637 (2021)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01709"},{"key":"e_1_3_2_1_20_1","volume-title":"Yuming Jiang, and Ziwei Liu.","author":"Huang Ziqi","year":"2023","unstructured":"Ziqi Huang, Kelvin CK Chan, Yuming Jiang, and Ziwei Liu. 2023. Collaborative Diffusion for Multi-Modal Face Generation and Editing. arXiv preprint arXiv:2304.10530 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00183"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"e_1_3_2_1_24_1","volume-title":"Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196","author":"Karras Tero","year":"2017","unstructured":"Tero Karras, Timo Aila, Samuli Laine, and Jaakko Lehtinen. 2017. Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196 (2017)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/VTC2022-Fall57202.2022.10012878"},{"key":"e_1_3_2_1_31_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Li Yikang","year":"2019","unstructured":"Yikang Li, Tao Ma, Yeqi Bai, Nan Duan, Sining Wei, and Xiaogang Wang. 2019. Pastegan: A semi-parametric method to generate image from scene graph. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00927"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01765"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00461"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01474"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01611"},{"key":"e_1_3_2_1_37_1","volume-title":"Eric Frank, Alex Sergeev, and Jason Yosinski.","author":"Liu Rosanne","year":"2018","unstructured":"Rosanne Liu, Joel Lehman, Piero Molino, Felipe Petroski Such, Eric Frank, Alex Sergeev, and Jason Yosinski. 2018. An intriguing failing of convolutional neural networks and the coordconv solution. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_38_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Liu Xihui","year":"2019","unstructured":"Xihui Liu, Guojun Yin, Jing Shao, Xiaogang Wang, et al. 2019. Learning to predict layout-to-image conditional convolutions for semantic image synthesis. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413505"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414006"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01005"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3139234"},{"key":"e_1_3_2_1_43_1","volume-title":"Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784","author":"Mirza Mehdi","year":"2014","unstructured":"Mehdi Mirza and Simon Osindero. 2014. Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)."},{"key":"e_1_3_2_1_44_1","volume-title":"Interactive image generation using scene graphs. arXiv preprint arXiv:1905.03743","author":"Mittal Gaurav","year":"2019","unstructured":"Gaurav Mittal, Shubham Agrawal, Anuva Agarwal, Sushant Mehta, and Tanya Marwah. 2019. Interactive image generation using scene graphs. arXiv preprint arXiv:1905.03743 (2019)."},{"key":"e_1_3_2_1_45_1","volume-title":"Plugnet: Degradation aware scene text recognition supervised by a pluggable super-resolution unit. In Computer Vision-ECCV 2020: 16th European Conference","author":"Mou Yongqiang","year":"2020","unstructured":"Yongqiang Mou, Lei Tan, Hui Yang, Jingying Chen, Leyuan Liu, Rui Yan, and Yaohong Huang. 2020. Plugnet: Degradation aware scene text recognition supervised by a pluggable super-resolution unit. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part XV 16. Springer, 158--174."},{"key":"e_1_3_2_1_46_1","volume-title":"Text-adaptive generative adversarial networks: manipulating images with natural language. Advances in neural information processing systems","author":"Nam Seonghyeon","year":"2018","unstructured":"Seonghyeon Nam, Yunji Kim, and Seon Joo Kim. 2018. Text-adaptive generative adversarial networks: manipulating images with natural language. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigMM.2019.00-42"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547818"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547758"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3116416"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00918"},{"key":"e_1_3_2_1_54_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_55_1","volume-title":"International Conference on Machine Learning. PMLR, 8821--8831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International Conference on Machine Learning. PMLR, 8821--8831."},{"key":"e_1_3_2_1_56_1","volume-title":"International conference on machine learning. PMLR, 1060--1069","author":"Reed Scott","year":"2016","unstructured":"Scott Reed, Zeynep Akata, Xinchen Yan, Lajanugen Logeswaran, Bernt Schiele, and Honglak Lee. 2016. Generative adversarial text to image synthesis. In International conference on machine learning. PMLR, 1060--1069."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00232"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_59_1","volume-title":"Improved techniques for training gans. Advances in neural information processing systems","author":"Salimans Tim","year":"2016","unstructured":"Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and Xi Chen. 2016. Improved techniques for training gans. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01061"},{"key":"e_1_3_2_1_61_1","volume-title":"International Conference on Machine Learning. PMLR, 2256--2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International Conference on Machine Learning. PMLR, 2256--2265."},{"key":"e_1_3_2_1_62_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020a. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_1_63_1","volume-title":"Improved techniques for training score-based generative models. Advances in neural information processing systems","author":"Song Yang","year":"2020","unstructured":"Yang Song and Stefano Ermon. 2020. Improved techniques for training score-based generative models. Advances in neural information processing systems, Vol. 33 (2020), 12438--12448."},{"key":"e_1_3_2_1_64_1","volume-title":"Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456","author":"Song Yang","year":"2020","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2020b. Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)."},{"key":"e_1_3_2_1_65_1","volume-title":"Conditional image generation and manipulation for user-specified content. arXiv preprint arXiv:2005.04909","author":"Stap David","year":"2020","unstructured":"David Stap, Maurits Bleeker, Sarah Ibrahimi, and Maartje Ter Hoeve. 2020. Conditional image generation and manipulation for user-specified content. arXiv preprint arXiv:2005.04909 (2020)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475391"},{"key":"e_1_3_2_1_67_1","volume-title":"You only need adversarial supervision for semantic image synthesis. arXiv preprint arXiv:2012.04781","author":"Sushko Vadim","year":"2020","unstructured":"Vadim Sushko, Edgar Sch\u00f6nfeld, Dan Zhang, Juergen Gall, Bernt Schiele, and Anna Khoreva. 2020. You only need adversarial supervision for semantic image synthesis. arXiv preprint arXiv:2012.04781 (2020)."},{"key":"e_1_3_2_1_68_1","volume-title":"Philip HS Torr, and Nicu Sebe","author":"Tang Hao","year":"2020","unstructured":"Hao Tang, Song Bai, Li Zhang, Philip HS Torr, and Nicu Sebe. 2020. Xinggan for person image generation. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part XXV 16. Springer, 717--734."},{"key":"e_1_3_2_1_69_1","volume-title":"Deep Fusion Generative Adversarial Networks for Text-to-Image Synthesis. arXiv preprint arXiv:2008.05865","author":"Tao Ming","year":"2020","unstructured":"Ming Tao, Hao Tang, Songsong Wu, Nicu Sebe, Xiaoyuan Jing, Fei Wu, and Bingkun Bao. 2020a. Deep Fusion Generative Adversarial Networks for Text-to-Image Synthesis. arXiv preprint arXiv:2008.05865 (2020)."},{"key":"e_1_3_2_1_70_1","volume-title":"Df-gan: Deep fusion generative adversarial networks for text-to-image synthesis. arXiv preprint arXiv:2008.05865","author":"Tao Ming","year":"2020","unstructured":"Ming Tao, Hao Tang, Songsong Wu, Nicu Sebe, Xiao-Yuan Jing, Fei Wu, and Bingkun Bao. 2020b. Df-gan: Deep fusion generative adversarial networks for text-to-image synthesis. arXiv preprint arXiv:2008.05865 (2020)."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00342"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01349"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00229"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_1_76_1","volume-title":"Freestyle Layout-to-Image Synthesis. arXiv preprint arXiv:2303.14412","author":"Xue Han","year":"2023","unstructured":"Han Xue, Zhiwu Huang, Qianru Sun, Li Song, and Wenjun Zhang. 2023. Freestyle Layout-to-Image Synthesis. arXiv preprint arXiv:2303.14412 (2023)."},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3090426"},{"key":"e_1_3_2_1_80_1","volume-title":"Plenty is plague: Fine-grained learning for visual question answering","author":"Zhou Yiyi","year":"2019","unstructured":"Yiyi Zhou, Rongrong Ji, Xiaoshuai Sun, Jinsong Su, Deyu Meng, Yue Gao, and Chunhua Shen. 2019. Plenty is plague: Fine-grained learning for visual question answering. IEEE transactions on pattern analysis and machine intelligence, Vol. 44, 2 (2019), 697--709."},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00208"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00595"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00515"},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00551"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612067","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612067","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:03:29Z","timestamp":1755821009000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612067"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":84,"alternative-id":["10.1145\/3581783.3612067","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612067","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}