{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T22:11:25Z","timestamp":1757542285734,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,17]],"date-time":"2022-10-17T00:00:00Z","timestamp":1665964800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Science Foundation","award":["IIS-2203262"],"award-info":[{"award-number":["IIS-2203262"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS- 2209814"],"award-info":[{"award-number":["IIS- 2209814"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100005289","name":"National Institute of Justice","doi-asserted-by":"publisher","award":["2018-75-CX- 0032"],"award-info":[{"award-number":["2018-75-CX- 0032"]}],"id":[{"id":"10.13039\/100005289","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,17]]},"DOI":"10.1145\/3511808.3557382","type":"proceedings-article","created":{"date-parts":[[2022,10,16]],"date-time":"2022-10-16T01:22:22Z","timestamp":1665883342000},"page":"2519-2528","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Look Twice as Much as You Say"],"prefix":"10.1145","author":[{"given":"Chunhui","family":"Zhang","sequence":"first","affiliation":[{"name":"Brandeis University, Waltham, MA, USA"}]},{"given":"Chao","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Hong Kong, Hong Kong, China"}]},{"given":"Youhuan","family":"Li","sequence":"additional","affiliation":[{"name":"Hunan University, Changsha, China"}]},{"given":"Xiangliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Notre Dame, South Bend, IN, USA"}]},{"given":"Yanfang","family":"Ye","sequence":"additional","affiliation":[{"name":"University of Notre Dame, South Bend, IN, USA"}]},{"given":"Chuxu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Brandeis University, Waltham, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Spice: Semantic propositional image caption evaluation. In ECCV. 382--398.","author":"Anderson Peter","year":"2016","unstructured":"Peter Anderson , Basura Fernando , Mark Johnson , and Stephen Gould . 2016 . Spice: Semantic propositional image caption evaluation. In ECCV. 382--398. Peter Anderson, Basura Fernando, Mark Johnson, and Stephen Gould. 2016. Spice: Semantic propositional image caption evaluation. In ECCV. 382--398."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Peter Anderson Xiaodong He Chris Buehler Damien Teney Mark Johnson Stephen Gould and Lei Zhang. 2018. Bottom-up and top-down attention for image captioning and visual question answering. In CVPR. 6077--6086. Peter Anderson Xiaodong He Chris Buehler Damien Teney Mark Johnson Stephen Gould and Lei Zhang. 2018. Bottom-up and top-down attention for image captioning and visual question answering. In CVPR. 6077--6086.","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Shizhe Chen Qin Jin Peng Wang and Qi Wu. 2020a. Say as you wish: Fine-grained control of image caption generation with abstract scene graphs. In CVPR. 9962--9971. Shizhe Chen Qin Jin Peng Wang and Qi Wu. 2020a. Say as you wish: Fine-grained control of image caption generation with abstract scene graphs. In CVPR. 9962--9971.","DOI":"10.1109\/CVPR42600.2020.00998"},{"key":"e_1_3_2_1_4_1","unstructured":"Ting Chen Simon Kornblith Mohammad Norouzi and Geoffrey Hinton. 2020b. A simple framework for contrastive learning of visual representations. In ICML. 1597--1607. Ting Chen Simon Kornblith Mohammad Norouzi and Geoffrey Hinton. 2020b. A simple framework for contrastive learning of visual representations. In ICML. 1597--1607."},{"key":"e_1_3_2_1_5_1","volume-title":"Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325","author":"Chen Xinlei","year":"2015","unstructured":"Xinlei Chen , Hao Fang , Tsung-Yi Lin , Ramakrishna Vedantam , Saurabh Gupta , Piotr Doll\u00e1r , and C Lawrence Zitnick . 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 ( 2015 ). Xinlei Chen, Hao Fang, Tsung-Yi Lin, Ramakrishna Vedantam, Saurabh Gupta, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2015. Microsoft coco captions: Data collection and evaluation server. arXiv preprint arXiv:1504.00325 (2015)."},{"key":"e_1_3_2_1_6_1","volume-title":"ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. In ICLR.","author":"Clark Kevin","year":"2019","unstructured":"Kevin Clark , Minh-Thang Luong , Quoc V Le , and Christopher D Manning . 2019 . ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. In ICLR. Kevin Clark, Minh-Thang Luong, Quoc V Le, and Christopher D Manning. 2019. ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. In ICLR."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Marcella Cornia Matteo Stefanini Lorenzo Baraldi and Rita Cucchiara. 2020. Meshed-Memory Transformer for Image Captioning. In CVPR. 10578--10587. Marcella Cornia Matteo Stefanini Lorenzo Baraldi and Rita Cucchiara. 2020. Meshed-Memory Transformer for Image Captioning. In CVPR. 10578--10587.","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Yang Feng Lin Ma Wei Liu and Jiebo Luo. 2019. Unsupervised image captioning. In CVPR. 4125--4134. Yang Feng Lin Ma Wei Liu and Jiebo Luo. 2019. Unsupervised image captioning. In CVPR. 4125--4134.","DOI":"10.1109\/CVPR.2019.00425"},{"key":"e_1_3_2_1_10_1","unstructured":"Tianyu Gao Xingcheng Yao and Danqi Chen. 2021. SimCSE: Simple Contrastive Learning of Sentence Embeddings. In EMNLP. 6894--6910. Tianyu Gao Xingcheng Yao and Danqi Chen. 2021. SimCSE: Simple Contrastive Learning of Sentence Embeddings. In EMNLP. 6894--6910."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Jiuxiang Gu Shafiq Joty Jianfei Cai and Gang Wang. 2018. Unpaired image captioning by language pivoting. In ECCV. 503--519. Jiuxiang Gu Shafiq Joty Jianfei Cai and Gang Wang. 2018. Unpaired image captioning by language pivoting. In ECCV. 503--519.","DOI":"10.1007\/978-3-030-01246-5_31"},{"key":"e_1_3_2_1_12_1","unstructured":"Longteng Guo Jing Liu Jinhui Tang Jiangwei Li Wei Luo and Hanqing Lu. 2019. Aligning linguistic words and visual semantic units for image captioning. In ACM MM. 765--773. Longteng Guo Jing Liu Jinhui Tang Jiangwei Li Wei Luo and Hanqing Lu. 2019. Aligning linguistic words and visual semantic units for image captioning. In ACM MM. 765--773."},{"key":"e_1_3_2_1_13_1","unstructured":"Zhichun Guo Wenhao Yu Chuxu Zhang Meng Jiang and Nitesh V Chawla. 2020. GraSeq: graph and sequence fusion learning for molecular property prediction. In CIKM. 435--443. Zhichun Guo Wenhao Yu Chuxu Zhang Meng Jiang and Nitesh V Chawla. 2020. GraSeq: graph and sequence fusion learning for molecular property prediction. In CIKM. 435--443."},{"key":"e_1_3_2_1_14_1","unstructured":"Kaveh Hassani and Amir Hosein Khasahmadi. 2020. Contrastive multi-view representation learning on graphs. In ICML. 4116--4126. Kaveh Hassani and Amir Hosein Khasahmadi. 2020. Contrastive multi-view representation learning on graphs. In ICML. 4116--4126."},{"key":"e_1_3_2_1_15_1","unstructured":"Kaiming He Haoqi Fan Yuxin Wu Saining Xie and Ross Girshick. 2020. Momentum contrast for unsupervised visual representation learning. In CVPR. 9729--9738. Kaiming He Haoqi Fan Yuxin Wu Saining Xie and Ross Girshick. 2020. Momentum contrast for unsupervised visual representation learning. In CVPR. 9729--9738."},{"key":"e_1_3_2_1_16_1","volume-title":"Long short-term memory. Neural computation","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber . 1997. Long short-term memory. Neural computation , Vol. 9 , 8 ( 1997 ), 1735--1780. Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation, Vol. 9, 8 (1997), 1735--1780."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Chao Huang Huance Xu Yong Xu Peng Dai Lianghao Xia Mengyin Lu Liefeng Bo Hao Xing Xiaoping Lai and Yanfang Ye. 2021. Knowledge-aware coupled graph neural network for social recommendation. In AAAI. 4115--4122. Chao Huang Huance Xu Yong Xu Peng Dai Lianghao Xia Mengyin Lu Liefeng Bo Hao Xing Xiaoping Lai and Yanfang Ye. 2021. Knowledge-aware coupled graph neural network for social recommendation. In AAAI. 4115--4122.","DOI":"10.1609\/aaai.v35i5.16533"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Justin Johnson Ranjay Krishna Michael Stark Li-Jia Li David Shamma Michael Bernstein and Li Fei-Fei. 2015. Image retrieval using scene graphs. In CVPR. 3668--3678. Justin Johnson Ranjay Krishna Michael Stark Li-Jia Li David Shamma Michael Bernstein and Li Fei-Fei. 2015. Image retrieval using scene graphs. In CVPR. 3668--3678.","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Andrej Karpathy and Li Fei-Fei. 2015. Deep visual-semantic alignments for generating image descriptions. In CVPR. 3128--3137. Andrej Karpathy and Li Fei-Fei. 2015. Deep visual-semantic alignments for generating image descriptions. In CVPR. 3128--3137.","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_20_1","volume-title":"Adam: A Method for Stochastic Optimization. In ICLR.","author":"Kingma Diederik P","year":"2015","unstructured":"Diederik P Kingma and Jimmy Ba . 2015 . Adam: A Method for Stochastic Optimization. In ICLR. Diederik P Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In ICLR."},{"key":"e_1_3_2_1_21_1","volume-title":"Kipf and Max Welling","author":"Thomas","year":"2017","unstructured":"Thomas N. Kipf and Max Welling . 2017 . Semi-Supervised Classification with Graph Convolutional Networks. In ICLR. Thomas N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In ICLR."},{"key":"e_1_3_2_1_22_1","volume-title":"ACL Workshop. 74--81","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin . 2004 . Rouge: A package for automatic evaluation of summaries . In ACL Workshop. 74--81 . Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In ACL Workshop. 74--81."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Xihui Liu Hongsheng Li Jing Shao Dapeng Chen and Xiaogang Wang. 2018. Show tell and discriminate: Image captioning by self-retrieval with partially labeled data. In ECCV. 338--354. Xihui Liu Hongsheng Li Jing Shao Dapeng Chen and Xiaogang Wang. 2018. Show tell and discriminate: Image captioning by self-retrieval with partially labeled data. In ECCV. 338--354.","DOI":"10.1007\/978-3-030-01267-0_21"},{"key":"e_1_3_2_1_24_1","volume-title":"Self-supervised Learning for Linking Knowledge Graphs. TKDE","author":"Liu Xiao","year":"2021","unstructured":"Xiao Liu , Li Mian , Yuxiao Dong , Fanjin Zhang , Jing Zhang , Jie Tang , Peng Zhang , Jibing Gong , and Kuansan Wang . 2021. OAG_know : Self-supervised Learning for Linking Knowledge Graphs. TKDE ( 2021 ). Xiao Liu, Li Mian, Yuxiao Dong, Fanjin Zhang, Jing Zhang, Jie Tang, Peng Zhang, Jibing Gong, and Kuansan Wang. 2021. OAG_know: Self-supervised Learning for Linking Knowledge Graphs. TKDE (2021)."},{"key":"e_1_3_2_1_25_1","unstructured":"Jiasen Lu Caiming Xiong Devi Parikh and Richard Socher. 2017. Knowing when to look: Adaptive attention via a visual sentinel for image captioning. In CVPR. 375--383. Jiasen Lu Caiming Xiong Devi Parikh and Richard Socher. 2017. Knowing when to look: Adaptive attention via a visual sentinel for image captioning. In CVPR. 375--383."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Yunpeng Luo Jiayi Ji Xiaoshuai Sun Liujuan Cao Yongjian Wu Feiyue Huang Chia-Wen Lin and Rongrong Ji. 2021. Dual-level collaborative transformer for image captioning. In AAAI. 2286--2293. Yunpeng Luo Jiayi Ji Xiaoshuai Sun Liujuan Cao Yongjian Wu Feiyue Huang Chia-Wen Lin and Rongrong Ji. 2021. Dual-level collaborative transformer for image captioning. In AAAI. 2286--2293.","DOI":"10.1609\/aaai.v35i3.16328"},{"key":"e_1_3_2_1_27_1","volume-title":"Marie Francine Moens, and Iacer Calixto","author":"Janusz Milewski Victor Siemen","year":"2020","unstructured":"Victor Siemen Janusz Milewski , Marie Francine Moens, and Iacer Calixto . 2020 . Are Scene Graphs Good Enough to Improve Image Captioning?. In IJCNLP. 504--515. Victor Siemen Janusz Milewski, Marie Francine Moens, and Iacer Calixto. 2020. Are Scene Graphs Good Enough to Improve Image Captioning?. In IJCNLP. 504--515."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Kishore Papineni Salim Roukos Todd Ward and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In ACL. 311--318. Kishore Papineni Salim Roukos Todd Ward and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In ACL. 311--318.","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_29_1","volume-title":"Manning","author":"Pennington Jeffrey","year":"2014","unstructured":"Jeffrey Pennington , Richard Socher , and Christopher D . Manning . 2014 . GloVe: Global Vectors for Word Representation. In EMNLP. 1532--1543. Jeffrey Pennington, Richard Socher, and Christopher D. Manning. 2014. GloVe: Global Vectors for Word Representation. In EMNLP. 1532--1543."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Xin Qian Eunyee Koh Fan Du Sungchul Kim Joel Chan Ryan A Rossi Sana Malik and Tak Yeon Lee. 2021. Generating Accurate Caption Units for Figure Captioning. In WWW. 2792--2804. Xin Qian Eunyee Koh Fan Du Sungchul Kim Joel Chan Ryan A Rossi Sana Malik and Tak Yeon Lee. 2021. Generating Accurate Caption Units for Figure Captioning. In WWW. 2792--2804.","DOI":"10.1145\/3442381.3449923"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403168"},{"key":"e_1_3_2_1_32_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. NeurIPS","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren , Kaiming He , Ross Girshick , and Jian Sun . 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. NeurIPS ( 2015 ), 91--99. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. NeurIPS (2015), 91--99."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Zhan Shi Xu Zhou Xipeng Qiu and Xiaodan Zhu. 2020. Improving Image Captioning with Better Use of Caption. In ACL. 7454--7464. Zhan Shi Xu Zhou Xipeng Qiu and Xiaodan Zhu. 2020. Improving Image Captioning with Better Use of Caption. In ACL. 7454--7464.","DOI":"10.18653\/v1\/2020.acl-main.664"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Ruixiang Tang Mengnan Du Yuening Li Zirui Liu Na Zou and Xia Hu. 2021. Mitigating Gender Bias in Captioning Systems. In WWW. 633--645. Ruixiang Tang Mengnan Du Yuening Li Zirui Liu Na Zou and Xia Hu. 2021. Mitigating Gender Bias in Captioning Systems. In WWW. 633--645.","DOI":"10.1145\/3442381.3449950"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Yijun Tian Chuxu Zhang Zhichun Guo Chao Huang Ronald Metoyer and Nitesh V Chawla. 2022. RecipeRec: A Heterogeneous Graph Learning Model for Recipe Recommendation. In IJCAI. Yijun Tian Chuxu Zhang Zhichun Guo Chao Huang Ronald Metoyer and Nitesh V Chawla. 2022. RecipeRec: A Heterogeneous Graph Learning Model for Recipe Recommendation. In IJCAI.","DOI":"10.24963\/ijcai.2022\/481"},{"key":"e_1_3_2_1_36_1","volume-title":"Pushing the limits of self-supervised ResNets: Can we outperform supervised learning without labels on ImageNet? arXiv preprint arXiv:2201.05119","author":"Tomasev Nenad","year":"2022","unstructured":"Nenad Tomasev , Ioana Bica , Brian McWilliams , Lars Buesing , Razvan Pascanu , Charles Blundell , and Jovana Mitrovic . 2022. Pushing the limits of self-supervised ResNets: Can we outperform supervised learning without labels on ImageNet? arXiv preprint arXiv:2201.05119 ( 2022 ). Nenad Tomasev, Ioana Bica, Brian McWilliams, Lars Buesing, Razvan Pascanu, Charles Blundell, and Jovana Mitrovic. 2022. Pushing the limits of self-supervised ResNets: Can we outperform supervised learning without labels on ImageNet? arXiv preprint arXiv:2201.05119 (2022)."},{"key":"e_1_3_2_1_37_1","volume-title":"Cider: Consensus-based image description evaluation. In CVPR. 4566--4575.","author":"Vedantam Ramakrishna","year":"2015","unstructured":"Ramakrishna Vedantam , C Lawrence Zitnick , and Devi Parikh . 2015 . Cider: Consensus-based image description evaluation. In CVPR. 4566--4575. Ramakrishna Vedantam, C Lawrence Zitnick, and Devi Parikh. 2015. Cider: Consensus-based image description evaluation. In CVPR. 4566--4575."},{"key":"e_1_3_2_1_38_1","unstructured":"Petar Veli\u010dkovi\u0107 William Fedus William L Hamilton Pietro Li\u00f2 Yoshua Bengio and R Devon Hjelm. 2018. Deep Graph Infomax. In ICLR. Petar Veli\u010dkovi\u0107 William Fedus William L Hamilton Pietro Li\u00f2 Yoshua Bengio and R Devon Hjelm. 2018. Deep Graph Infomax. In ICLR."},{"key":"e_1_3_2_1_39_1","unstructured":"Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Li\u00f2 and Yoshua Bengio. 2018. Graph Attention Networks. In ICLR. Petar Veli\u010dkovi\u0107 Guillem Cucurull Arantxa Casanova Adriana Romero Pietro Li\u00f2 and Yoshua Bengio. 2018. Graph Attention Networks. In ICLR."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Oriol Vinyals Alexander Toshev Samy Bengio and Dumitru Erhan. 2015. Show and tell: A neural image caption generator. In CVPR. 3156--3164. Oriol Vinyals Alexander Toshev Samy Bengio and Dumitru Erhan. 2015. Show and tell: A neural image caption generator. In CVPR. 3156--3164.","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_41_1","volume-title":"Infogcl: Information-aware graph contrastive learning. In NeurIPS. 30414--30425.","author":"Xu Dongkuan","year":"2021","unstructured":"Dongkuan Xu , Wei Cheng , Dongsheng Luo , Haifeng Chen , and Xiang Zhang . 2021 . Infogcl: Information-aware graph contrastive learning. In NeurIPS. 30414--30425. Dongkuan Xu, Wei Cheng, Dongsheng Luo, Haifeng Chen, and Xiang Zhang. 2021. Infogcl: Information-aware graph contrastive learning. In NeurIPS. 30414--30425."},{"key":"e_1_3_2_1_42_1","unstructured":"Danfei Xu Yuke Zhu Christopher B Choy and Li Fei-Fei. 2017. Scene graph generation by iterative message passing. In CVPR. 5410--5419. Danfei Xu Yuke Zhu Christopher B Choy and Li Fei-Fei. 2017. Scene graph generation by iterative message passing. In CVPR. 5410--5419."},{"key":"e_1_3_2_1_43_1","unstructured":"Yuanmeng Yan Rumei Li Sirui Wang Fuzheng Zhang Wei Wu and Weiran Xu. 2021. ConSERT: A Contrastive Framework for Self-Supervised Sentence Representation Transfer. In ACL. 5065--5075. Yuanmeng Yan Rumei Li Sirui Wang Fuzheng Zhang Wei Wu and Weiran Xu. 2021. ConSERT: A Contrastive Framework for Self-Supervised Sentence Representation Transfer. In ACL. 5065--5075."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Xu Yang Kaihua Tang Hanwang Zhang and Jianfei Cai. 2019. Auto-encoding scene graphs for image captioning. In CVPR. 10685--10694. Xu Yang Kaihua Tang Hanwang Zhang and Jianfei Cai. 2019. Auto-encoding scene graphs for image captioning. In CVPR. 10685--10694.","DOI":"10.1109\/CVPR.2019.01094"},{"key":"e_1_3_2_1_45_1","unstructured":"Yuning You Tianlong Chen Yang Shen and Zhangyang Wang. 2021. Graph Contrastive Learning Automated. In ICML. 12121--12132. Yuning You Tianlong Chen Yang Shen and Zhangyang Wang. 2021. Graph Contrastive Learning Automated. In ICML. 12121--12132."},{"key":"e_1_3_2_1_46_1","unstructured":"Yuning You Tianlong Chen Yongduo Sui Ting Chen Zhangyang Wang and Yang Shen. 2020. Graph contrastive learning with augmentations. In NeurIPS. 5812--5823. Yuning You Tianlong Chen Yongduo Sui Ting Chen Zhangyang Wang and Yang Shen. 2020. Graph contrastive learning with augmentations. In NeurIPS. 5812--5823."},{"key":"e_1_3_2_1_47_1","volume-title":"SAIL: Self-Augmented Graph Contrastive Learning. In AAAI. 8927--8935.","author":"Yu Lu","year":"2022","unstructured":"Lu Yu , Shichao Pei , Lizhong Ding , Jun Zhou , Longfei Li , Chuxu Zhang , and Xiangliang Zhang . 2022 . SAIL: Self-Augmented Graph Contrastive Learning. In AAAI. 8927--8935. Lu Yu, Shichao Pei, Lizhong Ding, Jun Zhou, Longfei Li, Chuxu Zhang, and Xiangliang Zhang. 2022. SAIL: Self-Augmented Graph Contrastive Learning. In AAAI. 8927--8935."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Chuxu Zhang Dongjin Song Chao Huang Ananthram Swami and Nitesh V Chawla. 2019. Heterogeneous graph neural network. In KDD. 793--803. Chuxu Zhang Dongjin Song Chao Huang Ananthram Swami and Nitesh V Chawla. 2019. Heterogeneous graph neural network. In KDD. 793--803.","DOI":"10.1145\/3292500.3330961"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Jianan Zhao Qianlong Wen Shiyu Sun Yanfang Ye and Chuxu Zhang. 2021. Multi-view Self-supervised Heterogeneous Graph Embedding. In ECML\/PKDD. 319--334. Jianan Zhao Qianlong Wen Shiyu Sun Yanfang Ye and Chuxu Zhang. 2021. Multi-view Self-supervised Heterogeneous Graph Embedding. In ECML\/PKDD. 319--334.","DOI":"10.1007\/978-3-030-86520-7_20"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Yiwu Zhong Liwei Wang Jianshu Chen Dong Yu and Yin Li. 2020. Comprehensive image captioning via scene graph decomposition. In ECCV. 211--229. Yiwu Zhong Liwei Wang Jianshu Chen Dong Yu and Yin Li. 2020. Comprehensive image captioning via scene graph decomposition. In ECCV. 211--229.","DOI":"10.1007\/978-3-030-58568-6_13"}],"event":{"name":"CIKM '22: The 31st ACM International Conference on Information and Knowledge Management","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Atlanta GA USA","acronym":"CIKM '22"},"container-title":["Proceedings of the 31st ACM International Conference on Information &amp; Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3511808.3557382","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3511808.3557382","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3511808.3557382","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:48:54Z","timestamp":1750182534000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3511808.3557382"}},"subtitle":["Scene Graph Contrastive Learning for Self-Supervised Image Caption Generation"],"short-title":[],"issued":{"date-parts":[[2022,10,17]]},"references-count":50,"alternative-id":["10.1145\/3511808.3557382","10.1145\/3511808"],"URL":"https:\/\/doi.org\/10.1145\/3511808.3557382","relation":{},"subject":[],"published":{"date-parts":[[2022,10,17]]},"assertion":[{"value":"2022-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}