{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T04:26:56Z","timestamp":1770352016374,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shanghai Science and Technology Commission","award":["21511100700 and 22511104600"],"award-info":[{"award-number":["21511100700 and 22511104600"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62002121 and 62072183"],"award-info":[{"award-number":["62002121 and 62072183"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Foundation Project of CQ","award":["CSTB2022NSCQ-MSX0552"],"award-info":[{"award-number":["CSTB2022NSCQ-MSX0552"]}]},{"name":"the Open Project Program of the State Key Lab of CAD&CG","award":["A2203"],"award-info":[{"award-number":["A2203"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612210","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"2888-2897","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Beware of Overcorrection: Scene-induced Commonsense Graph for Scene Graph Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4131-0828","authenticated-orcid":false,"given":"Lianggangxu","family":"Chen","sequence":"first","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6963-4525","authenticated-orcid":false,"given":"Jiale","family":"Lu","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6769-7862","authenticated-orcid":false,"given":"Youqi","family":"Song","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8940-6418","authenticated-orcid":false,"given":"Changbo","family":"Wang","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8365-0970","authenticated-orcid":false,"given":"Gaoqi","family":"He","sequence":"additional","affiliation":[{"name":"East China Normal University &amp; Chongqing Key Laboratory of Precision Optics, Shanghai &amp; Chongqing, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer Normalization. stat, Vol. 1050 (2016), 21."},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of the International Conference on Learning Representations (ICLR).","author":"Balcilar Muhammet","year":"2021","unstructured":"Muhammet Balcilar, Renton Guillaume, Pierre H\u00e9roux, Benoit Ga\u00fcz\u00e8re, S\u00e9bastien Adam, and Paul Honeine. 2021. Analyzing the expressive power of graph neural networks in a spectral perspective. In Proceedings of the International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.285"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01005"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19896"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00632"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01882"},{"key":"e_1_3_2_2_8_1","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops. 0--0.","author":"Dornadula Apoorva","year":"2019","unstructured":"Apoorva Dornadula, Austin Narcomey, Ranjay Krishna, Michael Bernstein, and Fei-Fei Li. 2019. Visual relationships as functions: Enabling few-shot scene graph prediction. In Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops. 0--0."},{"key":"e_1_3_2_2_9_1","volume-title":"How powerful are k-hop message passing graph neural networks. arXiv preprint arXiv:2205.13328","author":"Feng Jiarui","year":"2022","unstructured":"Jiarui Feng, Yixin Chen, Fuhai Li, Anindya Sarkar, and Muhan Zhang. 2022. How powerful are k-hop message passing graph neural networks. arXiv preprint arXiv:2205.13328 (2022)."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00207"},{"key":"e_1_3_2_2_11_1","volume-title":"European Conference on Computer Vision. 210--227","author":"Herzig R.","unstructured":"R. Herzig, A. Bar, H. Xu, G. Chechik, T. Darrell, and A. Globerson. 2020. Learning Canonical Representations for Scene Graph to Image Generation. In European Conference on Computer Vision. 210--227."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01790"},{"key":"e_1_3_2_2_14_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_15_1","volume-title":"Semi-Supervised Classification with Graph Convolutional Networks. In International Conference on Learning Representations (ICLR).","author":"Thomas","unstructured":"Thomas N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Ranjay Krishna Yuke Zhu Oliver Groth Justin Johnson Kenji Hata Joshua Kravitz Stephanie Chen Yannis Kalantidis Li-Jia Li David A Shamma et al. 2017. Visual genome: Connecting language and vision using crowdsourced dense image annotations. International journal of computer vision Vol. 123 1 (2017) 32--73.","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01830"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01096"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01884"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_21"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.142"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.108300"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_2_24_1","volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision.","author":"Lin T. Y.","unstructured":"T. Y. Lin, M. Maire, S. Belongie, J. Hays, and C. L. Zitnick. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00380"},{"key":"e_1_3_2_2_26_1","volume-title":"ConceptNet-a practical commonsense reasoning tool-kit. BT technology journal","author":"Liu Hugo","year":"2004","unstructured":"Hugo Liu and Push Singh. 2004. ConceptNet-a practical commonsense reasoning tool-kit. BT technology journal, Vol. 22, 4 (2004), 211--226."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"e_1_3_2_2_28_1","volume-title":"Revisiting heterophily for graph neural networks. arXiv preprint arXiv:2210.07606","author":"Luan Sitao","year":"2022","unstructured":"Sitao Luan, Chenqing Hua, Qincheng Lu, Jiaqi Zhu, Mingde Zhao, Shuyuan Zhang, Xiao-Wen Chang, and Doina Precup. 2022. Revisiting heterophily for graph neural networks. arXiv preprint arXiv:2210.07606 (2022)."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01886"},{"key":"e_1_3_2_2_30_1","volume-title":"Proc. icml","volume":"30","author":"Maas Andrew L","year":"2013","unstructured":"Andrew L Maas, Awni Y Hannun, Andrew Y Ng, et al. 2013. Rectifier nonlinearities improve neural network acoustic models. In Proc. icml, Vol. 30. Citeseer, 3."},{"key":"e_1_3_2_2_31_1","volume-title":"I","author":"Miller George","year":"1995","unstructured":"George Miller. 1995. Wordnet: a lexical database for english communications of the acm 38 (11) 3941. Niemela, I (1995)."},{"key":"e_1_3_2_2_32_1","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems. 91--99."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"crossref","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael Bernstein et al. 2015. Imagenet large scale visual recognition challenge. International journal of computer vision Vol. 115 3 (2015) 211--252.","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_34_1","volume-title":"Martin Schmitt, Hinrich Sch\u00fctze, and Volker Tresp.","author":"Sharifzadeh Sahand","year":"2021","unstructured":"Sahand Sharifzadeh, Sina Moayed Baharlou, Martin Schmitt, Hinrich Sch\u00fctze, and Volker Tresp. 2021b. Improving Scene Graph Classification by Exploiting Knowledge from Texts. arXiv preprint arXiv:2102.04760 (2021)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i6.16636"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00678"},{"key":"e_1_3_2_2_38_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_2_39_1","volume-title":"Dynamic graph cnn for learning on point clouds. Acm Transactions On Graphics (tog)","author":"Wang Yue","year":"2019","unstructured":"Yue Wang, Yongbin Sun, Ziwei Liu, Sanjay E Sarma, Michael M Bronstein, and Justin M Solomon. 2019. Dynamic graph cnn for learning on point clouds. Acm Transactions On Graphics (tog), Vol. 38, 5 (2019), 1--12."},{"key":"e_1_3_2_2_40_1","volume-title":"Yew Soon Ong, and Zejun Ma","author":"Wei Pengfei","year":"2022","unstructured":"Pengfei Wei, Yiping Ke, Yew Soon Ong, and Zejun Ma. 2022. Adaptive Transfer Kernel Learning for Transfer Gaussian Process Regression. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"key":"e_1_3_2_2_42_1","first-page":"7689","article-title":"Joint Modeling of Visual Objects and Relations for Scene Graph Generation","volume":"34","author":"Xu Minghao","year":"2021","unstructured":"Minghao Xu, Meng Qu, Bingbing Ni, and Jian Tang. 2021. Joint Modeling of Visual Objects and Relations for Scene Graph Generation. Advances in Neural Information Processing Systems, Vol. 34 (2021), 7689--7702.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413722"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01094"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_42"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.121"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_36"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00379"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_38"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00611"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01180"},{"key":"e_1_3_2_2_54_1","volume-title":"Spectral Feature Augmentation for Graph Contrastive Learning and Beyond. arXiv preprint arXiv:2212.01026","author":"Zhang Yifei","year":"2022","unstructured":"Yifei Zhang, Hao Zhu, Zixing Song, Piotr Koniusz, and Irwin King. 2022. Spectral Feature Augmentation for Graph Contrastive Learning and Beyond. arXiv preprint arXiv:2212.01026 (2022)."},{"key":"e_1_3_2_2_55_1","volume-title":"2023 a. Prototype-based Embedding Network for Scene Graph Generation. arXiv preprint arXiv:2303.07096","author":"Zheng Chaofan","year":"2023","unstructured":"Chaofan Zheng, Xinyu Lyu, Lianli Gao, Bo Dai, and Jingkuan Song. 2023 a. Prototype-based Embedding Network for Scene Graph Generation. arXiv preprint arXiv:2303.07096 (2023)."},{"key":"e_1_3_2_2_56_1","volume-title":"2023 b. Webly Supervised Knowledge-Embedded Model for Visual Reasoning","author":"Zheng Wenbo","year":"2023","unstructured":"Wenbo Zheng, Lan Yan, Wenwen Zhang, and Fei-Yue Wang. 2023 b. Webly Supervised Knowledge-Embedded Model for Visual Reasoning. IEEE Transactions on Neural Networks and Learning Systems (2023)."},{"key":"e_1_3_2_2_57_1","volume-title":"European Conference on Computer Vision. 211--229","author":"Zhong Y.","unstructured":"Y. Zhong, L. Wang, J. Chen, D. Yu, and Y. Li. 2020. Comprehensive Image Captioning via Scene Graph Decomposition. In European Conference on Computer Vision. 211--229."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612210","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612210","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:05:21Z","timestamp":1755821121000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612210"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":57,"alternative-id":["10.1145\/3581783.3612210","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612210","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}