{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T17:48:20Z","timestamp":1779904100019,"version":"3.53.1"},"reference-count":191,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DE190100626"],"award-info":[{"award-number":["DE190100626"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61973250"],"award-info":[{"award-number":["61973250"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62073218"],"award-info":[{"award-number":["62073218"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61906109"],"award-info":[{"award-number":["61906109"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972315"],"award-info":[{"award-number":["61972315"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Natural Science Outstanding Youth Fund of Shandong Province","award":["ZR2021YQ44"],"award-info":[{"award-number":["ZR2021YQ44"]}]},{"name":"Shaanxi Provincial Department of Education serves local scientific research","award":["19JC038"],"award-info":[{"award-number":["19JC038"]}]},{"name":"The Key Research and Development Program of Shaanxi","award":["2021GY-077"],"award-info":[{"award-number":["2021GY-077"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,1,1]]},"DOI":"10.1109\/tpami.2021.3137605","type":"journal-article","created":{"date-parts":[[2021,12,23]],"date-time":"2021-12-23T23:01:49Z","timestamp":1640300509000},"page":"1-26","source":"Crossref","is-referenced-by-count":278,"title":["A Comprehensive Survey of Scene Graphs: Generation and Application"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7778-8807","authenticated-orcid":false,"given":"Xiaojun","family":"Chang","sequence":"first","affiliation":[{"name":"ReLER Lab, AAII, Faculty of Engineering and Information Technology, University of Technology Sydney, Ultimo, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9965-8088","authenticated-orcid":false,"given":"Pengzhen","family":"Ren","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Northwest University, Xi&#x0027;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8701-2669","authenticated-orcid":false,"given":"Pengfei","family":"Xu","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Northwest University, Xi&#x0027;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9642-8009","authenticated-orcid":false,"given":"Zhihui","family":"Li","sequence":"additional","affiliation":[{"name":"Shandong Artificial Intelligence Institute, Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaojiang","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Northwest University, Xi&#x0027;an, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alex","family":"Hauptmann","sequence":"additional","affiliation":[{"name":"School of Computer Science, Carnegie Mellon University, Pittsburgh, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093614"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00402"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58529-7_32"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351040"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01050"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313681"},{"key":"ref175","article-title":"An empirical study on leveraging scene graphs for visual question answering","author":"zhang","year":"2019","journal-title":"Proc Brit Mach Vis Conf"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00279"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01295-1"},{"key":"ref168","article-title":"Scene graph based image retrieval&#x2013;A case study on the CLEVR dataset","author":"ramnath","year":"2019"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00097"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119206"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1030"},{"key":"ref33","first-page":"558","article-title":"LinkNet: Relational embedding for scene graph","author":"woo","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01138"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.142"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.179"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_8"},{"key":"ref35","first-page":"109","article-title":"Efficient inference in fully connected CRFs with gaussian edge potentials","author":"kr\u00e4henb\u00fchl","year":"2011","journal-title":"Proc 24th Int Conf Neural Inf Process Syst"},{"key":"ref34","first-page":"282","article-title":"Conditional random fields: Probabilistic models for segmenting and labeling sequence data","author":"lafferty","year":"2001","journal-title":"Proc 18th Int Conf Mach Learn"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00212"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00380"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref183","first-page":"740","article-title":"Microsoft COCO: Common objects in context","author":"lin","year":"2014","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref182","article-title":"The new data and new challenges in multimedia research","author":"thomee","year":"2015"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413575"},{"key":"ref188","article-title":"Tackling the unannotated: Scene graph generation with bias-reduced models","author":"wang","year":"2020","journal-title":"Proc Brit Mach Vis Conf"},{"key":"ref187","article-title":"Assisting scene graph generation with self-supervision","author":"inuganti","year":"2020"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00838"},{"key":"ref28","first-page":"444","article-title":"Natural language guided visual relationship detection","author":"wentong","year":"2019","journal-title":"Proc IEEE\/CVF Conf Comput Vis Pattern Recognit Workshops"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_21"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.122"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.766"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00632"},{"key":"ref22","first-page":"2171","article-title":"Pixels to graphs by associative embedding","author":"newell","year":"2017","journal-title":"Proc 31st Int Conf Neural Inf Process Syst"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.352"},{"key":"ref24","article-title":"An interpretable model for scene graph generation","author":"zhang","year":"2018"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_20"},{"key":"ref101","first-page":"1730","article-title":"Visual relationships as functions: Enabling few-shot scene graph prediction","author":"dornadula","year":"2019","journal-title":"Proc IEEE\/CVF Conf Comput Vis Pattern Recognit Workshops"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01372"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00408"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992222"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1067"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_13"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00878"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00221"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00094"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12260"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00872"},{"key":"ref151","first-page":"381","article-title":"Learning to detect human-object interactions","author":"yu-wei","year":"2018","journal-title":"Proc IEEE Winter Conf Appl Comput Vis"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00857"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019185"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2931042"},{"key":"ref149","article-title":"Transferable interactiveness prior for human-object interaction detection","author":"li","year":"2018"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/132"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298713"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.164"},{"key":"ref56","article-title":"Learn to pay attention","author":"jetley","year":"2018","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.71"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107707"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2979270"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00207"},{"key":"ref40","first-page":"1097","article-title":"Conditional random fields for object recognition","author":"quattoni","year":"2004","journal-title":"Proc 17th Int Conf Neural Inf Process Syst"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01042"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2018.12.027"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2896516"},{"key":"ref164","first-page":"211","article-title":"Comprehensive image captioning via scene graph decomposition","author":"zhong","year":"2020","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00998"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01094"},{"key":"ref161","article-title":"TPsgtR: Neural-symbolic tensor product scene-graph-triplet representation for image captioning","author":"sur","year":"2019"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1145\/3195106.3195114"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3472291"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-2812"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3127346"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00526"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12256"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803106"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00576"},{"key":"ref157","first-page":"3950","article-title":"PasteGAN: A semi-parametric method to generate image from scene graph","author":"yikang","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref158","doi-asserted-by":"crossref","first-page":"2559","DOI":"10.1109\/TVCG.2017.2759265","article-title":"Narrative collage of image collections by scene graph recombination","volume":"24","author":"fei","year":"2018","journal-title":"IEEE Trans Vis Comput Graph"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6455"},{"key":"ref46","first-page":"2787","article-title":"Translating embeddings for modeling multi-relational data","author":"antoine","year":"2013","journal-title":"Proc 26th Int Conf Neural Inf Process Syst"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995711"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.331"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9491"},{"key":"ref42","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref41","article-title":"Scene graph generation via conditional random fields","author":"cong","year":"2018"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00717"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00170"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i6.16636"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35085-6_6"},{"key":"ref73","article-title":"On the properties of neural machine translation: Encoder-decoder approaches","author":"kyunghyun","year":"2014","journal-title":"CoRR"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00678"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00807"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1111\/coin.12202"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_14"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ISM.2017.20"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_38"},{"key":"ref77","first-page":"2204","article-title":"Recurrent models of visual attention","author":"mnih","year":"2014","journal-title":"Proc 27th Int Conf Neural Inf Process Syst"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"ref133","article-title":"From saturation to zero-shot visual relationship detection using local context","author":"gkanatsios","year":"2020","journal-title":"Proc 31st Brit Mach Vis Virt Conf"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414025"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01025"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2272919"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00181"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1107"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/82"},{"key":"ref135","article-title":"Graph density-aware losses for novel compositions in scene graph generation","author":"knyazev","year":"2020","journal-title":"Proc Brit Mach Vis Conf"},{"key":"ref138","article-title":"A tutorial on energy-based learning","volume":"1","author":"lecun","year":"2006","journal-title":"Predicting Structured Data"},{"key":"ref137","first-page":"6000","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc 31st Int Conf Neural Inf Process Syst"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00217"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.555"},{"key":"ref63","first-page":"21","article-title":"SSD: Single shot MultiBox detector","author":"wei","year":"2016","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.101"},{"key":"ref140","article-title":"Interactive image generation using scene graphs","author":"mittal","year":"2019","journal-title":"CoRR"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2736553"},{"key":"ref141","article-title":"Using scene graph context to improve image generation","author":"subarna","year":"2019","journal-title":"CoRR"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.95"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123311"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref143","article-title":"Scene graph reasoning with prior visual relationship for visual question answering","author":"yang","year":"2018"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00611"},{"key":"ref144","article-title":"Generating natural language explanations for visual question answering using scene graphs and visual attention","author":"ghosh","year":"2019"},{"key":"ref2","first-page":"76:1","article-title":"A comprehensive survey of neural architecture search: Challenges and solutions","volume":"54","author":"ren","year":"2021","journal-title":"ACM Comput Surv"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00094"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2017.12.004"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.469"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3086066"},{"key":"ref95","first-page":"2014","article-title":"Learning convolutional neural networks for graphs","author":"niepert","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref190","article-title":"Dual ResGCN for balanced scene GraphGeneration","author":"zhang","year":"2020"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_3"},{"key":"ref94","first-page":"3844","article-title":"Convolutional neural networks on graphs with fast localized spectral filtering","author":"defferrard","year":"2016","journal-title":"Proc 30th Int Conf Neural Inf Process Syst"},{"key":"ref191","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.320"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.554"},{"key":"ref93","first-page":"2702","article-title":"Discriminative embeddings of latent variable models for structured data","author":"dai","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00718"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.230"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00112"},{"key":"ref91","article-title":"A simple neural network module for relational reasoning","author":"santoro","year":"2017"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6455"},{"key":"ref90","article-title":"Gated graph sequence neural networks","author":"li","year":"2016","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6783"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00471"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413722"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123380"},{"key":"ref110","article-title":"Learning visual relation priors for image-text matching and image captioning with neural scene graph generators","author":"lee","year":"2019"},{"key":"ref98","first-page":"7211","article-title":"Mapping images to scene graphs with permutation-invariant structured prediction","author":"herzig","year":"2018","journal-title":"Proc 32nd Int Conf Neural Inf Process Syst"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"ref96","first-page":"1025","article-title":"Inductive representation learning on large graphs","author":"hamilton","year":"2017","journal-title":"Proc 31st Int Conf Neural Inf Process Syst"},{"key":"ref97","article-title":"Classifying collisions with spatio-temporal action graph networks","author":"herzig","year":"2018"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2010.5509319"},{"key":"ref11","article-title":"From images to sentences through scene description graphs using commonsense reasoning and knowledge","author":"aditya","year":"2015"},{"key":"ref12","article-title":"SG2Caps: Revisiting scene graphs for image captioning","author":"tripathi","year":"2021"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/02783649211056674"},{"key":"ref14","article-title":"Graphical contrastive losses for scene graph generation","author":"zhang","year":"2019"},{"key":"ref15","first-page":"3713","article-title":"Unbiased scene graph generation from biased training","author":"kaihua","year":"2020","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit"},{"key":"ref118","article-title":"Learning to generalize to new compositions in image understanding","author":"atzmon","year":"2016"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00756"},{"key":"ref117","article-title":"Efficient estimation of word representations in vector space","author":"mikolov","year":"2013","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240668"},{"key":"ref81","article-title":"Scene dynamics: Counterfactual critic multi-agent training for scene graph generation","author":"chen","year":"2018"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1150"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.213"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.121"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2639438"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00244"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093297"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01234"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1106"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00819"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1228"},{"key":"ref89","first-page":"1263","article-title":"Neural message passing for quantum chemistry","author":"gilmer","year":"2017","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2017.07.010"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_36"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00207"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICNN.1996.548916"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2005.1555942"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2008.2005605"},{"key":"ref88","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"2017","journal-title":"Proc Int Conf Learn Representations"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9970415\/09661322.pdf?arnumber=9661322","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,26]],"date-time":"2022-12-26T19:14:39Z","timestamp":1672082079000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9661322\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,1]]},"references-count":191,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2021.3137605","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,1,1]]}}}