{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T20:24:07Z","timestamp":1740169447354,"version":"3.37.3"},"reference-count":163,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62303361","62302161","U1811463"],"award-info":[{"award-number":["62303361","62302161","U1811463"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004761","name":"Hainan Provincial Natural Science Foundation of China","doi-asserted-by":"publisher","award":["623QN266"],"award-info":[{"award-number":["623QN266"]}],"id":[{"id":"10.13039\/501100004761","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["WUT: 233110002"],"award-info":[{"award-number":["WUT: 233110002"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2018AAA0101502"],"award-info":[{"award-number":["2018AAA0101502"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Cybern."],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1109\/tcyb.2023.3310892","type":"journal-article","created":{"date-parts":[[2023,9,20]],"date-time":"2023-09-20T17:54:49Z","timestamp":1695232489000},"page":"2579-2591","source":"Crossref","is-referenced-by-count":2,"title":["Knowledge-Embedded Mutual Guidance for Visual Reasoning"],"prefix":"10.1109","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9732-3217","authenticated-orcid":false,"given":"Wenbo","family":"Zheng","sequence":"first","affiliation":[{"name":"School of Computer Science and Artificial Intelligence, Wuhan University of Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6452-9649","authenticated-orcid":false,"given":"Lan","family":"Yan","sequence":"additional","affiliation":[{"name":"College of Computer Science and Engineering, Hunan University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8305-0198","authenticated-orcid":false,"given":"Long","family":"Chen","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Management and Control for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2334-5824","authenticated-orcid":false,"given":"Qiang","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9185-3989","authenticated-orcid":false,"given":"Fei-Yue","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Management and Control of Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00030"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2021.3050508"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2019.2956975"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2020.3029423"},{"key":"ref5","first-page":"12747","article-title":"Two heads are better than one: Hypergraph-enhanced graph reasoning for visual event ratiocination","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Zheng"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467285"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01246"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00686"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.470"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_4"},{"key":"ref11","first-page":"1","article-title":"Compositional attention networks for machine reasoning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Hudson"},{"key":"ref12","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume-title":"Advances in Neural Information Processing Systems","volume":"28","author":"Ren","year":"2015"},{"key":"ref13","first-page":"379","article-title":"R-FCN: Object detection via region-based fully convolutional networks","volume-title":"Advances in Neural Information Processing Systems","volume":"29","author":"Dai","year":"2016"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1219"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.651"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10605-2_27"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.inffus.2020.10.007","article-title":"KM4: Visual reasoning via knowledge embedding memory model with mutual modulation","volume":"67","author":"Zheng","year":"2021","journal-title":"Inf. Fusion"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref22","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Brown","year":"2020"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref24","first-page":"13","article-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Lu","year":"2019"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.469"},{"key":"ref27","first-page":"1","article-title":"VL-Bert: Pre-training of generic visual-linguistic representations","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Su"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.269"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.12"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"ref32","first-page":"4651","article-title":"Perceiver: General perception with iterative attention","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Jaegle"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10166-9"},{"volume-title":"Deep Learning","year":"2016","author":"Goodfellow","key":"ref34"},{"key":"ref35","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1016\/j.ins.2022.03.089","article-title":"Cognitive decisions based on a rule-based fuzzy system","volume":"600","author":"Yuan","year":"2022","journal-title":"Inf. Sci."},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350923"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/tcyb.2022.3209653"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3108724"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1312.4400"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1118"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00644"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403297"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3017530"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045336"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1038\/35039043"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00301-6"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1359"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.3115\/1219044.1219075"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11573"},{"key":"ref52","first-page":"1","article-title":"Query2box: Reasoning over knowledge graphs in vector space using box embeddings","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Ren"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00632"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240523"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2676130"},{"key":"ref56","first-page":"1","article-title":"Embedding entities and relations for learning and inference in knowledge bases","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Yang"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1162\/089976604773135104"},{"key":"ref58","first-page":"1315","article-title":"From one point to a manifold: Knowledge graph embedding for precise link prediction","volume-title":"Proc. IJCAI","author":"Xiao"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00041"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1644"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-2034"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/8996.003.0015"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01039"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01045"},{"key":"ref70","first-page":"1","article-title":"Visual question answering with prior class semantics","volume-title":"Proc. CVPR","author":"Shevchenko"},{"key":"ref71","first-page":"5903","article-title":"Learning by abstraction: The neural state machine","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Hudson","year":"2019"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00070"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/114"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58589-1_25"},{"key":"ref75","first-page":"1","article-title":"Weak supervision helps emergence of word-object alignment and improves vision-language tasks","volume-title":"Proc. ECAI 2020","author":"Kervadec"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.10.016"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA52953.2021.00157"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00441"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482218"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00141"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01513"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00502"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859766"},{"key":"ref84","first-page":"458","article-title":"Predicting is not understanding: Recognizing and asciitextaddressing underspecification in asciitextmachine learning","volume-title":"Computer Vision\u2013ECCV","author":"Teney","year":"2022"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i5.20536"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19945"},{"key":"ref87","first-page":"1","article-title":"Large-scale adversarial training for vision-and-language representation learning","volume-title":"Proc. NeurIPS","author":"Gan"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475637"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_8"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.707"},{"key":"ref91","first-page":"1931","article-title":"Unifying vision-and-language tasks via text generation","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Cho"},{"key":"ref92","first-page":"1","article-title":"VinVL: Making visual representations matter in vision-language models","volume-title":"Proc. CVPR","author":"Zhang"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.232"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.maiworkshop-1.11"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.199"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475637"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00502"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00192"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3105284"},{"key":"ref101","first-page":"1","article-title":"ProTo: Program-guided transformer for program-guided tasks","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Zhao"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00972"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00516"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3139234"},{"key":"ref105","first-page":"1","article-title":"RelViT: Concept-guided vision transformer for visual relational reasoning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Ma"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.93"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.516"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01278"},{"key":"ref109","article-title":"LST: Ladder side-tuning for parameter and memory efficient transfer learning","volume-title":"Advances in Neural Information Processing Systems","author":"Sung","year":"2022"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.683"},{"key":"ref111","first-page":"5583","article-title":"ViLT: Vision-and-language transformer without convolution or region supervision","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","volume":"139","author":"Kim"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.240"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.420"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.42"},{"key":"ref115","first-page":"1","article-title":"SimVLM: Simple visual language model pretraining with weak supervision","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wang"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-acl.118"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01763"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462838"},{"key":"ref119","first-page":"1","article-title":"Align before fuse: Vision and language representation learning with momentum distillation","volume-title":"Proc. NeurIPS","author":"Li"},{"key":"ref120","first-page":"1","article-title":"Probing inter-modality: visual parsing with self-attention for vision-language pre-training","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Xue"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.01599"},{"key":"ref122","first-page":"1","article-title":"Efficient vision-language pretraining with visual concepts and hierarchical alignment","volume-title":"Proc. 33rd Brit. Mach. Vis. Conf. (BMVC)","author":"Shukor"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR56361.2022.9956616"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01520"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01522"},{"key":"ref126","first-page":"395","article-title":"Grit-VLP: Grouped mini-batch sampling for asciitextefficient vision and asciitextlanguage pre-training","volume-title":"Computer Vision\u2014ECCV","author":"Byun","year":"2022"},{"key":"ref127","first-page":"1","article-title":"Masked vision and language modeling for multi-modal representation learning","volume-title":"Proc. 11th Int. Conf. Learn. Represent.","author":"Kwon"},{"key":"ref128","first-page":"1","article-title":"BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. ICML","author":"Li"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350962"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58601-0_14"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00611"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.331"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.454"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12271"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_3"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240611"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00838"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00527"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00058"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01353-8"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-33676-9_23"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/82"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00471"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00182"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01180"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2979270"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00380"},{"journal-title":"IEEE Trans. Image Process.","article-title":"Dual ResGCN for balanced scene graphgeneration","author":"Zhang","key":"ref151"},{"key":"ref152","first-page":"248","article-title":"Towards overcoming false positives in visual relationship detection","volume-title":"Proc. 32nd Brit. Mach. Vis. Conf.","author":"Jin"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00244"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413722"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_36"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01138"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01096"},{"key":"ref158","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01563"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"ref160","first-page":"1","article-title":"Joint modeling of visual objects and relations for scene graph generation","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01887"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01885"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00678"}],"container-title":["IEEE Transactions on Cybernetics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6221036\/10474510\/10256092.pdf?arnumber=10256092","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T18:55:16Z","timestamp":1732906516000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10256092\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4]]},"references-count":163,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcyb.2023.3310892","relation":{},"ISSN":["2168-2267","2168-2275"],"issn-type":[{"type":"print","value":"2168-2267"},{"type":"electronic","value":"2168-2275"}],"subject":[],"published":{"date-parts":[[2024,4]]}}}