{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,26]],"date-time":"2025-12-26T11:19:42Z","timestamp":1766747982262,"version":"3.27.0"},"reference-count":79,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,30]]},"DOI":"10.1109\/sibgrapi62404.2024.10716310","type":"proceedings-article","created":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T17:27:39Z","timestamp":1729272459000},"page":"01-06","source":"Crossref","is-referenced-by-count":3,"title":["Mastering Scene Understanding: Scene Graphs to the Rescue"],"prefix":"10.1109","author":[{"given":"Carlos","family":"Caetano","sequence":"first","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Leo Sampaio","family":"Ferraz Ribeiro","sequence":"additional","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Camila","family":"Laranjeira","sequence":"additional","affiliation":[{"name":"Universidade Federal de Minas Gerais (UFMG),Department of Computer Science (DCC),Belo Horizonte,Brazil"}]},{"given":"Gabriel Oliveira","family":"dos Santos","sequence":"additional","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Artur","family":"Barros","sequence":"additional","affiliation":[{"name":"Instituto de 
Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Caio","family":"Petrucci","sequence":"additional","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Andreza Aparecida","family":"dos Santos","sequence":"additional","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Jo\u00e3o","family":"Macedo","sequence":"additional","affiliation":[{"name":"Universidade Federal de Minas Gerais (UFMG),Department of Computer Science (DCC),Belo Horizonte,Brazil"}]},{"given":"Gil","family":"Carvalho","sequence":"additional","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]},{"given":"Fabricio","family":"Benevenuto","sequence":"additional","affiliation":[{"name":"Universidade Federal de Minas Gerais (UFMG),Department of Computer Science (DCC),Belo Horizonte,Brazil"}]},{"given":"Jefersson A.","family":"dos Santos","sequence":"additional","affiliation":[{"name":"School of Computer Science, University of Sheffield,Sheffield,United Kingdom"}]},{"given":"Sandra","family":"Avila","sequence":"additional","affiliation":[{"name":"Instituto de Computa\u00e7\u00e3o (IC), Universidade Estadual de Campinas (UNICAMP),Campinas,Brazil"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107256"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3195106.3195114"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01894"},{"key":"ref6","article-title":"An empirical study on leveraging scene graphs for visual question 
answering","author":"Zhang","year":"2019","journal-title":"BMVC"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127052"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3137605"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01790"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19812-0_11"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01515"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00610"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00609"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2016.2577031"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00632"},{"key":"ref16","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2021","journal-title":"ICLR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01138"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01888"},{"key":"ref19","article-title":"End-to-end object detection with transformers","author":"Carion","year":"2020","journal-title":"ECCV"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3268066"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01883"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995711"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_51"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"key":"ref26","article-title":"Vrr-vg: Refocusing visually-relevant 
relationships","author":"Liang","year":"2019","journal-title":"ICCV"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1007\/s11263-020-01316-z","article-title":"The open images dataset v4","author":"Kuznetsova","year":"2020","journal-title":"IJCV"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00214"},{"key":"ref29","article-title":"SpatialVOC2K: A multilingual dataset of images with annotations and features for spatial relations between objects","author":"Belz","year":"2018","journal-title":"INLG"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00632"},{"journal-title":"ICML","article-title":"Learning transferable visual models from natural language supervision","year":"2021","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"journal-title":"Gemini: A Family of Highly Capable Multimodal Models","year":"2024","author":"Team","key":"ref34"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.120698"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2896516"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01094"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.694"},{"key":"ref40","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00144"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-2812"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123311"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-1804"},{"journal-title":"Scene graph 
based image retrieval- a case study on the CLEVR dataset","year":"2019","author":"Ramnath","key":"ref46"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00097"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093614"},{"journal-title":"SPAN: Learning similarity between scene graphs and images with transformers","year":"2024","author":"Cong","key":"ref49"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3664816"},{"key":"ref51","article-title":"Generative adversarial nets","author":"Goodfellow","year":"2014","journal-title":"NeurIPS"},{"journal-title":"Conditional generative adversarial nets","year":"2014","author":"Mirza","key":"ref52"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00466"},{"key":"ref54","article-title":"PasteGAN: A Semi-Parametric Method to Generate Image from Scene Graph","author":"Li","year":"2019","journal-title":"NeurIPS"},{"key":"ref55","article-title":"Interactive image generation using scene graphs","author":"Mittal","year":"2019","journal-title":"Journal of King Saud University"},{"key":"ref56","article-title":"Heuristics for image generation from scene graphs","volume-title":"ICLR Workshop","author":"Tripathi","year":"2019"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00094"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_13"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"journal-title":"Diffusion-based scene graph to image generation with masked contrastive pre-training","year":"2022","author":"Yang","key":"ref60"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i4.28155"},{"key":"ref62","article-title":"Imagine that! 
abstract-to-intricate text-to-image synthesis with scene graph hallucination diffusion","author":"Wu","year":"2024","journal-title":"NeurIPS"},{"key":"ref63","article-title":"Zero-shot text-to-image generation","author":"Ramesh","year":"2021","journal-title":"ICML"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01018"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00093"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01458-8"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104617"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01307"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3330304"},{"journal-title":"Generating Natural Language Explanations for Visual Question Answering using Scene Graphs and Visual Attention","year":"2019","author":"Ghosh","key":"ref71"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2017.05.001"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"journal-title":"Understanding the Role of Scene Graphs in Visual Question Answering","year":"2021","author":"Damodaran","key":"ref74"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482218"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/ICME52920.2022.9859766"},{"journal-title":"SA-VQA: Structured alignment of visual and semantic representations for visual question answering","year":"2022","author":"Xiong","key":"ref77"},{"key":"ref78","article-title":"PaLI: A jointly-scaled multilingual language-image model","author":"Chen","year":"2023","journal-title":"ICLR"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01973"}],"event":{"name":"2024 37th SIBGRAPI Conference on Graphics, Patterns and Images 
(SIBGRAPI)","start":{"date-parts":[[2024,9,30]]},"location":"Manaus, Brazil","end":{"date-parts":[[2024,10,3]]}},"container-title":["2024 37th SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10716261\/10716262\/10716310.pdf?arnumber=10716310","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,19]],"date-time":"2024-10-19T04:53:16Z","timestamp":1729313596000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10716310\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"references-count":79,"URL":"https:\/\/doi.org\/10.1109\/sibgrapi62404.2024.10716310","relation":{},"subject":[],"published":{"date-parts":[[2024,9,30]]}}}