{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:44:48Z","timestamp":1773193488248,"version":"3.50.1"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001691","name":"Ministry of Education, Culture, Sports, Science and Technology (MEXT), Japan, through Grants-in-Aid for Scientific Research","doi-asserted-by":"publisher","award":["JP21H03519"],"award-info":[{"award-number":["JP21H03519"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"Ministry of Education, Culture, Sports, Science and Technology (MEXT), Japan, through Grants-in-Aid for Scientific Research","doi-asserted-by":"publisher","award":["JP23K16945"],"award-info":[{"award-number":["JP23K16945"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3360113","type":"journal-article","created":{"date-parts":[[2024,1,30]],"date-time":"2024-01-30T18:49:47Z","timestamp":1706640587000},"page":"17499-17512","source":"Crossref","is-referenced-by-count":4,"title":["Image-Collection Summarization Using Scene-Graph Generation With External Knowledge"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1295-777X","authenticated-orcid":false,"given":"Itthisak","family":"Phueaksri","sequence":"first","affiliation":[{"name":"Graduate School of Informatics, Nagoya University, Aichi, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9193-5973","authenticated-orcid":false,"given":"Marc A.","family":"Kastner","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics, Kyoto University, Kyoto, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3799-4550","authenticated-orcid":false,"given":"Yasutomo","family":"Kawanishi","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics, Nagoya University, Aichi, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3041-4330","authenticated-orcid":false,"given":"Takahiro","family":"Komamizu","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics, Nagoya University, Aichi, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3942-9296","authenticated-orcid":false,"given":"Ichiro","family":"Ide","sequence":"additional","affiliation":[{"name":"Graduate School of Informatics, Nagoya University, Aichi, Nagoya, Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3137605"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3229654"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00097"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_38"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i4.16431"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00350"},{"key":"ref7","article-title":"Multi-image summarization: Textual summary from a set of cohesive images","author":"Trieu","year":"2020","journal-title":"arXiv:2006.08686"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-27077-2_14"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3332098"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1101"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6455"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2004.841694"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_13"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17281"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2008.01.039"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58592-1_36"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3186727"},{"key":"ref20","first-page":"7689","article-title":"Joint modeling of visual objects and relations for scene graph generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Xu"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86520-7_29"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"ref23","first-page":"1","article-title":"The PageRank citation ranking: Bringing order to the web","volume-title":"Proc. WWW","author":"Page"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018909"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/5.58325"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2006.05.022"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-016-3840-1"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cvi.2017.0568"},{"key":"ref30","volume-title":"Introduction To Graph Theory","author":"Trudeau","year":"1993"},{"key":"ref31","article-title":"Image scene graph generation (SGG) benchmark","author":"Han","year":"2021","journal-title":"arXiv:2107.12604"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01791"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref34","first-page":"504","article-title":"Are scene graphs good enough to improve image captioning?","volume-title":"Proc. ACL-IJCNLP","author":"Milewski"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00611"},{"key":"ref36","first-page":"91","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume-title":"Proc. Int. Conf. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Ren"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref39","first-page":"1","article-title":"Bidirectional long short-term memory networks for relation classification","volume-title":"Proc. EMNLP","author":"Zeng"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00678"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.330"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3268066"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-tutorials.3"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref46","article-title":"GRCNN: Graph recognition convolutional neural network for synthesizing programs from flow charts","author":"Cheng","year":"2020","journal-title":"arXiv:2011.05980"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11782"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s11760-022-02456-0"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16333"},{"key":"ref51","first-page":"74","article-title":"ROUGE: A package for automatic evaluation of summaries","volume-title":"Proc. ACL","author":"Lin"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2015.08.044"},{"key":"ref53","first-page":"1413","article-title":"Learning mixtures of submodular functions for image collection summarization","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"27","author":"Tschiatschek"},{"key":"ref54","first-page":"1","article-title":"BERTScore: Evaluating text generation with BERT","volume-title":"Proc. ICLR","author":"Zhang"},{"key":"ref55","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. Linguistics, Hum. Lang. Technol.","volume":"1","author":"Devlin"},{"key":"ref56","first-page":"12:1","article-title":"VSE++: Improving visual-semantic embeddings with hard negatives","volume-title":"Proc. BMVC","author":"Faghri"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref58","first-page":"1","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. ICLR","author":"Kingma"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.5220\/0005209202710278"},{"key":"ref60","article-title":"Learning similarity between scene graphs and images with transformers","author":"Cong","year":"2023","journal-title":"arXiv:2304.00590"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01180"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/10380310\/10416832.pdf?arnumber=10416832","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,6]],"date-time":"2024-02-06T22:32:59Z","timestamp":1707258779000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10416832\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":61,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3360113","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}