{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T07:01:00Z","timestamp":1760598060234,"version":"3.37.3"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,5]]},"DOI":"10.1109\/icme51207.2021.9428215","type":"proceedings-article","created":{"date-parts":[[2021,6,9]],"date-time":"2021-06-09T21:14:21Z","timestamp":1623273261000},"page":"1-6","source":"Crossref","is-referenced-by-count":8,"title":["Attention-Based Relation Reasoning Network for Video-Text Retrieval"],"prefix":"10.1109","author":[{"given":"Ni","family":"Wang","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China,Center for Future Media &#x0026; School of Computer Science and Engineering,China"}]},{"given":"Zheng","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China,Center for Future Media &#x0026; School of Computer Science and Engineering,China"}]},{"given":"Xing","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China,Center for Future Media &#x0026; School of Computer Science and Engineering,China"}]},{"given":"Fumin","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China,Center for Future Media &#x0026; School of Computer Science and Engineering,China"}]},{"given":"Yang","family":"Yang","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China,Center for Future Media &#x0026; School of Computer Science and Engineering,China"}]},{"given":"Heng Tao","family":"Shen","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China,Center for Future Media &#x0026; School of Computer Science and Engineering,China"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_49"},{"key":"ref11","article-title":"Order-embeddings of images and language","author":"vendrov","year":"2016","journal-title":"ICLRE"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"ref13","first-page":"2121","article-title":"Devise: A deep visual-semantic embedding model","author":"frome","year":"2013","journal-title":"NeurIPS"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46604-0_46"},{"key":"ref15","article-title":"Unifying visual-semantic embeddings with multimodal neural language models","author":"kiros","year":"2014","journal-title":"CoRR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.502"},{"key":"ref18","article-title":"Collecting highly parallel data for paraphrase evaluation","author":"chen","year":"2011","journal-title":"ACL"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401149"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00208"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-018-00166-3"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01065"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00957"},{"key":"ref2","first-page":"1571","article-title":"Bilinear attention networks","author":"kim","year":"2018","journal-title":"NeurIPS"},{"key":"ref9","first-page":"4967","article-title":"A simple neural network module for relational reasoning","author":"santoro","year":"2017","journal-title":"NeurIPS"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.209"}],"event":{"name":"2021 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2021,7,5]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,9]]}},"container-title":["2021 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9428049\/9428068\/09428215.pdf?arnumber=9428215","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T21:27:28Z","timestamp":1656365248000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9428215\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,5]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/icme51207.2021.9428215","relation":{},"subject":[],"published":{"date-parts":[[2021,7,5]]}}}