{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,23]],"date-time":"2024-10-23T05:05:06Z","timestamp":1729659906725,"version":"3.28.0"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,5]],"date-time":"2021-07-05T00:00:00Z","timestamp":1625443200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,5]]},"DOI":"10.1109\/icme51207.2021.9428153","type":"proceedings-article","created":{"date-parts":[[2021,6,9]],"date-time":"2021-06-09T21:14:21Z","timestamp":1623273261000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["Multi-Dimensional Attentive Hierarchical Graph Pooling Network for Video-Text Retrieval"],"prefix":"10.1109","author":[{"given":"Dehao","family":"Wu","sequence":"first","affiliation":[{"name":"Peking University,Shenzhen Graduate School"}]},{"given":"Yi","family":"Li","sequence":"additional","affiliation":[{"name":"Peking University,Shenzhen Graduate School"}]},{"given":"Yinghong","family":"Zhang","sequence":"additional","affiliation":[{"name":"Peking University,Shenzhen Graduate School"}]},{"given":"Yuesheng","family":"Zhu","sequence":"additional","affiliation":[{"name":"Peking University,Shenzhen Graduate School"}]}],"member":"263","reference":[{"key":"ref10","first-page":"4805","article-title":"Hierarchical graph representation learning with differentiable pooling","author":"ying","year":"2018","journal-title":"NeurIPS"},{"key":"ref11","first-page":"2083","article-title":"Graph u-nets","volume":"97","author":"gao","year":"2019","journal-title":"ICML"},{"key":"ref12","first-page":"3734","article-title":"Self-attention graph pooling","volume":"97","author":"lee","year":"2019","journal-title":"ICML"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref14","article-title":"Unifying visual-semantic embeddings with multimodal neural language models","author":"kiros","year":"2014","journal-title":"CoRR"},{"key":"ref15","first-page":"12","article-title":"VSE++: improving visual-semantic embeddings with hard negatives","author":"faghri","year":"2018","journal-title":"BMVC"},{"key":"ref16","first-page":"19","article-title":"Learning joint embedding with multimodal cues for cross-modal video-text retrieval","author":"mithun","year":"2018","journal-title":"ICMR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2832602"},{"key":"ref18","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"NAACL-HLT"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01302"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00208"},{"key":"ref4","first-page":"279","article-title":"Use what you have: Video retrieval using representations from collaborative experts","author":"liu","year":"2019","journal-title":"BMVC"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1145\/2911996.2912015","article-title":"Event detection with zero example: select the right and sup-press the wrong concepts","author":"lu","year":"2016","journal-title":"ICMR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00957"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/140"},{"key":"ref7","article-title":"Semi-supervised classification with graph convolutional networks","author":"kipf","year":"2017","journal-title":"ICLRE"},{"key":"ref2","article-title":"Semantic concept discovery for large-scale zero-shot event detection","author":"chang","year":"2015","journal-title":"IJCAI"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578746"},{"key":"ref9","first-page":"10638","article-title":"Fine-grained video-text retrieval with hierarchical graph reasoning","author":"chen","year":"2020","journal-title":"CVPR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00468"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.502"},{"key":"ref24","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"ICLRE"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref26","article-title":"Order-embeddings of images and language","author":"vendrov","year":"2016","journal-title":"ICLRE"},{"key":"ref25","first-page":"2121","article-title":"Devise: A deep visual-semantic embedding model","author":"frome","year":"2013","journal-title":"NIPS"}],"event":{"name":"2021 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2021,7,5]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,9]]}},"container-title":["2021 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9428049\/9428068\/09428153.pdf?arnumber=9428153","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T14:18:00Z","timestamp":1725200280000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9428153\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,5]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icme51207.2021.9428153","relation":{},"subject":[],"published":{"date-parts":[[2021,7,5]]}}}