{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T17:51:35Z","timestamp":1775065895171,"version":"3.50.1"},"publisher-location":"Stroudsburg, PA, USA","reference-count":0,"publisher":"Association for Computational Linguistics","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.18653\/v1\/d16-1044","type":"proceedings-article","created":{"date-parts":[[2016,12,30]],"date-time":"2016-12-30T13:17:50Z","timestamp":1483103870000},"page":"457-468","source":"Crossref","is-referenced-by-count":947,"title":["Multimodal Compact Bilinear Pooling for Visual Question Answering\n            and Visual Grounding"],"prefix":"10.18653","author":[{"given":"Akira","family":"Fukui","sequence":"first","affiliation":[]},{"given":"Dong Huk","family":"Park","sequence":"additional","affiliation":[]},{"given":"Daylen","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Anna","family":"Rohrbach","sequence":"additional","affiliation":[]},{"given":"Trevor","family":"Darrell","sequence":"additional","affiliation":[]},{"given":"Marcus","family":"Rohrbach","sequence":"additional","affiliation":[]}],"member":"1643","event":{"name":"Proceedings of the 2016 Conference on Empirical Methods in Natural\n          Language Processing","location":"Austin, Texas","start":{"date-parts":[[2016,11]]},"end":{"date-parts":[[2016,11]]}},"container-title":["Proceedings of the 2016 Conference on Empirical Methods in Natural\n          Language Processing"],"original-title":[],"deposited":{"date-parts":[[2016,12,30]],"date-time":"2016-12-30T13:18:04Z","timestamp":1483103884000},"score":1,"resource":{"primary":{"URL":"http:\/\/aclweb.org\/anthology\/D16-1044"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"references-count":0,"URL":"https:\/\/doi.org\/10.18653\/v1\/d16-1044","relation":{},"subject":[],"published":{"date-parts":[[2016]]}}}