{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:26:27Z","timestamp":1750307187929,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":17,"publisher":"ACM","license":[{"start":{"date-parts":[[2011,11,28]],"date-time":"2011-11-28T00:00:00Z","timestamp":1322438400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2011,11,28]]},"DOI":"10.1145\/2072298.2071951","type":"proceedings-article","created":{"date-parts":[[2011,12,5]],"date-time":"2011-12-05T17:50:15Z","timestamp":1323107415000},"page":"1113-1116","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Fusing object detection and region appearance for image-text alignment"],"prefix":"10.1145","author":[{"given":"Luca","family":"Del Pero","sequence":"first","affiliation":[{"name":"University of Arizona, Tucson, AZ, USA"}]},{"given":"Philip","family":"Lee","sequence":"additional","affiliation":[{"name":"University of Arizona, Tucson, AZ, USA"}]},{"given":"James","family":"Magahern","sequence":"additional","affiliation":[{"name":"University of Arizona, Tucson, AZ, USA"}]},{"given":"Emily","family":"Hartley","sequence":"additional","affiliation":[{"name":"University of Arizona, Tucson, AZ, USA"}]},{"given":"Kobus","family":"Barnard","sequence":"additional","affiliation":[{"name":"University of Arizona, Tucson, AZ, USA"}]},{"given":"Ping","family":"Wang","sequence":"additional","affiliation":[{"name":"ObjectVideo, Reston, VA, USA"}]},{"given":"Atul","family":"Kanaujia","sequence":"additional","affiliation":[{"name":"ObjectVideo, Reston, VA, USA"}]},{"given":"Niels","family":"Haering","sequence":"additional","affiliation":[{"name":"ObjectVideo, Reston, VA, USA"}]}],"member":"320","published-online":{"date-parts":[[2011,11,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1177\/016555159702300403"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/944919.944965"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383224"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0068-6"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2001.937654"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24670-1_27"},{"key":"e_1_3_2_1_7_1","series-title":"LNCS","first-page":"452","volume-title":"ECCV","author":"Deselaers T.","year":"2010","unstructured":"T. Deselaers , B. Alexe , , and V. Ferrari . Localizing objects while learning their appearance . In ECCV , volume 6314 of LNCS , pages 452 -- 466 . Springer , 2010 . T. Deselaers, B. Alexe, , and V. Ferrari. Localizing objects while learning their appearance. In ECCV, volume 6314 of LNCS, pages 452--466. Springer, 2010."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2009.03.008"},{"key":"e_1_3_2_1_9_1","unstructured":"C. Fellbaum P. G. A. Miller R. Tengi and P. Wakefield. Wordnet - a lexical database for english.  C. Fellbaum P. G. A. Miller R. Tengi and P. Wakefield. Wordnet - a lexical database for english."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.167"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-88682-2_3"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1101149.1101305"},{"key":"e_1_3_2_1_13_1","volume-title":"NIPS","author":"Lavrenko V.","year":"2003","unstructured":"V. Lavrenko , R. Manmatha , and J. Jeon . A model for learning the semantics of pictures . In NIPS , 2003 . V. Lavrenko, R. Manmatha, and J. Jeon. A model for learning the semantics of pictures. In NIPS, 2003."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.868688"},{"key":"e_1_3_2_1_15_1","volume-title":"Combining multiple classifiers by averaging or by multiplying?","author":"Tax D. M.","year":"2000","unstructured":"D. M. Tax , M. V. Breukelen , R. P. Duin , and J. Kittler . Combining multiple classifiers by averaging or by multiplying? , 2000 . D. M. Tax, M. V. Breukelen, R. P. Duin, and J. Kittler. Combining multiple classifiers by averaging or by multiplying?, 2000."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1743384.1743476"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.301"}],"event":{"name":"MM '11: ACM Multimedia Conference","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Scottsdale Arizona USA","acronym":"MM '11"},"container-title":["Proceedings of the 19th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2072298.2071951","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2072298.2071951","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T10:05:49Z","timestamp":1750241149000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2072298.2071951"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2011,11,28]]},"references-count":17,"alternative-id":["10.1145\/2072298.2071951","10.1145\/2072298"],"URL":"https:\/\/doi.org\/10.1145\/2072298.2071951","relation":{},"subject":[],"published":{"date-parts":[[2011,11,28]]},"assertion":[{"value":"2011-11-28","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}