{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:27:27Z","timestamp":1750307247987,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2012,6,5]],"date-time":"2012-06-05T00:00:00Z","timestamp":1338854400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2012,6,5]]},"DOI":"10.1145\/2324796.2324843","type":"proceedings-article","created":{"date-parts":[[2012,7,3]],"date-time":"2012-07-03T11:53:15Z","timestamp":1341316395000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["Joint audio-visual bi-modal codewords for video event detection"],"prefix":"10.1145","author":[{"given":"Guangnan","family":"Ye","sequence":"first","affiliation":[{"name":"Columbia University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"I-Hong","family":"Jhuo","sequence":"additional","affiliation":[{"name":"National Taiwan University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong","family":"Liu","sequence":"additional","affiliation":[{"name":"Columbia University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu-Gang","family":"Jiang","sequence":"additional","affiliation":[{"name":"Fudan University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"D. T.","family":"Lee","sequence":"additional","affiliation":[{"name":"National Taiwan University and National Chung Hsing University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shih-Fu","family":"Chang","sequence":"additional","affiliation":[{"name":"Columbia University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2012,6,5]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"http:\/\/en.wikipedia.org\/wiki\/P-value.  http:\/\/en.wikipedia.org\/wiki\/P-value."},{"key":"e_1_3_2_1_2_1","unstructured":"http:\/\/www.nist.gov\/itl\/iad\/mig\/med11.cfm\/.  http:\/\/www.nist.gov\/itl\/iad\/mig\/med11.cfm\/."},{"key":"e_1_3_2_1_3_1","volume-title":"TRECVID 2011. In TRECVID Workshop","author":"Bao L.","year":"2011","unstructured":"L. Bao , TRECVID 2011. In TRECVID Workshop , 2011 . L. Bao, et al. Informedia @ TRECVID 2011. In TRECVID Workshop, 2011."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2003.1206512"},{"key":"e_1_3_2_1_5_1","volume-title":"ICML","author":"Boureau Y.-L.","year":"2010","unstructured":"Y.-L. Boureau , J. Ponce , and Y. Lecun . A theoretical analysis of feature pooling in visual recognition . In ICML , 2010 . Y.-L. Boureau, J. Ponce, and Y. Lecun. A theoretical analysis of feature pooling in visual recognition. In ICML, 2010."},{"key":"e_1_3_2_1_6_1","volume-title":"ECCV","author":"Csurka G.","year":"2004","unstructured":"G. Csurka , C. Dance , L. Fan , J. Willamowski , and C. Bray . Visual categorization with bags of keypoints . In ECCV , 2004 . G. Csurka, C. Dance, L. Fan, J. Willamowski, and C. Bray. Visual categorization with bags of keypoints. In ECCV, 2004."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2006.886263"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/502512.502550"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1631272.1631277"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072316"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1991996.1992025"},{"key":"e_1_3_2_1_12_1","volume-title":"NIST TRECVID Workshop","author":"Jiang Y.-G.","year":"2010","unstructured":"Y.-G. Jiang , multimedia event detection: Combining multiple modalities, contextual concepts, and temporal matching . In NIST TRECVID Workshop , 2010 . Y.-G. Jiang, et al. Columbia-ucf trecvid2010 multimedia event detection: Combining multiple modalities, contextual concepts, and temporal matching. In NIST TRECVID Workshop, 2010."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-005-1838-7"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995729"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/1394399"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000027790.02288.f2"},{"key":"e_1_3_2_1_18_1","volume-title":"BBN VISER TRECVID 2011 Multimedia Event Detection System. In In NIST TRECVID Workshop","author":"Natarajan P.","year":"2011","unstructured":"P. Natarajan BBN VISER TRECVID 2011 Multimedia Event Detection System. In In NIST TRECVID Workshop , 2011 . P. Natarajan et al. BBN VISER TRECVID 2011 Multimedia Event Detection System. In In NIST TRECVID Workshop, 2011."},{"key":"e_1_3_2_1_19_1","volume-title":"WWW","author":"Pan S.","year":"2010","unstructured":"S. Pan , X. Nu , J. T. Sun , Q. Yang , and Z. Chen . Co-clustering documents and words using bipartite spectral graph partitioning . In WWW , 2010 . S. Pan, X. Nu, J. T. Sun, Q. Yang, and Z. Chen. Co-clustering documents and words using bipartite spectral graph partitioning. In WWW, 2010."},{"key":"e_1_3_2_1_20_1","volume-title":"Spectral analysis and identification of Dutch vowels in monosyllabic words. Doctoral dissertion","author":"Pols L.","year":"1966","unstructured":"L. Pols . Spectral analysis and identification of Dutch vowels in monosyllabic words. Doctoral dissertion , Free University , Amsterdam , 1966 . L. Pols. Spectral analysis and identification of Dutch vowels in monosyllabic words. Doctoral dissertion, Free University, Amsterdam, 1966."},{"key":"e_1_3_2_1_21_1","volume-title":"Issues in visual and audio-visual speech processing","author":"Potamianos G.","year":"2004","unstructured":"G. Potamianos , C. Neti , J. Luettin , and I. Matthews . Audio-visual automatic speech recognition: an overview . In Issues in visual and audio-visual speech processing , 2004 . G. Potamianos, C. Neti, J. Luettin, and I. Matthews. Audio-visual automatic speech recognition: an overview. In Issues in visual and audio-visual speech processing, 2004."},{"key":"e_1_3_2_1_22_1","volume-title":"CVPR","author":"Ye G.","year":"2012","unstructured":"G. Ye , D. Liu , I.-H. Jhuo , and S.-F. Chang . Robust late fusion with rank minimization . In CVPR , 2012 . G. Ye, D. Liu, I.-H. Jhuo, and S.-F. Chang. Robust late fusion with rank minimization. In CVPR, 2012."}],"event":{"name":"ICMR '12: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Hong Kong China","acronym":"ICMR '12"},"container-title":["Proceedings of the 2nd ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2324796.2324843","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2324796.2324843","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T10:52:09Z","timestamp":1750243929000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2324796.2324843"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,6,5]]},"references-count":22,"alternative-id":["10.1145\/2324796.2324843","10.1145\/2324796"],"URL":"https:\/\/doi.org\/10.1145\/2324796.2324843","relation":{},"subject":[],"published":{"date-parts":[[2012,6,5]]},"assertion":[{"value":"2012-06-05","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}