{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:27:09Z","timestamp":1750220829884,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,14]],"date-time":"2019-10-14T00:00:00Z","timestamp":1571011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,14]]},"DOI":"10.1145\/3340555.3356101","type":"proceedings-article","created":{"date-parts":[[2019,10,17]],"date-time":"2019-10-17T12:49:48Z","timestamp":1571316588000},"page":"536-539","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Spotting Visual Keywords from Temporal Sliding Windows"],"prefix":"10.1145","author":[{"given":"Yue","family":"Yao","sequence":"first","affiliation":[{"name":"Australian National University, Australia"}]},{"given":"Tianyu","family":"Wang","sequence":"additional","affiliation":[{"name":"Australian National University, Australia"}]},{"given":"Heming","family":"Du","sequence":"additional","affiliation":[{"name":"Australian National University, Australia"}]},{"given":"Liang","family":"Zheng","sequence":"additional","affiliation":[{"name":"Australian National University, Australia"}]},{"given":"Tom","family":"Gedeon","sequence":"additional","affiliation":[{"name":"Australian National University, Australia"}]}],"member":"320","published-online":{"date-parts":[[2019,10,14]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Deep audio-visual speech recognition","author":"Afouras Triantafyllos","year":"2018","unstructured":"Triantafyllos Afouras , Joon\u00a0Son Chung , Andrew Senior , Oriol Vinyals , and Andrew Zisserman . 2018. Deep audio-visual speech recognition . IEEE transactions on pattern analysis and machine intelligence ( 2018 ). Triantafyllos Afouras, Joon\u00a0Son Chung, Andrew Senior, Oriol Vinyals, and Andrew Zisserman. 2018. Deep audio-visual speech recognition. IEEE transactions on pattern analysis and machine intelligence (2018)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00124"},{"key":"e_1_3_2_1_4_1","volume-title":"Asian Conference on Computer Vision. Springer, 87\u2013103","author":"Chung Joon\u00a0Son","year":"2016","unstructured":"Joon\u00a0Son Chung and Andrew Zisserman . 2016 . Lip reading in the wild . In Asian Conference on Computer Vision. Springer, 87\u2013103 . Joon\u00a0Son Chung and Andrew Zisserman. 2016. Lip reading in the wild. In Asian Conference on Computer Vision. Springer, 87\u2013103."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10844-016-0438-z"},{"key":"e_1_3_2_1_7_1","unstructured":"Christoph Feichtenhofer Haoqi Fan Jitendra Malik and Kaiming He. 2018. SlowFast Networks for Video Recognition. arXiv preprint arXiv:1812.03982(2018).  Christoph Feichtenhofer Haoqi Fan Jitendra Malik and Kaiming He. 2018. SlowFast Networks for Video Recognition. arXiv preprint arXiv:1812.03982(2018)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-85"},{"key":"e_1_3_2_1_9_1","volume-title":"Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild. In European Conference on Computer Vision. Springer, 536\u2013552","author":"Stafylakis Themos","year":"2018","unstructured":"Themos Stafylakis and Georgios Tzimiropoulos . 2018 . Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild. In European Conference on Computer Vision. Springer, 536\u2013552 . Themos Stafylakis and Georgios Tzimiropoulos. 2018. Zero-Shot Keyword Spotting for Visual Speech Recognition In-the-wild. In European Conference on Computer Vision. Springer, 536\u2013552."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.617"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2019.8756582"}],"event":{"name":"ICMI '19: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","acronym":"ICMI '19","location":"Suzhou China"},"container-title":["2019 International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3340555.3356101","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3340555.3356101","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:13:29Z","timestamp":1750202009000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3340555.3356101"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10,14]]},"references-count":11,"alternative-id":["10.1145\/3340555.3356101","10.1145\/3340555"],"URL":"https:\/\/doi.org\/10.1145\/3340555.3356101","relation":{},"subject":[],"published":{"date-parts":[[2019,10,14]]},"assertion":[{"value":"2019-10-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}