{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T01:01:12Z","timestamp":1730250072841,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["92048203"],"award-info":[{"award-number":["92048203"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icmew63481.2024.10645389","type":"proceedings-article","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T17:43:36Z","timestamp":1724953416000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Enhancing Visual Wake Word Spotting with Pretrained Model and Feature Balance Scaling"],"prefix":"10.1109","author":[{"given":"Xuandong","family":"Huang","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China,Hefei,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shangfei","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, University of Science and Technology of China,Hefei,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinghao","family":"Yan","sequence":"additional","affiliation":[{"name":"Tencent Inc.,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Tang","sequence":"additional","affiliation":[{"name":"Tencent Inc.,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"H.","family":"Pengfei","sequence":"additional","affiliation":[{"name":"Tencent Inc.,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"LRS3-TED: a large-scale dataset for visual speech recognition","volume-title":"CoRR","volume":"abs\/1809.00496","author":"Afouras","year":"2018"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746683"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747216"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"2019","DOI":"10.21437\/Interspeech.2019-1363","article-title":"Temporal convolution for real-time keyword spotting on mobile devices","volume-title":"Interspeech 2019","author":"Choi","year":"2019"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54184-6_6"},{"key":"ref6","article-title":"Voxceleb2: Deep speaker recognition","volume-title":"CoRR","volume":"abs\/1806.05622","author":"Chung","year":"2018"},{"key":"ref7","article-title":"Retinaface: Single-stage dense face localisation in the wild","author":"Deng","year":"2019","journal-title":"Cornell University-arXiv, Cornell University"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74695-9_23"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02306"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414567"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053841"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/icassp43922.2022.9747025"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.1990.115555"},{"key":"ref14","first-page":"2015","article-title":"Convolutional neural networks for small-footprint keyword spotting","volume-title":"Interspeech 2015","author":"Sainath","year":"2021"},{"key":"ref15","article-title":"Learning audio-visual speech representation by masked multimodal cluster prediction","volume-title":"The Tenth International Conference on Learning Representations, ICLR 2022","author":"Shi","year":"2022"},{"key":"ref16","article-title":"LRS3-TED: a large-scale dataset for visual speech recognition","volume-title":"CoRR","volume":"abs\/1809.00496","author":"Afouras","year":"2018"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095459"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/icassp48485.2024.10446074"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.1991.150338"},{"key":"ref20","article-title":"Simam: A simple, parameter-free attention module for convolutional neural networks","volume-title":"International Conference on Machine Learning","author":"Yang","year":"2021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/FG.2019.8756582"},{"key":"ref22","first-page":"1","article-title":"Ve-kws: Visual modality enhanced end-to-end keyword spotting","volume-title":"ICASSP 2023 \u2013 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Zhang","year":"2023"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10650"},{"key":"ref24","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2022-10650","article-title":"Audio-visual wake word spotting in misp2021 challenge: Dataset release and deep analysis","volume-title":"Interspeech","author":"Zhou","year":"2022"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo Workshops (ICMEW)","start":{"date-parts":[[2024,7,15]]},"location":"Niagara Falls, ON, Canada","end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo Workshops (ICMEW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10645349\/10645352\/10645389.pdf?arnumber=10645389","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,31]],"date-time":"2024-08-31T05:21:39Z","timestamp":1725081699000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10645389\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/icmew63481.2024.10645389","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}