{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,2]],"date-time":"2025-12-02T22:37:35Z","timestamp":1764715055594,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T00:00:00Z","timestamp":1634515200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2021R1F1A1059665"],"award-info":[{"award-number":["2021R1F1A1059665"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,18]]},"DOI":"10.1145\/3461615.3485428","type":"proceedings-article","created":{"date-parts":[[2021,12,18]],"date-time":"2021-12-18T04:57:40Z","timestamp":1639803460000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Knock&amp;Tap: Classification and Localization of Knock and Tap Gestures using Deep Sound Transfer Learning"],"prefix":"10.1145","author":[{"given":"Jae-Yeop","family":"Jeong","sequence":"first","affiliation":[{"name":"Data science, Seoul National University of Science and Technology, Korea, Republic of"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jung-Hwa","family":"Kim","sequence":"additional","affiliation":[{"name":"Research Center for Data Science, Seoul National University of Science and Technology, Korea, Republic of"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ha-Yeong","family":"Yoon","sequence":"additional","affiliation":[{"name":"Department of Data Science, Seoul National University of Science and Technology, Korea, Republic of"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jin-Woo","family":"Jeong","sequence":"additional","affiliation":[{"name":"Data Science, Seoul National University of Science and Technology, Korea, Republic of"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,12,17]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Sainath Adapa. 2019. Urban Sound Tagging using Convolutional Neural Networks. arXiv:1909.12699 [cs.SD]  Sainath Adapa. 2019. Urban Sound Tagging using Convolutional Neural Networks. arXiv:1909.12699 [cs.SD]","DOI":"10.33682\/8axe-9243"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Relja Arandjelovi\u0107 and Andrew Zisserman. 2017. Look Listen and Learn. arXiv:1705.08168 [cs.CV]  Relja Arandjelovi\u0107 and Andrew Zisserman. 2017. Look Listen and Learn. arXiv:1705.08168 [cs.CV]","DOI":"10.1109\/ICCV.2017.73"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings ofthe 30th International Conference on Neural Information Processing Systems","author":"Vondrick Carl","year":"2016","unstructured":"YusufAytar, Carl Vondrick , and Antonio Torralba . 2016 . SoundNet: Learning Sound Representations from Unlabeled Video . In Proceedings ofthe 30th International Conference on Neural Information Processing Systems ( Barcelona, Spain) (NIPS\u201916). Curran Associates Inc., Red Hook, NY, USA, 892\u2013900. YusufAytar, Carl Vondrick, and Antonio Torralba. 2016. SoundNet: Learning Sound Representations from Unlabeled Video. In Proceedings ofthe 30th International Conference on Neural Information Processing Systems (Barcelona, Spain) (NIPS\u201916). Curran Associates Inc., Red Hook, NY, USA, 892\u2013900."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2015.7280624"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025991"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_2_1_8_1","volume-title":"AST: Audio Spectrogram Transformer. arXiv:2104.01778 [cs.SD]","author":"Gong Yuan","year":"2021","unstructured":"Yuan Gong , Yu-An Chung , and James Glass . 2021 . AST: Audio Spectrogram Transformer. arXiv:2104.01778 [cs.SD] Yuan Gong, Yu-An Chung, and James Glass. 2021. AST: Audio Spectrogram Transformer. arXiv:2104.01778 [cs.SD]"},{"key":"e_1_3_2_1_9_1","first-page":"3","article-title":"Knocker","volume":"3","year":"2019","unstructured":"GongTaesik, ChoHyunsung, LeeBowon, and LeeSung-Ju. 2019 . Knocker . Proceedings ofthe ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies 3 , 3 (sep 2019), 1\u201321. https:\/\/doi.org\/10.1145\/3351240 GongTaesik, ChoHyunsung, LeeBowon, and LeeSung-Ju. 2019. Knocker. Proceedings ofthe ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies 3, 3 (sep 2019), 1\u201321. https:\/\/doi.org\/10.1145\/3351240","journal-title":"Proceedings ofthe ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings ofthe 24th Annual ACMSymposium on User Interface Software andTechnology","author":"Harrison Chris","year":"2047","unstructured":"Chris Harrison , Julia Schwarz , and Scott E. Hudson . 2011. TapSense: Enhancing Finger Interaction on Touch Surfaces . In Proceedings ofthe 24th Annual ACMSymposium on User Interface Software andTechnology ( Santa Barbara, California, USA) (UIST\u201911). Association for Computing Machinery, New York, NY, USA, 627\u2013636. https:\/\/doi.org\/10.1145\/ 2047 196.2047279 Chris Harrison, Julia Schwarz, and Scott E. Hudson. 2011. TapSense: Enhancing Finger Interaction on Touch Surfaces. In Proceedings ofthe 24th Annual ACMSymposium on User Interface Software andTechnology (Santa Barbara, California, USA) (UIST\u201911). Association for Computing Machinery, New York, NY, USA, 627\u2013636. https:\/\/doi.org\/10.1145\/2047196.2047279"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2380116.2380187"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Shawn Hershey Sourish Chaudhuri Daniel P. W. Ellis Jort F. Gemmeke Aren Jansen R. Channing Moore Manoj Plakal Devin Platt Rif A. Saurous Bryan Seybold Malcolm Slaney Ron J. Weiss and Kevin Wilson. 2017. CNN Architectures for Large-Scale Audio Classification. arXiv:1609.09430 [cs.SD]  Shawn Hershey Sourish Chaudhuri Daniel P. W. Ellis Jort F. Gemmeke Aren Jansen R. Channing Moore Manoj Plakal Devin Platt Rif A. Saurous Bryan Seybold Malcolm Slaney Ron J. Weiss and Kevin Wilson. 2017. CNN Architectures for Large-Scale Audio Classification. arXiv:1609.09430 [cs.SD]","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"e_1_3_2_1_13_1","unstructured":"Jonathan J Huang and Juan Jose Alvarado Leanos. 2018. AclNet: efficient end-to-end audio classification CNN. arXiv:1811.06669 [cs.SD]  Jonathan J Huang and Juan Jose Alvarado Leanos. 2018. AclNet: efficient end-to-end audio classification CNN. arXiv:1811.06669 [cs.SD]"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.7838\/jsebs.2016.21.4.029"},{"key":"e_1_3_2_1_15_1","first-page":"53","article-title":"Extracting usability dimensions of the voice user interface: Focusing on AI assistants","volume":"15","author":"Kim Minjung","year":"2020","unstructured":"Minjung Kim , Jieun Han , Hyo-Jin Kang , and Gyu Hyun Kwon . 2020 . Extracting usability dimensions of the voice user interface: Focusing on AI assistants . Journal ofthe HCISociety ofKorea 15 , 1 (2020), 53 \u2013 64 . Minjung Kim, Jieun Han, Hyo-Jin Kang, and Gyu Hyun Kwon. 2020. Extracting usability dimensions of the voice user interface: Focusing on AI assistants. Journal ofthe HCISociety ofKorea 15, 1 (2020), 53\u201364.","journal-title":"Journal ofthe HCISociety ofKorea"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"e_1_3_2_1_17_1","volume-title":"Plumbley","author":"Kong Qiuqiang","year":"2019","unstructured":"Qiuqiang Kong , Yin Cao , Turab Iqbal , Yong Xu , Wenwu Wang , and Mark D . Plumbley . 2019 . Cross-task learning for audio tagging, sound event detection and spatial localization: DCASE 2019 baseline systems. arXiv:1904.03476 [cs.SD] Qiuqiang Kong, Yin Cao, Turab Iqbal, Yong Xu, Wenwu Wang, and Mark D. Plumbley. 2019. Cross-task learning for audio tagging, sound event detection and spatial localization: DCASE 2019 baseline systems. arXiv:1904.03476 [cs.SD]"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025773"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2076354.2076364"},{"key":"e_1_3_2_1_20_1","volume-title":"Knock-Knock: Acoustic Object Recognition by using Stacked Denoising Autoencoders. Neurocomputing 267 (aug","author":"Luo Shan","year":"2017","unstructured":"Shan Luo , Leqi Zhu , Kaspar Althoefer , and Hongbin Liu . 2017. Knock-Knock: Acoustic Object Recognition by using Stacked Denoising Autoencoders. Neurocomputing 267 (aug 2017 ), 18\u201324. https:\/\/doi.org\/10.1016\/j.neucom.2017.03.014 arXiv:1708.04432 Shan Luo, Leqi Zhu, Kaspar Althoefer, and Hongbin Liu. 2017. Knock-Knock: Acoustic Object Recognition by using Stacked Denoising Autoencoders. Neurocomputing 267 (aug 2017), 18\u201324. https:\/\/doi.org\/10.1016\/j.neucom.2017.03.014 arXiv:1708.04432"},{"key":"e_1_3_2_1_21_1","unstructured":"Annamaria Mesaros Toni Heittola and Tuomas Virtanen. 2018. A multi-device dataset for urban acoustic scene classification. arXiv:1807.09840 [eess.AS]  Annamaria Mesaros Toni Heittola and Tuomas Virtanen. 2018. A multi-device dataset for urban acoustic scene classification. arXiv:1807.09840 [eess.AS]"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","key":"e_1_3_2_1_23_1","unstructured":"Adam Paszke , Sam Gross , Francisco Massa , Adam Lerer , James Bradbury , Gregory Chanan , Trevor Killeen , Zeming Lin , Natalia Gimelshein , Luca Antiga , Alban Desmaison , Andreas Kopf , Edward Yang , Zachary DeVito , Martin Raison , Alykhan Tejani , Sasank Chilamkurthy , Benoit Steiner , Lu Fang , Junjie Bai , and Soumith Chintala . 2019. PyTorch: An Imperative Style , High-Performance Deep Learning Library . In Advances in Neural Information Processing Systems 32, H. Wallach, H. Larochelle, A. Beygelzimer, F. d Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.). Curran Associates, Inc., California, CA, USA, 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32, H. Wallach, H. Larochelle, A. Beygelzimer, F. d Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.). Curran Associates, Inc., California, CA, USA, 8024\u20138035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806390"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3229434.3229453"},{"key":"e_1_3_2_1_26_1","unstructured":"Seeeds studio. 2021. ReSpeaker Mic Array v2.0. https:\/\/wiki.seeedstudio.com\/ReSpeaker_Mic_Array_v2.0\/  Seeeds studio. 2021. ReSpeaker Mic Array v2.0. https:\/\/wiki.seeedstudio.com\/ReSpeaker_Mic_Array_v2.0\/"},{"key":"e_1_3_2_1_27_1","volume-title":"Audacity: Free, open source, cross-platform audio software. https:\/\/www.audacityteam.org\/","author":"Team Audacity","year":"2021","unstructured":"Audacity Team . 2021 . Audacity: Free, open source, cross-platform audio software. https:\/\/www.audacityteam.org\/ Audacity Team. 2021. Audacity: Free, open source, cross-platform audio software. https:\/\/www.audacityteam.org\/"},{"key":"e_1_3_2_1_28_1","unstructured":"Jae Thomas. 2021. Knocki turns any surface into a smart remote control. https:\/\/mashable.com\/deals\/feb-2-knocki-smart-remote-control  Jae Thomas. 2021. Knocki turns any surface into a smart remote control. https:\/\/mashable.com\/deals\/feb-2-knocki-smart-remote-control"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-2756-2_7"},{"key":"e_1_3_2_1_30_1","unstructured":"Hongyi Zhang Moustapha Cisse Yann N. Dauphin and David Lopez-Paz. 2018. mixup: Beyond Empirical Risk Minimization. arXiv:1710.09412 [cs.LG]  Hongyi Zhang Moustapha Cisse Yann N. Dauphin and David Lopez-Paz. 2018. mixup: Beyond Empirical Risk Minimization. arXiv:1710.09412 [cs.LG]"}],"event":{"name":"ICMI '21: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"],"location":"Montreal QC Canada","acronym":"ICMI '21"},"container-title":["Companion Publication of the 2021 International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3461615.3485428","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3461615.3485428","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T17:45:10Z","timestamp":1750268710000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3461615.3485428"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,18]]},"references-count":30,"alternative-id":["10.1145\/3461615.3485428","10.1145\/3461615"],"URL":"https:\/\/doi.org\/10.1145\/3461615.3485428","relation":{},"subject":[],"published":{"date-parts":[[2021,10,18]]},"assertion":[{"value":"2021-12-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}