{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T17:34:11Z","timestamp":1762277651537,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T00:00:00Z","timestamp":1666915200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"JST Moonshot R&D","award":["JPMJMS2012"],"award-info":[{"award-number":["JPMJMS2012"]}]},{"name":"JST CREST","award":["JPMJCR17A3"],"award-info":[{"award-number":["JPMJCR17A3"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,29]]},"DOI":"10.1145\/3526114.3558727","type":"proceedings-article","created":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T12:41:45Z","timestamp":1666960905000},"page":"1-3","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["AIx speed: Playback Speed Optimization using Listening Comprehension of Speech Recognition Models"],"prefix":"10.1145","author":[{"given":"Kazuki","family":"Kawamura","sequence":"first","affiliation":[{"name":"The University of Tokyo, Japan and Sony CSL Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Rekimoto","sequence":"additional","affiliation":[{"name":"The University of Tokyo, Japan and Sony CSL Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"International Conference on Learning Representations.","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Steffen Schneider, and Michael Auli. 2020. vq-wav2vec: Self-Supervised Learning of Discrete Speech Representations. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_2_1","unstructured":"Alexei Baevski Yuhao Zhou Abdelrahman Mohamed and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1518701.1518823"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCIME49369.2019.00087"},{"key":"e_1_3_2_1_5_1","volume-title":"Proc. ACM Conference on Human Factors in Computing Systems.","author":"Higuchi Keita","year":"2017","unstructured":"Keita Higuchi, Ryo Yonetani, and Yoichi Sato. 2017. EgoScanning: Quickly Scanning First-Person Videos with Egocentric Elastic Timelines. In Proc. ACM Conference on Human Factors in Computing Systems."},{"key":"e_1_3_2_1_6_1","unstructured":"Wei-Ning Hsu Benjamin Bolte Yao-Hung\u00a0Hubert Tsai Kushal Lakhotia Ruslan Salakhutdinov and Abdelrahman Mohamed. 2021. HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units. (2021)."},{"key":"e_1_3_2_1_7_1","volume-title":"Efficient Video Viewing System for Racquet Sports with Automatic Summarization Focusing on Rally Scenes. In ACM SIGGRAPH 2014 Posters.","author":"Kawamura Shunya","year":"2014","unstructured":"Shunya Kawamura, Tsukasa Fukusato, Tatsunori Hirai, and Shigeo Morishima. 2014. Efficient Video Viewing System for Racquet Sports with Automatic Summarization Focusing on Rally Scenes. In ACM SIGGRAPH 2014 Posters."},{"key":"e_1_3_2_1_8_1","volume-title":"Dynamic Object Scanning: Object-Based Elastic Timeline for Quickly Browsing First-Person Videos. In Extended Abstracts of the 2018 CHI Conference on Human Factors in Computing Systems.","author":"Kayukawa Seita","year":"2018","unstructured":"Seita Kayukawa, Keita Higuchi, Ryo Yonetani, Masanori Nakamura, Yoichi Sato, and Shigeo Morishima. 2018. Dynamic Object Scanning: Object-Based Elastic Timeline for Quickly Browsing First-Person Videos. In Extended Abstracts of the 2018 CHI Conference on Human Factors in Computing Systems."},{"key":"e_1_3_2_1_9_1","volume-title":"Proc. of the Workshop on Advanced Visual Interfaces AVI.","author":"Kurihara Kazutaka","year":"2011","unstructured":"Kazutaka Kurihara. 2011. CinemaGazer: a System for Watching Video at Very High Speed. In Proc. of the Workshop on Advanced Visual Interfaces AVI."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3375462.3375466"},{"key":"e_1_3_2_1_11_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations.","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_12_1","volume-title":"International Journal for Educational Media and Technology","author":"Nagahama Toru","year":"2017","unstructured":"Toru Nagahama and Yusuke Morita. 2017. Effect Analysis of Playback Speed for Lecture Video Including Instructor Images. International Journal for Educational Media and Technology (2017)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491140.3528299"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702613.2732711"},{"volume-title":"Achieving Human Parity in Conversational Speech Recognition","author":"Xiong Wayne","key":"e_1_3_2_1_18_1","unstructured":"Wayne Xiong, Jasha Droppo, Xuedong Huang, Frank Seide, Michael Seltzer, Andreas Stolcke, Dong Yu, and Geoffrey Zweig. 2016. Achieving Human Parity in Conversational Speech Recognition. IEEE\/ACM Transactions on Audio, Speech, and Language Processing."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376322"}],"event":{"name":"UIST '22: The 35th Annual ACM Symposium on User Interface Software and Technology","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"],"location":"Bend OR USA","acronym":"UIST '22"},"container-title":["Adjunct Proceedings of the 35th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3526114.3558727","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3526114.3558727","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,4]],"date-time":"2025-11-04T17:28:25Z","timestamp":1762277305000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3526114.3558727"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,28]]},"references-count":19,"alternative-id":["10.1145\/3526114.3558727","10.1145\/3526114"],"URL":"https:\/\/doi.org\/10.1145\/3526114.3558727","relation":{},"subject":[],"published":{"date-parts":[[2022,10,28]]},"assertion":[{"value":"2022-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}