{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:17:10Z","timestamp":1776107830485,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,4]],"date-time":"2024-04-04T00:00:00Z","timestamp":1712188800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"JST Moonshot R&D Grant","award":["JPMJMS2012"],"award-info":[{"award-number":["JPMJMS2012"]}]},{"name":"JST CREST Grant","award":["JPMJCR17A3"],"award-info":[{"award-number":["JPMJCR17A3"]}]},{"name":"the commissioned research by NICT Japan","award":["JPJ012368C02901"],"award-info":[{"award-number":["JPJ012368C02901"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,4]]},"DOI":"10.1145\/3652920.3652922","type":"proceedings-article","created":{"date-parts":[[2024,5,1]],"date-time":"2024-05-01T18:10:28Z","timestamp":1714587028000},"page":"205-216","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["FastPerson: Enhancing Video-Based Learning through Video Summarization that Preserves Linguistic and Visual Contexts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5181-320X","authenticated-orcid":false,"given":"Kazuki","family":"Kawamura","sequence":"first","affiliation":[{"name":"Rekimoto Laboratory, The University of Tokyo, JP and Kyoto Laboratory, Sony Computer Science Laboratories, inc., Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3629-2514","authenticated-orcid":false,"given":"Jun","family":"Rekimoto","sequence":"additional","affiliation":[{"name":"Rekimoto Laboratory, The University of Tokyo, JP and Kyoto Laboratory, Sony Computer Science Laboratories, inc., Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1101826.1101841"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compedu.2018.07.021"},{"key":"e_1_3_2_1_3_1","volume-title":"Proc. ICML. 195\u2013204","author":"Chrzanowski Mike","year":"2017","unstructured":"Sercan\u00a0\u00d6. Arik, Mike Chrzanowski, Adam Coates, Gregory Diamos, Andrew Gibiansky, Yongguo Kang, Xian Li, John Miller, Andrew Ng, Jonathan Raiman, Shubho Sengupta, and Mohammad Shoeybi. 2017. Deep Voice: Real-Time Neural Text-to-Speech. In Proc. ICML. 195\u2013204."},{"key":"e_1_3_2_1_4_1","volume-title":"Falcon-7b-instruct, and OpenAI Chat-GPT Models. arXiv preprint","author":"Basyal Lochan","year":"2023","unstructured":"Lochan Basyal and Mihir Sanghvi. 2023. Text Summarization Using Large Language Models: A Comparative Study of MPT-7b-instruct, Falcon-7b-instruct, and OpenAI Chat-GPT Models. arXiv preprint (2023)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.nedt.2015.12.010"},{"key":"e_1_3_2_1_6_1","first-page":"71","article-title":"Enabling a comprehensive teaching strategy: Video lectures. Journal of Information Technology Education","volume":"7","author":"Brecht H\u00a0David","year":"2008","unstructured":"H\u00a0David Brecht and Suzanne\u00a0M Ogilby. 2008. Enabling a comprehensive teaching strategy: Video lectures. Journal of Information Technology Education. Innovations in Practice 7 (2008), 71.","journal-title":"Innovations in Practice"},{"key":"e_1_3_2_1_7_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. Proc. NeurIPS 33 (2020), 1877\u20131901.","journal-title":"Proc. NeurIPS"},{"key":"e_1_3_2_1_8_1","volume-title":"Udemy: a case study in online education and training. Revista Economic\u0103 70, 3","author":"Cetina Iuliana","year":"2018","unstructured":"Iuliana Cetina, Dumitru Goldbach, and Natalia Manea. 2018. Udemy: a case study in online education and training. Revista Economic\u0103 70, 3 (2018), 46\u201354."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCN.2013.6614209"},{"key":"e_1_3_2_1_10_1","volume-title":"Proc. UIST. 93\u2013102","author":"Chi Pei-Yu","year":"2012","unstructured":"Pei-Yu Chi, Sally Ahn, Amanda Ren, Mira Dontcheva, Wilmot Li, and Bj\u00f6rn Hartmann. 2012. MixT: automatic generation of step-by-step mixed media tutorials. In Proc. UIST. 93\u2013102."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1012"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11234"},{"key":"e_1_3_2_1_13_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint (2018)."},{"key":"e_1_3_2_1_14_1","volume-title":"Neocognitron: A self-organizing neural network model for a mechanism of pattern recognition unaffected by shift in position. Biological cybernetics 36, 4","author":"Fukushima Kunihiko","year":"1980","unstructured":"Kunihiko Fukushima. 1980. Neocognitron: A self-organizing neural network model for a mechanism of pattern recognition unaffected by shift in position. Biological cybernetics 36, 4 (1980), 193\u2013202."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2556325.2566239"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"e_1_3_2_1_18_1","volume-title":"Deep speech: Scaling up end-to-end speech recognition. arXiv preprint","author":"Hannun Awni","year":"2014","unstructured":"Awni Hannun, Carl Case, Jared Casper, Bryan Catanzaro, Greg Diamos, Erich Elsen, Ryan Prenger, Sanjeev Satheesh, Shubho Sengupta, Adam Coates, 2014. Deep speech: Scaling up end-to-end speech recognition. arXiv preprint (2014)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/319463.319691"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.edurev.2014.05.001"},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. CHI. 6536\u20136546","author":"Higuchi Keita","year":"2017","unstructured":"Keita Higuchi, Ryo Yonetani, and Yoichi Sato. 2017. Egoscanning: Quickly scanning first-person videos with egocentric elastic timelines. In Proc. CHI. 6536\u20136546."},{"key":"e_1_3_2_1_22_1","volume-title":"Long short-term memory. Neural computation 9, 8","author":"Hochreiter Sepp","year":"1997","unstructured":"Sepp Hochreiter and J\u00fcrgen Schmidhuber. 1997. Long short-term memory. Neural computation 9, 8 (1997), 1735\u20131780."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123393"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.chb.2012.01.011"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2642918.2647389"},{"key":"e_1_3_2_1_26_1","volume-title":"Backpropagation applied to handwritten zip code recognition. Neural computation 1, 4","author":"LeCun Yann","year":"1989","unstructured":"Yann LeCun, Bernhard Boser, John\u00a0S Denker, Donnie Henderson, Richard\u00a0E Howard, Wayne Hubbard, and Lawrence\u00a0D Jackel. 1989. Backpropagation applied to handwritten zip code recognition. Neural computation 1, 4 (1989), 541\u2013551."},{"key":"e_1_3_2_1_27_1","volume-title":"Proc. ACCV.","author":"Liu Yen-Ting","year":"2020","unstructured":"Yen-Ting Liu, Yu-Jhe Li, and Yu-Chiang\u00a0Frank Wang. 2020. Transforming multi-concept attention into video summarization. In Proc. ACCV."},{"key":"e_1_3_2_1_28_1","volume-title":"Chatting about ChatGPT: how may AI and GPT impact academia and libraries?Library Hi Tech News 40, 3","author":"Lund D","year":"2023","unstructured":"Brady\u00a0D Lund and Ting Wang. 2023. Chatting about ChatGPT: how may AI and GPT impact academia and libraries?Library Hi Tech News 40, 3 (2023), 26\u201329."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tate.2011.04.004"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2007.04.002"},{"key":"e_1_3_2_1_31_1","volume-title":"Multimodal abstractive summarization for how2 videos. arXiv preprint","author":"Palaskar Shruti","year":"2019","unstructured":"Shruti Palaskar, Jindrich Libovick\u1ef3, Spandana Gella, and Florian Metze. 2019. Multimodal abstractive summarization for how2 videos. arXiv preprint (2019)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58595-2_39"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2642918.2647400"},{"key":"e_1_3_2_1_34_1","volume-title":"Proc. ICML. 28492\u201328518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In Proc. ICML. 28492\u201328518."},{"key":"e_1_3_2_1_35_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_36_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-30157-5_76"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_39_1","volume-title":"ChatGPT: A comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things and Cyber-Physical Systems","author":"Ray Partha\u00a0Pratim","year":"2023","unstructured":"Partha\u00a0Pratim Ray. 2023. ChatGPT: A comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope. Internet of Things and Cyber-Physical Systems (2023)."},{"key":"e_1_3_2_1_40_1","volume-title":"Proc. NeurIPS, C.\u00a0Cortes, N.\u00a0Lawrence, D.\u00a0Lee, M.\u00a0Sugiyama, and R.\u00a0Garnett (Eds.). Vol.\u00a028","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Proc. NeurIPS, C.\u00a0Cortes, N.\u00a0Lawrence, D.\u00a0Lee, M.\u00a0Sugiyama, and R.\u00a0Garnett (Eds.). Vol.\u00a028."},{"key":"e_1_3_2_1_41_1","volume-title":"A neural attention model for abstractive sentence summarization. arXiv preprint","author":"Rush M","year":"2015","unstructured":"Alexander\u00a0M Rush, Sumit Chopra, and Jason Weston. 2015. A neural attention model for abstractive sentence summarization. arXiv preprint (2015)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10758-020-09455-5"},{"key":"e_1_3_2_1_43_1","volume-title":"Proc. ICASSP, Vol.\u00a02. 1331\u20131334","author":"Scheirer E.","unstructured":"E. Scheirer and M. Slaney. 1997. Construction and evaluation of a robust multifeature speech\/music discriminator. In Proc. ICASSP, Vol.\u00a02. 1331\u20131334."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475321"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"e_1_3_2_1_47_1","volume-title":"Proc. CVPR. 5179\u20135187","author":"Song Yale","year":"2015","unstructured":"Yale Song, Jordi Vallmitjana, Amanda Stent, and Alejandro Jaimes. 2015. Tvsum: Summarizing web videos using titles. In Proc. CVPR. 5179\u20135187."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2207676.2208622"},{"key":"e_1_3_2_1_49_1","first-page":"1","article-title":"How Khan Academy is changing the rules of education","volume":"126","author":"Thompson Clive","year":"2011","unstructured":"Clive Thompson. 2011. How Khan Academy is changing the rules of education. Wired Magazine 126 (2011), 1\u20135.","journal-title":"Wired Magazine"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1198302.1198305"},{"key":"e_1_3_2_1_51_1","volume-title":"Proc. SSW. 125","author":"van\u00a0den Oord Aaron","year":"2016","unstructured":"Aaron van\u00a0den Oord, Sander Dieleman, Heiga Zen, Karen Simonyan, Oriol Vinyals, Alex Graves, Nal Kalchbrenner, Andrew Senior, and Koray Kavukcuoglu. 2016. WaveNet: A Generative Model for Raw Audio. In Proc. SSW. 125."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806367"},{"key":"e_1_3_2_1_53_1","volume-title":"Proc. NeurIPS, Vol.\u00a030","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Proc. NeurIPS, Vol.\u00a030."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.aacl-demo.6"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.112"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.5555\/1140892.1709370"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_47"},{"key":"e_1_3_2_1_58_1","volume-title":"Audiovisual video summarization","author":"Zhao Bin","year":"2021","unstructured":"Bin Zhao, Maoguo Gong, and Xuelong Li. 2021. Audiovisual video summarization. IEEE Transactions on Neural Networks and Learning Systems (2021)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123328"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12255"}],"event":{"name":"AHs 2024: The Augmented Humans International Conference","location":"Melbourne VIC Australia","acronym":"AHs 2024"},"container-title":["Proceedings of the Augmented Humans International Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652920.3652922","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652920.3652922","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:33:50Z","timestamp":1755909230000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652920.3652922"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,4]]},"references-count":60,"alternative-id":["10.1145\/3652920.3652922","10.1145\/3652920"],"URL":"https:\/\/doi.org\/10.1145\/3652920.3652922","relation":{},"subject":[],"published":{"date-parts":[[2024,4,4]]},"assertion":[{"value":"2024-05-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}