{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T15:52:14Z","timestamp":1769183534938,"version":"3.49.0"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031815416","type":"print"},{"value":"9783031815423","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-81542-3_15","type":"book-chapter","created":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T14:06:06Z","timestamp":1738332366000},"page":"184-198","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Enhancing Question Answering in\u00a0Lecture Videos with\u00a0a\u00a0Multimodal Retrieval-Augmented Generation Framework"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3258-5385","authenticated-orcid":false,"given":"Thomas","family":"Tanner","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3433-578X","authenticated-orcid":false,"given":"Andreas","family":"Marfurt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6043-1860","authenticated-orcid":false,"given":"Hasan","family":"O\u01e7ul","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,1]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Akiba, T., Sano, S., Yanase, T., Ohta, T., Koyama, M.: Optuna: a next-generation hyperparameter optimization framework. In: The 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 2623\u20132631 (2019)","DOI":"10.1145\/3292500.3330701"},{"key":"15_CR2","doi-asserted-by":"publisher","unstructured":"Ali, A., Renals, S.: Word error rate estimation for speech recognition: e-WER. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 20\u201324. Association for Computational Linguistics, Melbourne, Australia (2018). https:\/\/doi.org\/10.18653\/v1\/P18-2004. http:\/\/aclweb.org\/anthology\/P18-2004","DOI":"10.18653\/v1\/P18-2004"},{"key":"15_CR3","doi-asserted-by":"publisher","unstructured":"Asai, A., Wu, Z., Wang, Y., Sil, A., Hajishirzi, H.: Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection (2023). https:\/\/doi.org\/10.48550\/arXiv.2310.11511","DOI":"10.48550\/arXiv.2310.11511"},{"key":"15_CR4","doi-asserted-by":"publisher","unstructured":"Chand, D., O\u01e7ul, H.: A framework for lecture video segmentation from extracted speech content. In: 2021 IEEE 19th World Symposium on Applied Machine Intelligence and Informatics (SAMI), pp. 000299\u2013000304 (2021). https:\/\/doi.org\/10.1109\/SAMI50585.2021.9378632. https:\/\/ieeexplore.ieee.org\/document\/9378632","DOI":"10.1109\/SAMI50585.2021.9378632"},{"key":"15_CR5","unstructured":"Chroma: chroma-core\/chroma (2024). https:\/\/github.com\/chroma-core\/chroma"},{"key":"15_CR6","unstructured":"Chroma: Python Collection $$|$$ Chroma Docs (2024). https:\/\/docs.trychroma.com\/reference\/py-collection"},{"key":"15_CR7","unstructured":"Class Central: By The Numbers: MOOCs in 2021 (2021). https:\/\/www.classcentral.com\/report\/mooc-stats-2021\/"},{"key":"15_CR8","unstructured":"Class Central: About (2023). https:\/\/www.classcentral.com\/about"},{"key":"15_CR9","doi-asserted-by":"publisher","unstructured":"Colas, A., Kim, S., Dernoncourt, F., Gupte, S., Wang, D.Z., Kim, D.S.: TutorialVQA: question answering dataset for tutorial videos (2020). https:\/\/doi.org\/10.48550\/arXiv.1912.01046, version: 2","DOI":"10.48550\/arXiv.1912.01046"},{"key":"15_CR10","unstructured":"Confident AI: confident-ai\/deepeval (2024). https:\/\/github.com\/confident-ai\/deepeval"},{"key":"15_CR11","doi-asserted-by":"publisher","unstructured":"Es, S., James, J., Espinosa-Anke, L., Schockaert, S.: RAGAS: Automated Evaluation of Retrieval Augmented Generation (2023). https:\/\/doi.org\/10.48550\/arXiv.2309.15217","DOI":"10.48550\/arXiv.2309.15217"},{"key":"15_CR12","unstructured":"Exploding Gradients: Ragas Answer Correctness Metric (2024). https:\/\/docs.ragas.io\/en\/latest\/concepts\/metrics\/answer_correctness.html"},{"key":"15_CR13","unstructured":"Gerganov, G.: ggerganov\/llama.cpp (2024). https:\/\/github.com\/ggerganov\/llama.cpp"},{"key":"15_CR14","unstructured":"Harvard: CS50\u2019s Introduction to Artificial Intelligence with Python (2024). https:\/\/cs50.harvard.edu\/ai\/2023\/"},{"key":"15_CR15","unstructured":"HuggingFace: mixedbread-ai\/mxbai-rerank-large-v1 $$\\cdot $$ Hugging Face (2024). https:\/\/huggingface.co\/mixedbread-ai\/mxbai-rerank-large-v1"},{"key":"15_CR16","unstructured":"HuggingFace: sentence-transformers\/all-MiniLM-L6-v2 $$\\cdot $$ Hugging Face (2024). https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2"},{"key":"15_CR17","unstructured":"JaidedAI: EasyOCR (2020). https:\/\/github.com\/JaidedAI\/EasyOCR"},{"key":"15_CR18","doi-asserted-by":"publisher","unstructured":"Khurana, K., Deshpande, U.: Video question-answering techniques, benchmark datasets and evaluation metrics leveraging video captioning: a comprehensive survey. IEEE Access 9, 43799\u201343823 (2021). https:\/\/doi.org\/10.1109\/ACCESS.2021.3058248. https:\/\/ieeexplore.ieee.org\/document\/9350580","DOI":"10.1109\/ACCESS.2021.3058248"},{"key":"15_CR19","doi-asserted-by":"publisher","unstructured":"Ko, D., Lee, J.S., Kang, W., Roh, B., Kim, H.J.: Large Language Models are Temporal and Causal Reasoners for Video Question Answering (2023). https:\/\/doi.org\/10.48550\/arXiv.2310.15747","DOI":"10.48550\/arXiv.2310.15747"},{"key":"15_CR20","doi-asserted-by":"publisher","unstructured":"Kumbham, S., Debnath, A., Rao, K.S.: Efficient Indexing of Meta-Data (Extracted from Educational Videos) (2023). https:\/\/doi.org\/10.48550\/arXiv.2401.01356, [cs]","DOI":"10.48550\/arXiv.2401.01356"},{"key":"15_CR21","doi-asserted-by":"publisher","unstructured":"Lewis, P., et al.: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks (2021). https:\/\/doi.org\/10.48550\/arXiv.2005.11401, version: 4","DOI":"10.48550\/arXiv.2005.11401"},{"key":"15_CR22","unstructured":"MIT: Introduction to Psychology $$|$$ Brain and Cognitive Sciences (2024). https:\/\/ocw.mit.edu\/courses\/9-00sc-introduction-to-psychology-fall-2011\/"},{"key":"15_CR23","unstructured":"MIT: Introduction to Special Relativity $$|$$ Physics (2024). https:\/\/ocw.mit.edu\/courses\/8-20-introduction-to-special-relativity-january-iap-2021\/"},{"key":"15_CR24","doi-asserted-by":"publisher","unstructured":"Muennighoff, N., Tazi, N., Magne, L., Reimers, N.: MTEB: Massive Text Embedding Benchmark (2023). https:\/\/doi.org\/10.48550\/arXiv.2210.07316","DOI":"10.48550\/arXiv.2210.07316"},{"key":"15_CR25","doi-asserted-by":"publisher","unstructured":"Muszynska, E.: Semantic chunking. University of Cambridge Repository (2020). https:\/\/doi.org\/10.17863\/CAM.59299. https:\/\/www.repository.cam.ac.uk\/handle\/1810\/312207","DOI":"10.17863\/CAM.59299"},{"key":"15_CR26","doi-asserted-by":"publisher","unstructured":"Nogueira, R., Cho, K.: Passage Re-ranking with BERT (2020). https:\/\/doi.org\/10.48550\/arXiv.1901.04085","DOI":"10.48550\/arXiv.1901.04085"},{"key":"15_CR27","doi-asserted-by":"publisher","unstructured":"Nussbaum, Z., Morris, J.X., Duderstadt, B., Mulyar, A.: Nomic Embed: Training a Reproducible Long Context Text Embedder (2024). https:\/\/doi.org\/10.48550\/arXiv.2402.01613","DOI":"10.48550\/arXiv.2402.01613"},{"key":"15_CR28","unstructured":"OpenAI: openai\/whisper (2024). https:\/\/github.com\/openai\/whisper"},{"key":"15_CR29","unstructured":"PDM: pdm-project\/pdm (2024). https:\/\/github.com\/pdm-project\/pdm"},{"key":"15_CR30","doi-asserted-by":"publisher","unstructured":"Repp, S., Linckels, S., Meinel, C.: Question answering from lecture videos based on an automatic semantic annotation. In: Proceedings of the 13th Annual Conference on Innovation and Technology in Computer Science Education, ITiCSE 2008, pp. 17\u201321. Association for Computing Machinery, New York (2008). https:\/\/doi.org\/10.1145\/1384271.1384278","DOI":"10.1145\/1384271.1384278"},{"key":"15_CR31","doi-asserted-by":"publisher","unstructured":"Sercan\u00a0A\u01e7z\u0131ya\u01e7l\u0131, V., O\u01e7ul, H.: Multi-level lecture video classification using text content. In: 2020 IEEE 14th International Conference on Application of Information and Communication Technologies (AICT), pp.\u00a01\u20135 (2020). https:\/\/doi.org\/10.1109\/AICT50176.2020.9368692. https:\/\/ieeexplore.ieee.org\/document\/9368692. ISSN 2472-8586","DOI":"10.1109\/AICT50176.2020.9368692"},{"key":"15_CR32","doi-asserted-by":"publisher","unstructured":"Smith, R.: An overview of the tesseract OCR engine. In: Ninth International Conference on Document Analysis and Recognition (ICDAR 2007), vol.\u00a02, pp. 629\u2013633 (2007). https:\/\/doi.org\/10.1109\/ICDAR.2007.4376991. https:\/\/ieeexplore.ieee.org\/document\/4376991. ISSN 2379-2140","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"15_CR33","unstructured":"SQLite: sqlite\/sqlite (2024). https:\/\/github.com\/sqlite\/sqlite"},{"key":"15_CR34","doi-asserted-by":"publisher","unstructured":"Sreepathy, G.: Automated analysis and indexing of lecture videos. In: Iowa State University (2020). https:\/\/doi.org\/10.31274\/etd-20210114-142. https:\/\/lib.dr.iastate.edu\/etd\/18407","DOI":"10.31274\/etd-20210114-142"},{"key":"15_CR35","unstructured":"Wikipedia: Massive open online course (2023). https:\/\/en.wikipedia.org\/w\/index.php?title=Massive_open_online_course&oldid=1171690342, page Version ID: 1171690342"},{"key":"15_CR36","doi-asserted-by":"publisher","unstructured":"Yan, S.Q., Gu, J.C., Zhu, Y., Ling, Z.H.: Corrective Retrieval Augmented Generation (2024). https:\/\/doi.org\/10.48550\/arXiv.2401.15884, version: 1","DOI":"10.48550\/arXiv.2401.15884"},{"key":"15_CR37","doi-asserted-by":"publisher","unstructured":"Yang, H., Meinel, C.: Content based lecture video retrieval using speech and video text information. IEEE Trans. Learn. Technol. 7(2), 142\u2013154 (2014). https:\/\/doi.org\/10.1109\/TLT.2014.2307305. https:\/\/ieeexplore.ieee.org\/document\/6750040","DOI":"10.1109\/TLT.2014.2307305"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence: Methodology, Systems, and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-81542-3_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T14:06:17Z","timestamp":1738332377000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-81542-3_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031815416","9783031815423"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-81542-3_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 February 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"AIMSA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence: Methodology, Systems, and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Varna","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bulgaria","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aimsa2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.aimsaconference.org\/main.php","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}