{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T22:01:46Z","timestamp":1743026506261,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031204999"},{"type":"electronic","value":"9783031205002"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20500-2_13","type":"book-chapter","created":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T05:12:32Z","timestamp":1672549952000},"page":"159-170","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Following the\u00a0Lecturer: Hierarchical Knowledge Concepts Prediction for\u00a0Educational Videos"],"prefix":"10.1007","author":[{"given":"Xin","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weidong","family":"He","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tong","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ye","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,1,1]]},"reference":[{"key":"13_CR1","doi-asserted-by":"crossref","unstructured":"Aly, R., Remus, S., Biemann, C.: Hierarchical multi-label classification of text with capsule networks. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pp. 323\u2013330 (2019)","DOI":"10.18653\/v1\/P19-2045"},{"key":"13_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Gronat, P., Torii, A., Pajdla, T., Sivic, J.: Netvlad: CNN architecture for weakly supervised place recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5297\u20135307 (2016)","DOI":"10.1109\/CVPR.2016.572"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., Schmid, C.: Vivit: a video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"13_CR4","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: ICML, vol. 2, p. 4 (2021)"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Bhardwaj, S., Srinivasan, M., Khapra, M.M.: Efficient video classification using fewer frames. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 354\u2013363 (2019)","DOI":"10.1109\/CVPR.2019.00044"},{"issue":"7","key":"13_CR6","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1631\/jzus.CIDE1303","volume":"14","author":"J Cao","year":"2013","unstructured":"Cao, J., Mao, D.H., Cai, Q., Li, H.S., Du, J.P.: A review of object representation based on local features. J. Zhejiang Univ. Sci. C 14(7), 495\u2013504 (2013). https:\/\/doi.org\/10.1631\/jzus.CIDE1303","journal-title":"J. Zhejiang Univ. Sci. C"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"13_CR9","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"issue":"4","key":"13_CR10","doi-asserted-by":"publisher","first-page":"138","DOI":"10.3390\/a10040138","volume":"10","author":"S Feng","year":"2017","unstructured":"Feng, S., Fu, P., Zheng, W.: A hierarchical multi-label classification algorithm for gene function prediction. Algorithms 10(4), 138 (2017)","journal-title":"Algorithms"},{"key":"13_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1007\/978-3-030-58548-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"V Gabeur","year":"2020","unstructured":"Gabeur, V., Sun, C., Alahari, K., Schmid, C.: Multi-modal transformer for video retrieval. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12349, pp. 214\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58548-8_13"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Guo, P.J., Kim, J., Rubin, R.: How video production affects student engagement: an empirical study of MOOC videos. In: Proceedings of the first ACM Conference on Learning@ Scale Conference, pp. 41\u201350 (2014)","DOI":"10.1145\/2556325.2566239"},{"key":"13_CR13","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.imavis.2017.01.010","volume":"60","author":"S Herath","year":"2017","unstructured":"Herath, S., Harandi, M., Porikli, F.: Going deeper into action recognition: a survey. Image Vis. Comput. 60, 4\u201321 (2017)","journal-title":"Image Vis. Comput."},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Huang, W., et al.: Hierarchical multi-label text classification: an attention-based recurrent network approach. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management, pp. 1051\u20131060 (2019)","DOI":"10.1145\/3357384.3357885"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Liang, M., Cao, X., Du, J., et al.: Dual-pathway attention based supervised adversarial hashing for cross-modal retrieval. In: 2021 IEEE International Conference on Big Data and Smart Computing (BigComp), pp. 168\u2013171. IEEE (2021)","DOI":"10.1109\/BigComp51126.2021.00040"},{"key":"13_CR16","first-page":"14200","volume":"34","author":"A Nagrani","year":"2021","unstructured":"Nagrani, A., Yang, S., Arnab, A., Jansen, A., Schmid, C., Sun, C.: Attention bottlenecks for multimodal fusion. Adv. Neural. Inf. Process. Syst. 34, 14200\u201314213 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Neimark, D., Bar, O., Zohar, M., Asselmann, D.: Video transformer network. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3163\u20133172 (2021)","DOI":"10.1109\/ICCVW54120.2021.00355"},{"key":"13_CR18","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.imavis.2016.06.002","volume":"58","author":"JJ Seo","year":"2017","unstructured":"Seo, J.J., Kim, H.I., De Neve, W., Ro, Y.M.: Effective and efficient human action recognition using dynamic frame skipping and trajectory rejection. Image Vis. Comput. 58, 76\u201385 (2017)","journal-title":"Image Vis. Comput."},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Shang, X., Yuan, Z., Wang, A., Wang, C.: Multimodal video summarization via time-aware transformers. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 1756\u20131765 (2021)","DOI":"10.1145\/3474085.3475321"},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Shen, J., Qiu, W., Meng, Y., Shang, J., Ren, X., Han, J.: Taxoclass: hierarchical multi-label text classification using only class names. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 4239\u20134249 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.335"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Shimura, K., Li, J., Fukumoto, F.: HFT-CNN: learning hierarchical category structure for multi-label short text categorization. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 811\u2013816 (2018)","DOI":"10.18653\/v1\/D18-1093"},{"key":"13_CR22","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Sun, C., Myers, A., Vondrick, C., Murphy, K., Schmid, C.: Videobert: a joint model for video and language representation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7464\u20137473 (2019)","DOI":"10.1109\/ICCV.2019.00756"},{"issue":"1","key":"13_CR24","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/s11063-016-9526-x","volume":"45","author":"Z Sun","year":"2017","unstructured":"Sun, Z., Zhao, Y., Cao, D., Hao, H.: Hierarchical multilabel classification with optimal path prediction. Neural Process. Lett. 45(1), 263\u2013277 (2017). https:\/\/doi.org\/10.1007\/s11063-016-9526-x","journal-title":"Neural Process. Lett."},{"key":"13_CR25","unstructured":"Targ, S., Almeida, D., Lyman, K.: Resnet in resnet: generalizing residual architectures. arXiv preprint. arXiv:1603.08029 (2016)"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459 (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"13_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Fine-grained similarity measurement between educational videos and exercises. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 331\u2013339 (2020)","DOI":"10.1145\/3394171.3413783"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Wehrmann, J., Cerri, R., Barros, R.: Hierarchical multi-label classification networks. In: International Conference on Machine Learning, pp. 5075\u20135084. PMLR (2018)","DOI":"10.1145\/3019612.3019664"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Wu, C.Y., Feichtenhofer, C., Fan, H., He, K., Krahenbuhl, P., Girshick, R.: Long-term feature banks for detailed video understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 284\u2013293 (2019)","DOI":"10.1109\/CVPR.2019.00037"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Wu, C.Y., Krahenbuhl, P.: Towards long-form video understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1884\u20131894 (2021)","DOI":"10.1109\/CVPR46437.2021.00192"},{"key":"13_CR32","unstructured":"Zhang, S., Guo, S., Huang, W., Scott, M.R., Wang, L.: V4d: 4d convolutional neural networks for video-level representation learning. arXiv preprint. arXiv:2002.07442 (2020)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20500-2_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T05:31:36Z","timestamp":1672551096000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20500-2_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031204999","9783031205002"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20500-2_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 January 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CICAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CAAI International Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Beijing","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 August 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cicai2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cicai.caai.cn\/#\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"472","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"164","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"35% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.1","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}