{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T09:53:36Z","timestamp":1761126816690,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,3,30]],"date-time":"2024-03-30T00:00:00Z","timestamp":1711756800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,30]],"date-time":"2024-03-30T00:00:00Z","timestamp":1711756800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["107-2410-H-006 040-MY3"],"award-info":[{"award-number":["107-2410-H-006 040-MY3"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s00530-024-01301-3","type":"journal-article","created":{"date-parts":[[2024,3,30]],"date-time":"2024-03-30T16:02:01Z","timestamp":1711814521000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Personalized time-sync comment generation based on a multimodal transformer"],"prefix":"10.1007","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5790-7506","authenticated-orcid":false,"given":"Hei-Chia","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8437-2992","authenticated-orcid":false,"given":"Martinus","family":"Maslim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei-Ting","family":"Hong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,3,30]]},"reference":[{"issue":"2","key":"1301_CR1","doi-asserted-by":"publisher","first-page":"970","DOI":"10.3390\/s23020970","volume":"23","author":"MU Alam","year":"2023","unstructured":"Alam, M.U., Rahmani, R.: FedSepsis: a federated multi-modal deep learning-based Internet of medical things application for early detection of sepsis from electronic health records using raspberry Pi and Jetson nano devices. Sensors 23(2), 970 (2023)","journal-title":"Sensors"},{"key":"1301_CR2","doi-asserted-by":"publisher","DOI":"10.1177\/21582440211040804","author":"R Allam","year":"2021","unstructured":"Allam, R., Dinana, H.: The future of TV and online video platforms: a study on predictors of use and interaction with content in the Egyptian evolving telecomm. Media Entertain. Ind. (2021). https:\/\/doi.org\/10.1177\/21582440211040804","journal-title":"Media Entertain. Ind."},{"key":"1301_CR3","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1016\/j.neucom.2021.04.118","volume":"454","author":"Q Bai","year":"2021","unstructured":"Bai, Q., Wu, Y., Zhou, J., He, L.: Aligned variational autoencoder for matching Danmaku and video storylines. Neurocomputing 454, 228\u2013237 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2021.04.118","journal-title":"Neurocomputing"},{"issue":"2","key":"1301_CR4","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1109\/TPAMI.2018.2798607","volume":"41","author":"T Baltru\u0161aitis","year":"2018","unstructured":"Baltru\u0161aitis, T., Ahuja, C., Morency, L.P.: Multimodal machine learning: a survey and taxonomy. IEEE Trans. Pattern Anal. Mach. Intell. 41(2), 423\u2013443 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1301_CR5","unstructured":"Bayoudh, K., Knani, R., Hamdaoui, F., Mtibaa, A.: A survey on deep multimodal learning for computer vision: advances, trends, applications, and datasets, springer the visual computer June (10), 603\u2013616 (2021)"},{"key":"1301_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., Wu, W., Hu, W., & He, L. (2020). TSCREC: time-sync comment recommendation in Danmu-enabled videos. In: Paper presented at the 2020 IEEE 32nd International Conference on Tools with Artificial Intelligence (ICTAI), Baltimore, MD, USA","DOI":"10.1109\/ICTAI50040.2020.00021"},{"key":"1301_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Zhang, Y., Ai, Q., Xu, H., Yan, J., Qin, Z.: Personalized key frame recommendation. In: Paper presented at the 40th International ACM SIGIR Conference on Research and Development in Information (2017)","DOI":"10.1145\/3077136.3080776"},{"issue":"7","key":"1301_CR8","doi-asserted-by":"publisher","first-page":"5105","DOI":"10.1007\/s00500-020-05513-3","volume":"25","author":"X Chi","year":"2021","unstructured":"Chi, X., Fan, Z.-P., Wang, X.: Pricing mode selection for the online short video platform. Soft. Comput. 25(7), 5105\u20135120 (2021). https:\/\/doi.org\/10.1007\/s00500-020-05513-3","journal-title":"Soft. Comput."},{"key":"1301_CR9","unstructured":"CNNIC.: The 46th China statistical report on the Internet development (In Chinese) (2020). http:\/\/www.cnnic.cn\/gywm\/xwzx\/rdxw\/202009\/W020200929343125745019.pdf. Accessed 4 July 2022"},{"key":"1301_CR10","unstructured":"Duan, C., Cui, L., Ma, S., Wei, F., Zhu, C., Zhao, T. (2020). Multimodal matching transformer for live commenting. In: Paper Presented at the European Conference on Artificial Intelligence, Santiago de Compostela, Spain"},{"key":"1301_CR11","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1016\/j.neucom.2022.09.136","volume":"515","author":"X Han","year":"2023","unstructured":"Han, X., Wang, Y.T., Feng, J.L., Deng, C., Chen, Z.H., Huang, Y.A., Hu, P.W.: A survey of transformer-based multimodal pre-trained modals. Neurocomputing 515, 89\u2013106 (2023)","journal-title":"Neurocomputing"},{"key":"1301_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J. (2016). Deep residual learning for image recognition. In: Paper Presented at the Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, NV, USA","DOI":"10.1109\/CVPR.2016.90"},{"key":"1301_CR13","doi-asserted-by":"crossref","unstructured":"Hu, R., Singh, A.: UniT: multimodal multitask learning with a unified transformer. Faceb. AI Res. (2021)","DOI":"10.1109\/ICCV48922.2021.00147"},{"key":"1301_CR14","doi-asserted-by":"crossref","unstructured":"Jiang, R., Qu, C., Wang, J., Wang, C., Zheng, Y.: Towards extracting highlights from recorded live videos: an implicit crowdsourcing approach. In: Paper Presented at the 2020 IEEE 36th International Conference on Data Engineering (ICDE), Dallas, TX, USA (2020)","DOI":"10.1109\/ICDE48307.2020.00176"},{"key":"1301_CR15","unstructured":"Kim, W., Son, B., Kim, I.: Vilt: vision-and-language transformer without convolution or region supervision. In: Paper Presented at the The Thirty-Eighth International Conference on Machine Learning, Virtual Conference (2021)"},{"key":"1301_CR16","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1016\/j.patrec.2020.05.004","volume":"135","author":"Z Liao","year":"2020","unstructured":"Liao, Z., Xian, Y., Li, J., Zhang, C., Zhao, S.: Time-sync comments denoising via graph convolutional and contextual encoding. Pattern Recogn. Lett. 135, 256\u2013263 (2020). https:\/\/doi.org\/10.1016\/j.patrec.2020.05.004","journal-title":"Pattern Recogn. Lett."},{"key":"1301_CR17","doi-asserted-by":"crossref","unstructured":"Liao, Z., Xian, Y., Yang, X., Zhao, Q., Zhang, C., Li, J.: TSCSet: a crowdsourced time-sync comment dataset for exploration of user experience improvement. In: Paper Presented at the 23rd International Conference on Intelligent User Interfaces, Tokyo, Japan (2018)","DOI":"10.1145\/3172944.3172966"},{"key":"1301_CR18","doi-asserted-by":"crossref","unstructured":"Ma, S., Cui, L., Dai, D., Wei, F., Sun, X.: Livebot: generating live video comments based on visual and textual contexts. In: Paper Presented at the Thirty-Third AAAI Conference on Artificial Intelligence, Hilton Hawaiian Village, Honolulu, Hawaii, USA (2019)","DOI":"10.1609\/aaai.v33i01.33016810"},{"key":"1301_CR19","doi-asserted-by":"crossref","unstructured":"Manzoor, M.A., Albarri, S., Xian, Z., Meng, Z., Nakov, P., Liang, S.: Multimodality representation learning: a survey on evolution, pretraining and its applications (2023). arXiv:2302.00389","DOI":"10.1145\/3617833"},{"issue":"45\u201346","key":"1301_CR20","doi-asserted-by":"publisher","first-page":"33449","DOI":"10.1007\/s11042-019-7578-4","volume":"79","author":"Z Pan","year":"2019","unstructured":"Pan, Z., Li, X., Cui, L., Zhang, Z.: Video clip recommendation model by sentiment analysis of time-sync comments. Multim. Tools Appl. 79(45\u201346), 33449\u201333466 (2019). https:\/\/doi.org\/10.1007\/s11042-019-7578-4","journal-title":"Multim. Tools Appl."},{"key":"1301_CR21","doi-asserted-by":"crossref","unstructured":"Ping, Q.: Video recommendation using crowdsourced time-sync comments. In: Paper Presented at the 12th ACM Conference on Recommender Systems (2018)","DOI":"10.1145\/3240323.3240329"},{"key":"1301_CR22","doi-asserted-by":"publisher","first-page":"28750","DOI":"10.1109\/access.2022.3157712","volume":"10","author":"Q Qi","year":"2022","unstructured":"Qi, Q., Lin, L., Zhang, R., Xue, C.: MEDT: using multimodal encoding-decoding network as in transformer for multimodal sentiment analysis. IEEE Access 10, 28750\u201328759 (2022). https:\/\/doi.org\/10.1109\/access.2022.3157712","journal-title":"IEEE Access"},{"key":"1301_CR23","unstructured":"Research, i.: China short video market research report (in Chinese) (2017). https:\/\/www.iimedia.cn\/c400\/56105.htm. Accessed 4 July 2022"},{"issue":"1\u20134","key":"1301_CR24","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1177\/20570473211048029","volume":"6","author":"F Schneider","year":"2021","unstructured":"Schneider, F.: China\u2019s viral villages: digital nationalism and the COVID-19 crisis on online video-sharing platform Bilibili. Commun. Public 6(1\u20134), 48\u201366 (2021). https:\/\/doi.org\/10.1177\/20570473211048029","journal-title":"Commun. Public"},{"key":"1301_CR25","unstructured":"Statista.: Online Video & Entertainment. In (2020)"},{"key":"1301_CR26","doi-asserted-by":"publisher","first-page":"1377","DOI":"10.1109\/lsp.2022.3181849","volume":"29","author":"Y Teng","year":"2022","unstructured":"Teng, Y., Song, C., Wu, B.: Learning social relationship from videos via pre-trained multimodal transformer. IEEE Signal Process. Lett. 29, 1377\u20131381 (2022). https:\/\/doi.org\/10.1109\/lsp.2022.3181849","journal-title":"IEEE Signal Process. Lett."},{"key":"1301_CR27","unstructured":"TwitchTracker. Twitch statistics & charts. In (2018)"},{"key":"1301_CR28","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Polosukhin, I.: Attention is all you need. In: Paper Presented at the Advances in Neural Information Processing Systems, Long Beach, CA, USA (2017)"},{"key":"1301_CR29","unstructured":"Wallach, O.: Which streaming service has the most subscriptions? (2021). https:\/\/www.visualcapitalist.com\/which-streaming-service-has-the-most-subscriptions\/. Accessed 4 July 2022"},{"key":"1301_CR30","doi-asserted-by":"publisher","first-page":"27630","DOI":"10.1109\/access.2022.3157716","volume":"10","author":"M Wang","year":"2022","unstructured":"Wang, M., Tang, X., Chen, F., Lu, Q.: Encrypted live streaming channel identification with time-sync comments. IEEE Access 10, 27630\u201327642 (2022). https:\/\/doi.org\/10.1109\/access.2022.3157716","journal-title":"IEEE Access"},{"key":"1301_CR31","doi-asserted-by":"crossref","unstructured":"Wang, W., Chen, J., Jin, Q.: VideoIC: a video interactive comments dataset and multimodal multitask learning for comments generation. In: Paper Presented at the 28th ACM International Conference on Multimedia, New York, NY, United States (2020)","DOI":"10.1145\/3394171.3413890"},{"key":"1301_CR32","unstructured":"Wikipedia, T.F.E.: Online video platform (2021a). https:\/\/en.wikipedia.org\/wiki\/Online_video_platform. Accessed 5 July 2022"},{"key":"1301_CR33","unstructured":"Wikipedia, T.F.E.: Streamimg media (2021b). https:\/\/en.wikipedia.org\/wiki\/Streaming_media. Accessed 5 July 2022"},{"key":"1301_CR34","unstructured":"Wikipedia, T.F.E.: \u5f71\u7247\u5206\u4eab\u7db2\u7ad9 (2021c). https:\/\/zh.wikipedia.org\/wiki\/%E5%BD%B1%E7%89%87%E5%88%86%E4%BA%AB%E7%B6%B2%E7%AB%99. Accessed 5 July 2022"},{"issue":"6","key":"1301_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2021.102687","volume":"58","author":"D Xi","year":"2021","unstructured":"Xi, D., Xu, W., Chen, R., Zhou, Y., Yang, Z.: Sending or not? A multimodal framework for Danmaku comment prediction. Inf. Process. Manag. 58(6), 102687 (2021)","journal-title":"Inf. Process. Manag."},{"key":"1301_CR36","doi-asserted-by":"crossref","unstructured":"Xu, L., Zhang, C.: Bridging video content and comments: synchronized video description with temporal summarization of crowdsourced time-sync comments. In: Paper Presented at the Thirty-First AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.10753"},{"issue":"4","key":"1301_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3332932","volume":"13","author":"W Yang","year":"2019","unstructured":"Yang, W., Wang, K., Ruan, N., Gao, W., Jia, W., Zhao, W., Zhang, Y.: Time-sync video tag extraction using semantic association graph. ACM Trans. Knowl. Discov. Data 13(4), 1\u201324 (2019). https:\/\/doi.org\/10.1145\/3332932","journal-title":"ACM Trans. Knowl. Discov. Data"},{"key":"1301_CR38","unstructured":"Zhang, T., Kishore, V., Wu, F., Weinberger, K.Q., Artzi, Y.: Bertscore: evaluating text generation with bert (2019). arXiv:1904.09675"},{"issue":"8","key":"1301_CR39","doi-asserted-by":"publisher","first-page":"1429","DOI":"10.1080\/00343404.2021.1902493","volume":"55","author":"X Zhang","year":"2021","unstructured":"Zhang, X., Sun, C., Mei, L.: Agglomerative patterns and cooperative networks of the online video industry in China. Reg. Stud. 55(8), 1429\u20131441 (2021). https:\/\/doi.org\/10.1080\/00343404.2021.1902493","journal-title":"Reg. Stud."},{"key":"1301_CR40","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1016\/j.neucom.2021.10.039","volume":"468","author":"B Zhao","year":"2022","unstructured":"Zhao, B., Gong, M., Li, X.: Hierarchical multimodal transformer to summarize videos. Neurocomputing 468, 360\u2013369 (2022). https:\/\/doi.org\/10.1016\/j.neucom.2021.10.039","journal-title":"Neurocomputing"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01301-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01301-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01301-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T13:16:29Z","timestamp":1712927789000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01301-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,30]]},"references-count":40,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["1301"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01301-3","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2024,3,30]]},"assertion":[{"value":"24 November 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors of this study declare no conflicts of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"105"}}