{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:58:10Z","timestamp":1772823490571,"version":"3.50.1"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T00:00:00Z","timestamp":1740700800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T00:00:00Z","timestamp":1740700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11760-025-03951-w","type":"journal-article","created":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T10:45:26Z","timestamp":1740739526000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["The future of action recognition: are multi-modal visual language models the key?"],"prefix":"10.1007","volume":"19","author":[{"given":"Enes","family":"G\u00fcm\u00fc\u015fkaynak","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"S\u00fcleyman","family":"Eken","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,28]]},"reference":[{"key":"3951_CR1","first-page":"24206","volume":"34","author":"H Akbari","year":"2021","unstructured":"Akbari, H., Yuan, L., Qian, R., Chuang, W.-H., Chang, S.-F., Cui, Y., Gong, B.: Vatt: transformers for multimodal self-supervised learning from raw video, audio and text. Adv. Neural. Inf. Process. Syst. 34, 24206\u201324221 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3951_CR2","doi-asserted-by":"crossref","unstructured":"Alayrac, J.-B., Bojanowski, P., Agrawal, N., Sivic, J., Laptev, I., Lacoste-Julien, S.: Unsupervised learning from narrated instruction videos. In: Proceedings of the ieee conference on computer vision and pattern recognition, pp. 4575\u20134583 (2016)","DOI":"10.1109\/CVPR.2016.495"},{"key":"3951_CR3","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lucic, M., Schmid, C.: Vivit: A video vision transformer. Iccv, pp. 6816\u20136826 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"3951_CR4","unstructured":"Baevski, A., Hsu, W.-N., Xu, Q., Babu, A., Gu, J., Auli, M.: Data2vec: A general framework for self-supervised learning in speech, vision and language. In: International conference on machine learning, pp. 1298\u20131312 (2022)"},{"key":"3951_CR5","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? Icml, pp. 813\u2013824 (2021)"},{"key":"3951_CR6","unstructured":"Beyer, L., Steiner, A., Pinto, A.S., Kolesnikov, A., Wang, X., Salz, D., ... others: Paligemma: A versatile 3b vlm for transfer. arXiv preprint arXiv:2407.07726 (2024)"},{"key":"3951_CR7","unstructured":"Carreira, J., Koppula, S., Zoran, D., Recasens, A., Ionescu, C., Henaff, O., .. others: Hip: Hierarchical perceiver. arXiv preprint arXiv:2202.10890 (2022)"},{"issue":"2","key":"3951_CR8","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3440755","volume":"54","author":"D Chandrasekaran","year":"2021","unstructured":"Chandrasekaran, D., Mago, V.: Evolution of semantic similarity\u2014a survey. ACM Comput. Surv. (CSUR) 54(2), 1\u201337 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"3951_CR9","doi-asserted-by":"crossref","unstructured":"Chen, M., Peng, H., Fu, J., Ling, H.: Autoformer: Searching transformers for visual recognition. In: Proceedings of the ieee\/cvf international conference on computer vision (iccv), p.12270-12280 (2021)","DOI":"10.1109\/ICCV48922.2021.01205"},{"key":"3951_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, W., Tian, H., Ye, S., Gao, Z., Cui, E., ... others: How far are we to gpt-4v? closing the gap to commercial multimodal models with open-source suites. arXiv preprint arXiv:2404.16821 (2024)","DOI":"10.1007\/s11432-024-4231-5"},{"key":"3951_CR11","doi-asserted-by":"crossref","unstructured":"Choi, J.-H., Lee, J.-S.: Embracenet for activity: A deep multimodal fusion architecture for activity recognition. In: Adjunct proceedings of the 2019 acm international joint conference on pervasive and ubiquitous computing and proceedings of the 2019 acm international symposium on wearable computers, pp. 693\u2013698 (2019)","DOI":"10.1145\/3341162.3344871"},{"key":"3951_CR12","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 ieee computer society conference on computer vision and pattern recognition (cvpr\u201905), Vol.\u00a01, p.886-893 vol. 1 (2005)","DOI":"10.1109\/CVPR.2005.177"},{"key":"3951_CR13","doi-asserted-by":"crossref","unstructured":"Donahue, J., Anne\u00a0Hendricks, L., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the ieee conference on computer vision and pattern recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"3951_CR14","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., ... others: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"3951_CR15","unstructured":"Dosovitskiy, A., Springenberg, J.T., Riedmiller, M., Brox, T.: Discriminative unsupervised feature learning with convolutional neural networks. Advances in neural information processing systems, 27 (2014)"},{"key":"3951_CR16","unstructured":"Feichtenhofer, C., Fan, H., Li, Y., He, K.: Masked autoencoders as spatiotemporal learners. Neurips (2022)"},{"key":"3951_CR17","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. Iccv, pp. 6201\u20136210 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"3951_CR18","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Singh, M., Ravi, N., Van Der\u00a0Maaten, L., Joulin, A., Misra, I.: Omnivore: A single model for many visual modalities. In: Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp. 16102\u201316112 (2022)","DOI":"10.1109\/CVPR52688.2022.01563"},{"issue":"8","key":"3951_CR19","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"1","key":"3951_CR20","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1016\/0004-3702(81)90024-2","volume":"17","author":"BK Horn","year":"1981","unstructured":"Horn, B.K., Schunck, B.G.: Determining optical flow. Artif. Intell. 17(1), 185\u2013203 (1981). https:\/\/doi.org\/10.1016\/0004-3702(81)90024-2","journal-title":"Artif. Intell."},{"key":"3951_CR21","doi-asserted-by":"crossref","unstructured":"Hu, F., Chen, A., Wang, Z., Zhou, F., Dong, J., Li, X.: Lightweight attentional feature fusion: In: A new baseline for text-to-video retrieval. European conference on computer vision, pp. 444\u2013461 (2022)","DOI":"10.1007\/978-3-031-19781-9_26"},{"key":"3951_CR22","unstructured":"Jaegle, A., Gimeno, F., Brock, A., Vinyals, O., Zisserman, A., Carreira, J.: Perceiver: General perception with iterative attention. In: International conference on machine learning, pp. 4651\u20134664 (2021)"},{"key":"3951_CR23","unstructured":"Kay, W., Carreira, J., Simonyan, K., Zhang, B., Hillier, C., Vijayanarasimhan, S., ... others: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"3951_CR24","unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.: Unifying visual-semantic embeddings with multimodal neural language models. In: 31st International Conference on Machine Learning, ICML 2014, 3 (2014)"},{"issue":"5","key":"3951_CR25","doi-asserted-by":"crossref","first-page":"1366","DOI":"10.1007\/s11263-022-01594-9","volume":"130","author":"Y Kong","year":"2022","unstructured":"Kong, Y., Fu, Y.: Human action recognition and prediction: a survey. Int. J. Comput. Vision 130(5), 1366\u20131401 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"3951_CR26","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I.: On space-time interest points. Int. J. Comput. Vision 64, 107\u2013123 (2005)","journal-title":"Int. J. Comput. Vision"},{"key":"3951_CR27","unstructured":"Li, C., Yang, J., Zhang, P., Gao, M., Xiao, B., Dai, X., Gao, J.: Efficient self-supervised vision transformers for representation learning. In: International conference on learning representations (2022)"},{"issue":"10","key":"3951_CR28","doi-asserted-by":"crossref","first-page":"12581","DOI":"10.1109\/TPAMI.2023.3282631","volume":"45","author":"K Li","year":"2023","unstructured":"Li, K., Wang, Y., Zhang, J., Gao, P., Song, G., Liu, Y., Qiao, Y.: Uniformer: Unifying convolution and self-attention for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 45(10), 12581\u201312600 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3951_CR29","doi-asserted-by":"crossref","unstructured":"Li, Y., Ji, B., Shi, X., Zhang, J., Kang, B., Wang, L.: Tea: Temporal excitation and aggregation for action recognition. In: Proceedings of the ieee\/cvf conference on computer vision and pattern recognition (cvpr) (2020)","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"3951_CR30","doi-asserted-by":"crossref","unstructured":"Li, Y., Wu, C.-Y., Fan, H., Mangalam, K., Xiong, B., Malik, J., Feichtenhofer, C.: Mvitv2: Improved multiscale vision transformers for classification and detection. Cvpr, pp. 4794\u20134804 (2022)","DOI":"10.1109\/CVPR52688.2022.00476"},{"key":"3951_CR31","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, D., Wang, Y., Wang, L., Tai, Y., Wang, C., Lu, T.: Teinet: Towards an efficient architecture for video recognition. In: Proceedings of the aaai conference on artificial intelligence 34, 11669\u201311676 (2020)","DOI":"10.1609\/aaai.v34i07.6836"},{"key":"3951_CR32","doi-asserted-by":"crossref","unstructured":"Liu, Z., Ning, J., Cao, Y., Wei, Y., Zhang, Z., Lin, S., Hu, H.: Video swin transformer. Cvpr, pp. 3192\u20133201 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"},{"issue":"2","key":"3951_CR33","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Low","year":"2004","unstructured":"Low, D.G.: Distinctive image features from scale-invariant keypoints. J. Comput. Vision 60(2), 91\u2013110 (2004)","journal-title":"J. Comput. Vision"},{"key":"3951_CR34","doi-asserted-by":"crossref","unstructured":"Mahajan, D., Girshick, R., Ramanathan, V., He, K., Paluri, M., Li, Y., Van Der\u00a0Maaten, L.: Exploring the limits of weakly supervised pretraining. In: Proceedings of the European conference on computer vision (eccv), pp. 181\u2013196 (2018)","DOI":"10.1007\/978-3-030-01216-8_12"},{"key":"3951_CR35","unstructured":"Name, A.: Moondream2 (2024) https:\/\/huggingface.co\/vikhyatk\/moondream2"},{"issue":"5","key":"3951_CR36","doi-asserted-by":"crossref","first-page":"1591","DOI":"10.1007\/s00778-024-00864-x","volume":"33","author":"JJ Pan","year":"2024","unstructured":"Pan, J.J., Wang, J., Li, G.: Survey of vector database management systems. VLDB J. 33(5), 1591\u20131615 (2024)","journal-title":"VLDB J."},{"key":"3951_CR37","unstructured":"Patrick, M., Campbell, D., Asano, Y., Misra, I., Metze, F., Feichtenhofer, C., Henriques, J.a.F.: Keeping your eye on the ball: Trajectory attention in video transformers. M.\u00a0Ranzato, A.\u00a0Beygelzimer, Y.\u00a0Dauphin, P.\u00a0Liang, and J.W.\u00a0Vaughan (Eds.), Advances in neural information processing systems (Vol.\u00a034, pp. 12493\u201312506). Curran Associates, Inc (2021)"},{"key":"3951_CR38","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., ... others: Learning transferable visual models from natural language supervision. In: International conference on machine learning, p. 8748\u20138763 (2021)"},{"key":"3951_CR39","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. Advances in neural information processing systems, 27 (2014)"},{"key":"3951_CR40","doi-asserted-by":"crossref","unstructured":"Sivic, Zisserman.: Video google: a text retrieval approach to object matching in videos. In: Proceedings ninth ieee international conference on computer vision, vol.2, p.1470-1477 (2003)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"3951_CR41","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"3951_CR42","unstructured":"Tan, H., Lei, J., Wolf, T., Bansal, M.: Vimpac: Video pre-training via masked token prediction and contrastive learning. arXiv preprint arXiv:2106.11250 (2021)"},{"issue":"3","key":"3951_CR43","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3571735","volume":"19","author":"H Tang","year":"2023","unstructured":"Tang, H., Ding, L., Wu, S., Ren, B., Sebe, N., Rota, P.: Deep unsupervised key frame extraction for efficient video classification. ACM Trans. Multimed. Comput. Commun. Appl. 19(3), 1\u201317 (2023)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"3951_CR44","unstructured":"Team, C.: Chameleon: Mixed-modal early-fusion foundation models. arXiv preprint arXiv:2405.09818 (2024)"},{"key":"3951_CR45","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the ieee international conference on computer vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"3951_CR46","unstructured":"Tran, D., Ray, J., Shou, Z., Chang, S.-F., Paluri, M.: Convnet architecture search for spatiotemporal feature learning. arXiv preprint arXiv:1708.05038 (2017)"},{"key":"3951_CR47","unstructured":"Vaswani, A.: Attention is all you need. arXiv preprint arXiv:1706.03762 (2017)"},{"key":"3951_CR48","first-page":"3169","volume":"2011","author":"H Wang","year":"2011","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.-L.: Action recognition by dense trajectories. Cvpr 2011, 3169\u20133176 (2011)","journal-title":"Cvpr"},{"key":"3951_CR49","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: 2013 ieee international conference on computer vision, p.3551-3558 (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"3951_CR50","doi-asserted-by":"crossref","unstructured":"Wang, L., Huang, B., Zhao, Z., Tong, Z., He, Y., Wang, Y.. Qiao, Y.: Videomae v2: Scaling video masked autoencoders with dual masking. In: 2023 ieee\/cvf conference on computer vision and pattern recognition (cvpr), p.14549-14560 (2023)","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"3951_CR51","doi-asserted-by":"crossref","unstructured":"Wang, L., Tong, Z., Ji, B., Wu, G.: Tdn: temporal difference networks for efficient action recognition. Cvpr, pp. 1895\u20131904 (2021)","DOI":"10.1109\/CVPR46437.2021.00193"},{"key":"3951_CR52","doi-asserted-by":"crossref","unstructured":"Wang, R., Chen, D., Wu, Z., Chen, Y., Dai, X., Liu, M.. Yuan, L.: Bevt: Bert pretraining of video transformers. In: Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp. 14733\u201314743 (2022)","DOI":"10.1109\/CVPR52688.2022.01432"},{"key":"3951_CR53","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R.B., Gupta, A., He, K.: Non-local neural networks. Cvpr, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"3951_CR54","doi-asserted-by":"crossref","unstructured":"Wei, C., Fan, H., Xie, S., Wu, C.-Y., Yuille, A.L., Feichtenhofer, C.: Masked feature prediction for self-supervised visual pre-training. Cvpr, pp. 14648\u201314658 (2022)","DOI":"10.1109\/CVPR52688.2022.01426"},{"key":"3951_CR55","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Xu, W., Dai, X., Hu, H., Lu, Y., Yuan, L.: Florence-2: Advancing a unified representation for a variety of vision tasks. In: Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp. 4818\u20134829 (2024)","DOI":"10.1109\/CVPR52733.2024.00461"},{"issue":"10","key":"3951_CR56","doi-asserted-by":"crossref","first-page":"12113","DOI":"10.1109\/TPAMI.2023.3275156","volume":"45","author":"P Xu","year":"2023","unstructured":"Xu, P., Zhu, X., Clifton, D.A.: Multimodal learning with transformers: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 45(10), 12113\u201312132 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3951_CR57","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, X., Arnab, A., Lu, Z., Zhang, M., Sun, C., Schmid, C.: Multiview transformers for video recognition. Cvpr, pp. 3323\u20133333 (2022)","DOI":"10.1109\/CVPR52688.2022.00333"},{"key":"3951_CR58","doi-asserted-by":"crossref","unstructured":"Yang, A., Nagrani, A., Seo, P.H., Miech, A., Pont-Tuset, J., Laptev, I., Schmid, C.: Vid2seq: Large-scale pretraining of a visual language model for dense video captioning. In: Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp. 10714\u201310726 (2023)","DOI":"10.1109\/CVPR52729.2023.01032"},{"key":"3951_CR59","doi-asserted-by":"crossref","unstructured":"Yang, J., Li, B., Zeng, A., Zhang, L., Zhang, R.: Open-world human-object interaction detection via multi-modal prompts. In: Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp. 16954\u201316964 (2024)","DOI":"10.1109\/CVPR52733.2024.01604"},{"issue":"8","key":"3951_CR60","doi-asserted-by":"publisher","first-page":"5625","DOI":"10.1109\/TPAMI.2024.3369699","volume":"46","author":"J Zhang","year":"2024","unstructured":"Zhang, J., Huang, J., Jin, S., Lu, S.: Vision-language models for vision tasks: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 46(8), 5625\u20135644 (2024). https:\/\/doi.org\/10.1109\/TPAMI.2024.3369699","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-03951-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-03951-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-03951-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,2]],"date-time":"2025-04-02T03:22:36Z","timestamp":1743564156000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-03951-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,28]]},"references-count":60,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["3951"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-03951-w","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,28]]},"assertion":[{"value":"3 September 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 February 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 February 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The author declares that he has no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Not applicable","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Participate"}},{"value":"Not applicable","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Publish"}}],"article-number":"345"}}