{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T02:16:42Z","timestamp":1768357002327,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":38,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556786","type":"print"},{"value":"9789819556793","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5679-3_33","type":"book-chapter","created":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T18:37:04Z","timestamp":1768329424000},"page":"475-488","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["BTCD: Enabling Balanced Toxic Content Detection by\u00a0Collaborating VLMs and\u00a0CNNs"],"prefix":"10.1007","author":[{"given":"Yuantao","family":"Jia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feng","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bin","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haonan","family":"Yan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xing","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhangyu","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaopeng","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chaohao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,14]]},"reference":[{"key":"33_CR1","doi-asserted-by":"publisher","unstructured":"Ahmed, S.H., Hu, S., Sukthankar, G.: The potential of vision-language models for content moderation of children\u2019s videos. In: 2023 International Conference on Machine Learning and Applications (ICMLA), pp. 1237\u20131241 (2023). https:\/\/doi.org\/10.1109\/ICMLA58977.2023.00186","DOI":"10.1109\/ICMLA58977.2023.00186"},{"key":"33_CR2","unstructured":"Ayadi, Y.: Automatic moderation of visual content: integration of emotions and semantic analysis through deep learning for the safety of young users. J. Ambient Intell. Hum. Comput., 1\u201314 (2025)"},{"key":"33_CR3","unstructured":"Bai, S., et\u00a0al.: Qwen2.5-Vl technical report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"33_CR4","doi-asserted-by":"publisher","unstructured":"Cheng, M., Cai, K., Li, M.: RWF-2000: an open large scale video database for violence detection. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 4183\u20134190 (2021). https:\/\/doi.org\/10.1109\/ICPR48806.2021.9412502","DOI":"10.1109\/ICPR48806.2021.9412502"},{"key":"33_CR5","unstructured":"Dehghani, M., Yazdanparast, Z.: Political sentiment analysis of Persian tweets using CNN-LSTM model. arXiv preprint arXiv:2307.07740 (2023)"},{"issue":"2","key":"33_CR6","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1007\/s10044-019-00821-3","volume":"23","author":"I Febin","year":"2020","unstructured":"Febin, I., Jayasree, K., Joy, P.T.: Violence detection in videos for an intelligent surveillance system using MoBSIFT and movement filtering algorithm. Pattern Anal. Appl. 23(2), 611\u2013623 (2020)","journal-title":"Pattern Anal. Appl."},{"key":"33_CR7","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1016\/j.neucom.2021.02.056","volume":"445","author":"A Gangwar","year":"2021","unstructured":"Gangwar, A., Gonz\u00e1lez-Castro, V., Alegre, E., Fidalgo, E.: AttM-CNN: attention and metric learning based CNN for pornography, age and child sexual abuse (CSA) detection in images. Neurocomputing 445, 81\u2013104 (2021)","journal-title":"Neurocomputing"},{"key":"33_CR8","unstructured":"Guo, C., Pleiss, G., Sun, Y., Weinberger, K.Q.: On calibration of modern neural networks. In: International Conference on Machine Learning, pp. 1321\u20131330. PMLR (2017)"},{"key":"33_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"33_CR10","doi-asserted-by":"crossref","unstructured":"Howard, A., et\u00a0al.: Searching for MobileNetV3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"33_CR11","doi-asserted-by":"crossref","unstructured":"Jha, P., Jain, R., Mandal, K., Chadha, A., Saha, S., Bhattacharyya, P.: MemeGuard: an LLM and VLM-based framework for advancing content moderation via meme intervention. arXiv preprint arXiv:2406.05344 (2024)","DOI":"10.18653\/v1\/2024.acl-long.439"},{"key":"33_CR12","doi-asserted-by":"crossref","unstructured":"Karmanov, A., Guan, D., Lu, S., El\u00a0Saddik, A., Xing, E.: Efficient test-time adaptation of vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14162\u201314171 (2024)","DOI":"10.1109\/CVPR52733.2024.01343"},{"key":"33_CR13","unstructured":"Kiela, D., et al.: The hateful memes challenge: detecting hate speech in multimodal memes. In: Advances in Neural Information Processing Systems, vol. 33, pp. 2611\u20132624 (2020)"},{"key":"33_CR14","doi-asserted-by":"crossref","unstructured":"Kumar, G.K., Nandakumar, K.: Hate-CLIPper: multimodal hateful meme classification based on cross-modal interaction of clip features. arXiv preprint arXiv:2210.05916 (2022)","DOI":"10.18653\/v1\/2022.nlp4pi-1.20"},{"key":"33_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"33_CR16","unstructured":"Lu, H., et\u00a0al.: DeepSeek-VL: towards real-world vision-language understanding. arXiv preprint arXiv:2403.05525 (2024)"},{"key":"33_CR17","doi-asserted-by":"publisher","unstructured":"Matan, P., Velvizhy, P.: A comprehensive review of supervised fine-tuning for large language models in creative applications and content moderation. In: 2025 International Conference on Inventive Computation Technologies (ICICT), pp. 1294\u20131299 (2025). https:\/\/doi.org\/10.1109\/ICICT64420.2025.11005111","DOI":"10.1109\/ICICT64420.2025.11005111"},{"key":"33_CR18","doi-asserted-by":"publisher","unstructured":"Pandey, A., Moharana, S., Mohanty, D.P., Panwar, A., Agarwal, D., Thota, S.P.: On-device content moderation. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp.\u00a01\u20137 (2021). https:\/\/doi.org\/10.1109\/IJCNN52387.2021.9534227","DOI":"10.1109\/IJCNN52387.2021.9534227"},{"key":"33_CR19","unstructured":"Patel, M.: Real-time violence detection using CNN-LSTM. arXiv preprint arXiv:2107.07578 (2021)"},{"key":"33_CR20","doi-asserted-by":"publisher","unstructured":"Pawar, V., Gawande, M., Kollu, A., Bile, A.S.: Exploring the potential of prompt engineering: a comprehensive analysis of interacting with large language models. In: 2024 8th International Conference on Computing, Communication, Control and Automation (ICCUBEA), pp.\u00a01\u20139 (2024). https:\/\/doi.org\/10.1109\/ICCUBEA61740.2024.10775016","DOI":"10.1109\/ICCUBEA61740.2024.10775016"},{"key":"33_CR21","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"33_CR22","unstructured":"Reddy, B., Kim, Y., Yun, S., Seo, C., Jang, J.: Real-time weapon detection using YOLOv4. In: International Conference on Pattern Recognition and Machine Intelligence, pp. 228\u2013237. Springer (2020)"},{"key":"33_CR23","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"33_CR24","doi-asserted-by":"publisher","first-page":"129471","DOI":"10.1109\/ACCESS.2021.3112806","volume":"9","author":"H Saleh","year":"2021","unstructured":"Saleh, H., Alharbi, A., Alsamhi, S.H.: OPCNN-FAKE: optimized convolutional neural network for fake news detection. IEEE Access 9, 129471\u2013129489 (2021)","journal-title":"IEEE Access"},{"key":"33_CR25","doi-asserted-by":"publisher","unstructured":"Singh, L., Singh, G., Agnihotri, N., Dhaliwal, B.: Firearm classification based on CNN, vision transformer and Swin transformer models. In: 2024 International Conference on Communication, Computer Sciences and Engineering (IC3SE), pp.\u00a01\u20135 (2024). https:\/\/doi.org\/10.1109\/IC3SE62002.2024.10592921","DOI":"10.1109\/IC3SE62002.2024.10592921"},{"key":"33_CR26","unstructured":"Tan, M., Le, Q.: EfficientNet: rethinking model scaling for convolutional neural networks. In: Chaudhuri, K., Salakhutdinov, R. (eds.) Proceedings of the 36th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a097, pp. 6105\u20136114. PMLR, 09\u201315 June 2019"},{"issue":"3","key":"33_CR27","doi-asserted-by":"publisher","first-page":"52","DOI":"10.5815\/ijcnis.2024.03.05","volume":"16","author":"I Tereikovskyi","year":"2024","unstructured":"Tereikovskyi, I., et al.: Method for constructing neural network means for recognizing scenes of political extremism in graphic materials of online social networks. Int. J. Comput. Netw. Inf. Secur. (IJCNIS) 16(3), 52\u201369 (2024). https:\/\/doi.org\/10.5815\/ijcnis.2024.03.05","journal-title":"Int. J. Comput. Netw. Inf. Secur. (IJCNIS)"},{"key":"33_CR28","doi-asserted-by":"crossref","unstructured":"Wang, B., Wang, S., Li, C., Guan, R., Li, X.: Harmfully manipulated images matter in multimodal misinformation detection. In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 2262\u20132271 (2024)","DOI":"10.1145\/3664647.3681322"},{"key":"33_CR29","doi-asserted-by":"crossref","unstructured":"Wang, Y., Li, W.: Pornographic image recognition based on high and low level feature fusion with human body masking and attention. In: Proceedings of the 2022 3rd International Conference on Control, Robotics and Intelligent System, pp. 199\u2013204 (2022)","DOI":"10.1145\/3562007.3562046"},{"key":"33_CR30","unstructured":"Wang, Y., et\u00a0al.: Vision-language models for content moderation: challenges and opportunities. arXiv preprint arXiv:2303.08987 (2023)"},{"key":"33_CR31","unstructured":"Wei, H., Xie, R., Cheng, H., Feng, L., An, B., Li, Y.: Mitigating neural network overconfidence with logit normalization, pp. 23631\u201323644 (2022)"},{"key":"33_CR32","doi-asserted-by":"crossref","unstructured":"Won, D., Steinert-Threlkeld, Z.C., Joo, J.: Protest activity detection and perceived violence estimation from social media images. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 786\u2013794. ACM (2017)","DOI":"10.1145\/3123266.3123282"},{"key":"33_CR33","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., Kweon, I.S.: CBAM: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"33_CR34","unstructured":"Wu, J., et al.: Visual prompting in multimodal large language models: a survey. arXiv preprint arXiv:2409.15310 (2024)"},{"key":"33_CR35","unstructured":"Yang, Z., et al.: The dawn of LMMs: preliminary explorations with GPT-4V(ision). arXiv preprint arXiv:2309.17421 (2023)"},{"key":"33_CR36","doi-asserted-by":"publisher","unstructured":"Zhang, W., Li, Z., Sun, J.: CMFusion: channel and modal fusion for multimodal hate video detection. In: Proceedings of the ACM International Conference on Multimedia (ACM MM) (2025). https:\/\/doi.org\/10.1145\/3704424.3704435","DOI":"10.1145\/3704424.3704435"},{"key":"33_CR37","doi-asserted-by":"publisher","unstructured":"Zhou, K., Yang, J., Loy, C.C., et\u00a0al.: Learning to prompt for vision-language models. Int. J. Comput. Vis. 130, 2337\u20132348 (2022). https:\/\/doi.org\/10.1007\/s11263-022-01653-1","DOI":"10.1007\/s11263-022-01653-1"},{"key":"33_CR38","doi-asserted-by":"publisher","unstructured":"Zhu, K., et\u00a0al.: PromptRobust: towards evaluating the robustness of large language models on adversarial prompts. arXiv preprint arXiv:2306.04528 (2023). https:\/\/doi.org\/10.48550\/ARXIV.2306.04528","DOI":"10.48550\/ARXIV.2306.04528"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5679-3_33","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T18:37:07Z","timestamp":1768329427000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5679-3_33"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556786","9789819556793"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5679-3_33","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"14 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}