{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T14:49:42Z","timestamp":1776782982391,"version":"3.51.2"},"reference-count":58,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T00:00:00Z","timestamp":1694822400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T00:00:00Z","timestamp":1694822400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-16727-6","type":"journal-article","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T04:01:32Z","timestamp":1694836892000},"page":"31317-31340","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["An attention mechanism-based CNN-BiLSTM classification model for detection of inappropriate content in cartoon videos"],"prefix":"10.1007","volume":"83","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2198-6932","authenticated-orcid":false,"given":"Kanwal","family":"Yousaf","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tabassam","family":"Nawaz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,16]]},"reference":[{"key":"16727_CR1","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1145\/2959100.2959190","volume-title":"Proceedings of the 10th ACM conference on recommender systems","author":"P Covington","year":"2016","unstructured":"Covington P, Adams J, Sargin E (2016) Deep neural networks for youtube recommendations. In: Proceedings of the 10th ACM conference on recommender systems. ACM, New York, NY, USA, pp 191\u2013198. https:\/\/doi.org\/10.1145\/2959100.2959190"},{"key":"16727_CR2","doi-asserted-by":"publisher","first-page":"101648","DOI":"10.1016\/j.techsoc.2021.101648","volume":"66","author":"R Lozano-Blasco","year":"2021","unstructured":"Lozano-Blasco R, Quilez-Robres A, Delgado-Bujedo D, Latorre-Mart\u00ednez MP (2021) YouTube's growth in use among children 0\u20135 during COVID19: the occidental European case. Technol Soc 66:101648. https:\/\/doi.org\/10.1016\/j.techsoc.2021.101648","journal-title":"Technol Soc"},{"key":"16727_CR3","unstructured":"Maheshwari S (2017) On YouTube kids, startling videos slip past filters. The New York Times https:\/\/www.nytimes.com\/2017\/11\/04\/business\/media\/youtube-kids-paw-patrol.html. Accessed November 23, 2021"},{"key":"16727_CR4","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1007\/978-3-030-03398-9_43","volume-title":"Chinese conference on pattern recognition and computer vision (PRCV)","author":"C Hou","year":"2018","unstructured":"Hou C, Wu X, Wang G (2018) End-to-end bloody video recognition by audio-visual feature fusion. In: Chinese conference on pattern recognition and computer vision (PRCV). Springer, Cham, pp 501\u2013510. https:\/\/doi.org\/10.1007\/978-3-030-03398-9_43"},{"key":"16727_CR5","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/978-3-319-72550-5_22","volume-title":"International conference on soft computing and data mining","author":"A Ali","year":"2018","unstructured":"Ali A, Senan N (2018) Violence video classification performance using deep neural networks. In: International conference on soft computing and data mining. Springer, Cham, pp 225\u2013233. https:\/\/doi.org\/10.1007\/978-3-319-72550-5_22"},{"key":"16727_CR6","doi-asserted-by":"publisher","first-page":"301022","DOI":"10.1016\/j.fsidi.2020.301022","volume":"34","author":"H-E Lee","year":"2020","unstructured":"Lee H-E, Ermakova T, Ververis V, Fabian B (2020) Detecting child sexual abuse material: a comprehensive survey. Forensic Sci Int Digit Inv 34:301022. https:\/\/doi.org\/10.1016\/j.fsidi.2020.301022","journal-title":"Forensic Sci Int Digit Inv"},{"key":"16727_CR7","doi-asserted-by":"publisher","unstructured":"Papadamou K, Papasavva A, Zannettou S, Blackburn J, Kourtellis N, Leontiadis I, Stringhini G, Sirivianos M (2020) Disturbed YouTube for kids: characterizing and detecting inappropriate videos targeting young children. In: proceedings of the international AAAI conference on web and social media. Pp 522-533. https:\/\/doi.org\/10.48550\/arXiv.1901.07046","DOI":"10.48550\/arXiv.1901.07046"},{"key":"16727_CR8","first-page":"8","volume":"10","author":"H Wilson","year":"2020","unstructured":"Wilson H (2020) Youtube is unsafe for children: Youtube's safeguards and the current legal framework are inadequate to protect children from disturbing content. Seattle J Technol Environ Innov Law 10:8 https:\/\/digitalcommons.law.seattleu.edu\/sjteil\/vol10\/iss1\/8","journal-title":"Seattle J Technol Environ Innov Law"},{"key":"16727_CR9","doi-asserted-by":"publisher","first-page":"508","DOI":"10.1145\/3442442.3452314","volume-title":"Companion proceedings of the web conference 2021","author":"S Alshamrani","year":"2021","unstructured":"Alshamrani S, Abusnaina A, Abuhamad M, Nyang D, Mohaisen D (2021) Hate, obscenity, and insults: measuring the exposure of children to inappropriate comments in YouTube. In: Companion proceedings of the web conference 2021. ACM, New York, NY, USA, pp 508\u2013515. https:\/\/doi.org\/10.1145\/3442442.3452314"},{"key":"16727_CR10","doi-asserted-by":"publisher","unstructured":"Elias N, Sulkin I (2017) YouTube viewers in diapers: an exploration of factors associated with amount of toddlers\u2019 online viewing. Cyberpsychology: J Psychosoc Res Cyberspace 11. https:\/\/doi.org\/10.5817\/cp2017-3-2","DOI":"10.5817\/cp2017-3-2"},{"key":"16727_CR11","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1177\/1329878X17693700","volume":"163","author":"D Craig","year":"2017","unstructured":"Craig D, Cunningham S (2017) Toy unboxing: living in a (n unregulated) material world. Media Int Aust 163:77\u201386. https:\/\/doi.org\/10.1177\/1329878X17693700","journal-title":"Media Int Aust"},{"key":"16727_CR12","unstructured":"Brandom R (2017) Inside elsagate, the conspiracy fueled war on creepy youtube kids videos. The Verge. www.theverge.com\/2017\/12\/8\/16751206\/elsagate-youtube-kids-creepy-conspiracy-theory. Accessed September 10, 2021"},{"key":"16727_CR13","unstructured":"Reddit (2017) What is ElsaGate? https:\/\/www.reddit.com\/r\/ElsaGate\/comments\/6o6baf\/. Accessed September 10, 2021"},{"key":"16727_CR14","doi-asserted-by":"publisher","first-page":"464","DOI":"10.1145\/3341161.3342913","volume-title":"Proceedings of the 2019 IEEE\/ACM international conference on advances in social networks analysis and mining","author":"R Tahir","year":"2019","unstructured":"Tahir R, Ahmed F, Saeed H, Ali S, Zaffar F, Wilson C (2019) Bringing the kid back into youtube kids: detecting inappropriate content on video streaming platforms. In: Proceedings of the 2019 IEEE\/ACM international conference on advances in social networks analysis and mining. ACM, New York, NY, USA, pp 464\u2013469. https:\/\/doi.org\/10.1145\/3341161.3342913"},{"key":"16727_CR15","doi-asserted-by":"publisher","first-page":"1725","DOI":"10.1109\/cvpr.2014.223","volume-title":"In: 2014 IEEE conference on computer vision and pattern recognition","author":"A Karpathy","year":"2014","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Fei-Fei L (2014) Large-scale video classification with convolutional neural networks. In: In: 2014 IEEE conference on computer vision and pattern recognition. IEEE, Columbus, OH, USA, pp 1725\u20131732. https:\/\/doi.org\/10.1109\/cvpr.2014.223"},{"key":"16727_CR16","doi-asserted-by":"publisher","first-page":"4694","DOI":"10.1109\/cvpr.2015.7299101","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"J Yue-Hei Ng","year":"2015","unstructured":"Yue-Hei Ng J, Hausknecht M, Vijayanarasimhan S, Vinyals O, Monga R, Toderici G (2015) Beyond short snippets: deep networks for video classification. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, Boston, MA, USA, pp 4694\u20134702. https:\/\/doi.org\/10.1109\/cvpr.2015.7299101"},{"key":"16727_CR17","doi-asserted-by":"publisher","first-page":"568","DOI":"10.5555\/2968826.2968890","volume-title":"NIPS\u201914: procs of the 27th Intl Conf. On neural information processing systems","author":"K Simonyan","year":"2014","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. In: NIPS\u201914: procs of the 27th Intl Conf. On neural information processing systems. MIT Press, Cambridge, MA, pp 568\u2013576. https:\/\/doi.org\/10.5555\/2968826.2968890"},{"key":"16727_CR18","doi-asserted-by":"publisher","unstructured":"Wu Z, Wang X, Jiang Y-G, Ye H, Xue X (2015) Modeling spatial-temporal clues in a hybrid deep learning framework for video classification. In: proceedings of the 23rd ACM international conference on multimedia. ACM, pp 461-470. https:\/\/doi.org\/10.1145\/2733373.2806222","DOI":"10.1145\/2733373.2806222"},{"key":"16727_CR19","doi-asserted-by":"publisher","first-page":"3459","DOI":"10.1109\/tip.2018.2818328","volume":"27","author":"S Song","year":"2018","unstructured":"Song S, Lan C, Xing J, Zeng W, Liu J (2018) Spatio-temporal attention-based LSTM networks for 3D action recognition and detection. IEEE Trans Image Process 27:3459\u20133471. https:\/\/doi.org\/10.1109\/tip.2018.2818328","journal-title":"IEEE Trans Image Process"},{"key":"16727_CR20","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1016\/j.patrec.2018.07.034","volume":"112","author":"T Yu","year":"2018","unstructured":"Yu T, Guo C, Wang L, Gu H, Xiang S, Pan C (2018) Joint spatial-temporal attention for action recognition. Pattern Recogn Lett 112:226\u2013233. https:\/\/doi.org\/10.1016\/j.patrec.2018.07.034","journal-title":"Pattern Recogn Lett"},{"key":"16727_CR21","doi-asserted-by":"publisher","first-page":"1761","DOI":"10.1109\/ICIP40778.2020.9190996","volume-title":"2020 IEEE international conference on image processing (ICIP)","author":"J You","year":"2020","unstructured":"You J, Korhonen J (2020) Attention boosted deep networks for video classification. In: 2020 IEEE international conference on image processing (ICIP). IEEE, Abu Dhabi, United Arab Emirates, pp 1761\u20131765. https:\/\/doi.org\/10.1109\/ICIP40778.2020.9190996"},{"key":"16727_CR22","doi-asserted-by":"publisher","first-page":"41","DOI":"10.5121\/ijscai.2016.5105","volume":"5","author":"JP Verma","year":"2016","unstructured":"Verma JP, Agrawal S, Patel B, Patel A (2016) Big data analytics: challenges and applications for text, audio, video, and social media data. International journal on soft computing. Artif Intell Appl (IJSCAI) 5:41\u201351. https:\/\/doi.org\/10.5121\/ijscai.2016.5105","journal-title":"Artif Intell Appl (IJSCAI)"},{"key":"16727_CR23","doi-asserted-by":"publisher","first-page":"81","DOI":"10.1109\/cvpr.1999.786951","volume":"46","author":"MJ Jones","year":"2002","unstructured":"Jones MJ, Rehg JM (2002) Statistical color models with application to skin detection. Int J Comput Vis 46:81\u201396. https:\/\/doi.org\/10.1109\/cvpr.1999.786951","journal-title":"Int J Comput Vis"},{"key":"16727_CR24","doi-asserted-by":"publisher","unstructured":"Endeshaw T, Garcia J, Jakobsson A (2008) Classification of indecent videos by low complexity repetitive motion detection. In: 2008 37th IEEE applied imagery pattern recognition workshop. IEEE, pp 1-7. https:\/\/doi.org\/10.1109\/aipr.2008.4906438","DOI":"10.1109\/aipr.2008.4906438"},{"key":"16727_CR25","doi-asserted-by":"publisher","unstructured":"Jansohn C, Ulges A, Breuel TM (2009) Detecting pornographic video content by combining image features with motion information. In: proceedings of the 17th ACM international conference on multimedia. ACM, pp 601-604. https:\/\/doi.org\/10.1145\/1631272.1631366","DOI":"10.1145\/1631272.1631366"},{"key":"16727_CR26","doi-asserted-by":"publisher","first-page":"106","DOI":"10.1049\/cp:20061978","volume-title":"The 3rd European conference on visual media production (CVMP 2006) - part of the 2nd multimedia conference 2006","author":"N Rea","year":"2006","unstructured":"Rea N, Lacey G, Dahyot R, Lambe C (2006) Multimodal periodicity analysis for illicit content detection in videos. In: The 3rd European conference on visual media production (CVMP 2006) - part of the 2nd multimedia conference 2006. IET, London, pp 106\u2013114. https:\/\/doi.org\/10.1049\/cp:20061978"},{"key":"16727_CR27","doi-asserted-by":"publisher","first-page":"1488","DOI":"10.1109\/trustcom.2011.205","volume-title":"In: 2011 IEEE 10th international conference on trust, security and privacy in computing and communications","author":"Y Liu","year":"2011","unstructured":"Liu Y, Wang X, Zhang Y, Tang S (2011) Fusing audio-words with visual features for pornographic video detection. In: In: 2011 IEEE 10th international conference on trust, security and privacy in computing and communications. IEEE, Changsha, China, pp 1488\u20131493. https:\/\/doi.org\/10.1109\/trustcom.2011.205"},{"key":"16727_CR28","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1145\/2390214.2390222","volume-title":"Proceedings of the 2012 ACM international workshop on audio and multimedia methods for large-scale video analysis","author":"A Ulges","year":"2012","unstructured":"Ulges A, Schulze C, Borth D, Stahl A (2012) Pornography detection in video benefits (a lot) from a multi-modal approach. In: Proceedings of the 2012 ACM international workshop on audio and multimedia methods for large-scale video analysis. ACM, Nara, Japan, pp 21\u201326. https:\/\/doi.org\/10.1145\/2390214.2390222"},{"key":"16727_CR29","doi-asserted-by":"publisher","unstructured":"Ochoa VMT, Yayilgan SY, Cheikh FA (2012) Adult video content detection using machine learning techniques. In: 2012 eighth international conference on signal image technology and internet based systems. IEEE, pp 967-974. https:\/\/doi.org\/10.1109\/sitis.2012.143","DOI":"10.1109\/sitis.2012.143"},{"key":"16727_CR30","doi-asserted-by":"publisher","first-page":"696","DOI":"10.1109\/tce.2014.7027345","volume":"60","author":"S Jung","year":"2014","unstructured":"Jung S, Youn J, Sull S (2014) A real-time system for detecting indecent videos based on spatiotemporal patterns. IEEE Trans Consum Electron 60:696\u2013701. https:\/\/doi.org\/10.1109\/tce.2014.7027345","journal-title":"IEEE Trans Consum Electron"},{"key":"16727_CR31","doi-asserted-by":"publisher","first-page":"1003","DOI":"10.1145\/1631272.1631490","volume-title":"Proceedings of the 17th ACM international conference on multimedia","author":"S Tang","year":"2009","unstructured":"Tang S, Li J, Zhang Y, Xie C, Li M, Liu Y, Hua X, Zheng Y-T, Tang J, Chua T-S (2009) Pornprobe: an lda-svm based pornography detection system. In: Proceedings of the 17th ACM international conference on multimedia. ACM, Beijing, China, pp 1003\u20131004. https:\/\/doi.org\/10.1145\/1631272.1631490"},{"key":"16727_CR32","doi-asserted-by":"publisher","unstructured":"Lopes APB, de Avila SE, Peixoto AN, Oliveira RS, Coelho MDM, Ara\u00fajo ADA (2009) Nude detection in video using bag-of-visual-features. In: 2009 XXII Brazilian Symposium on Computer Graphics and Image Processing. IEEE, pp 224\u2013231. https:\/\/doi.org\/10.1109\/sibgrapi.2009.32","DOI":"10.1109\/sibgrapi.2009.32"},{"key":"16727_CR33","doi-asserted-by":"publisher","unstructured":"Kaushal R, Saha S, Bajaj P, Kumaraguru P (2016) KidsTube: detection, characterization and analysis of child unsafe content & promoters on YouTube. In: 2016 14th annual conference on privacy, Security and Trust (PST). IEEE, pp. 157\u2013164. https:\/\/doi.org\/10.1109\/pst.2016.7906950","DOI":"10.1109\/pst.2016.7906950"},{"key":"16727_CR34","doi-asserted-by":"publisher","first-page":"39910","DOI":"10.1109\/ACCESS.2021.3064392","volume":"9","author":"N Aldahoul","year":"2021","unstructured":"Aldahoul N, Karim HA, Abdullah MHL, Wazir ASB, Fauzi MFA, Tan MJT, Mansor S, Lyn HS (2021) An evaluation of traditional and CNN-based feature descriptors for cartoon pornography detection. IEEE Access 9:39910\u201339925. https:\/\/doi.org\/10.1109\/ACCESS.2021.3064392","journal-title":"IEEE Access"},{"key":"16727_CR35","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.neucom.2016.12.017","volume":"230","author":"M Perez","year":"2017","unstructured":"Perez M, Avila S, Moreira D, Moraes D, Testoni V, Valle E, Goldenstein S, Rocha A (2017) Video pornography detection through deep learning techniques and motion information. Neurocomputing 230:279\u2013293. https:\/\/doi.org\/10.1016\/j.neucom.2016.12.017","journal-title":"Neurocomputing"},{"key":"16727_CR36","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1007\/s41060-017-0088-4","volume":"6","author":"H Yenala","year":"2018","unstructured":"Yenala H, Jhanwar A, Chinnakotla MK, Goyal J (2018) Deep learning for detecting inappropriate content in text. Int J Data Sci Anal 6:273\u2013286. https:\/\/doi.org\/10.1007\/s41060-017-0088-4","journal-title":"Int J Data Sci Anal"},{"key":"16727_CR37","doi-asserted-by":"publisher","first-page":"3213","DOI":"10.1145\/3340531.3418511","volume-title":"Proceedings of the 29th ACM international conference on Information & Knowledge Management","author":"S Alshamrani","year":"2020","unstructured":"Alshamrani S (2020) Detecting and measuring the exposure of children and adolescents to inappropriate comments in YouTube. In: Proceedings of the 29th ACM international conference on Information & Knowledge Management. ACM, Ireland, pp 3213\u20133216. https:\/\/doi.org\/10.1145\/3340531.3418511"},{"key":"16727_CR38","doi-asserted-by":"publisher","unstructured":"Mariconti E, Suarez-Tangil G, Blackburn J, De Cristofaro E, Kourtellis N, Leontiadis I, Serrano JL, Stringhini G (2019) You know what to do proactive detection of YouTube videos targeted by coordinated hate attacks. In: proceedings of the ACM on human-computer interaction. ACM, pp 1-21. https:\/\/doi.org\/10.1145\/3359309","DOI":"10.1145\/3359309"},{"key":"16727_CR39","doi-asserted-by":"publisher","unstructured":"Alghowinem S (2018) A safer youtube kids: an extra layer of content filtering using automated multimodal analysis. In: Proceedings of SAI Intelligent Systems Conference. Springer, pp. 294\u2013308. https:\/\/doi.org\/10.1007\/978-3-030-01054-6_21","DOI":"10.1007\/978-3-030-01054-6_21"},{"key":"16727_CR40","doi-asserted-by":"publisher","unstructured":"Ishikawa A, Bollis E, Avila S (2019) Combating the elsagate phenomenon: deep learning architectures for disturbing cartoons. In: 2019 7th international workshop on biometrics and forensics (IWBF). IEEE, pp 1-6. https:\/\/doi.org\/10.1109\/iwbf.2019.8739202","DOI":"10.1109\/iwbf.2019.8739202"},{"key":"16727_CR41","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/j.jvcir.2017.12.005","volume":"50","author":"P Vitorino","year":"2018","unstructured":"Vitorino P, Avila S, Perez M, Rocha A (2018) Leveraging deep neural networks to fight child pornography in the age of social media. J Vis Commun Image Represent 50:303\u2013313. https:\/\/doi.org\/10.1016\/j.jvcir.2017.12.005","journal-title":"J Vis Commun Image Represent"},{"key":"16727_CR42","doi-asserted-by":"publisher","first-page":"2104","DOI":"10.1145\/3297280.3297487","volume-title":"Proceedings of the 34th ACM\/SIGAPP symposium on applied computing","author":"S Singh","year":"2019","unstructured":"Singh S, Kaushal R, Buduru AB, Kumaraguru P (2019) KidsGUARD: fine grained approach for child unsafe video representation and detection. In: Proceedings of the 34th ACM\/SIGAPP symposium on applied computing. ACM, Limassol, Cyprus, pp 2104\u20132111. https:\/\/doi.org\/10.1145\/3297280.3297487"},{"key":"16727_CR43","doi-asserted-by":"publisher","unstructured":"Abu-El-Haija S, Kothari N, Lee J, Natsev P, Toderici G, Varadarajan B, Vijayanarasimhan S (2016) Youtube-8m: a large-scale video classification benchmark. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.1609.08675. Accessed October 20, 2021","DOI":"10.48550\/arXiv.1609.08675"},{"key":"16727_CR44","doi-asserted-by":"publisher","unstructured":"Kim M, Kumar S, Pavlovic V, Rowley H (2008) Face tracking and recognition with visual constraints in real-world videos. In: 2008 IEEE conference on computer vision and pattern recognition. IEEE, pp 1\u20138. https:\/\/doi.org\/10.1109\/cvpr.2008.4587572","DOI":"10.1109\/cvpr.2008.4587572"},{"key":"16727_CR45","doi-asserted-by":"publisher","unstructured":"Bermingham A, Conway M, McInerney L, O'Hare N, Smeaton AF (2009) Combining social network analysis and sentiment analysis to explore the potential for online radicalisation. In: 2009 international conference on advances in social network analysis and mining. IEEE, pp 231-236. https:\/\/doi.org\/10.1109\/asonam.2009.31","DOI":"10.1109\/asonam.2009.31"},{"key":"16727_CR46","doi-asserted-by":"publisher","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) HMDB: a large video database for human motion recognition. In: 2011 international conference on computer vision. IEEE, pp 2556-2563. https:\/\/doi.org\/10.1007\/978-3-642-33374-3_41","DOI":"10.1007\/978-3-642-33374-3_41"},{"key":"16727_CR47","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1109\/cvpr.2011.5995566","volume-title":"CVPR 2011","author":"L Wolf","year":"2011","unstructured":"Wolf L, Hassner T, Maoz I (2011) Face recognition in unconstrained videos with matched background similarity. In: CVPR 2011. IEEE, Colorado Springs, CO, USA, pp 529\u2013534. https:\/\/doi.org\/10.1109\/cvpr.2011.5995566"},{"key":"16727_CR48","doi-asserted-by":"publisher","unstructured":"Soomro K, Zamir AR, Shah M (2012) UCF101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.1212.0402. Accessed October 20, 2021","DOI":"10.48550\/arXiv.1212.0402"},{"key":"16727_CR49","doi-asserted-by":"publisher","first-page":"2712","DOI":"10.1109\/iccv.2013.337","volume-title":"Proceedings of the IEEE international conference on computer vision","author":"S Guadarrama","year":"2013","unstructured":"Guadarrama S, Krishnamoorthy N, Malkarnenkar G, Venugopalan S, Mooney R, Darrell T, Saenko K (2013) Youtube2text: recognizing and describing arbitrary activities using semantic hierarchies and zero-shot recognition. In: Proceedings of the IEEE international conference on computer vision. IEEE, Sydney, NSW, Australia, pp 2712\u20132719. https:\/\/doi.org\/10.1109\/iccv.2013.337"},{"key":"16727_CR50","doi-asserted-by":"publisher","first-page":"5288","DOI":"10.1109\/cvpr.2016.571","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","author":"J Xu","year":"2016","unstructured":"Xu J, Mei T, Yao T, Rui Y (2016) Msr-vtt: a large video description dataset for bridging video and language. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, Las Vegas, NV, USA, pp 5288\u20135296. https:\/\/doi.org\/10.1109\/cvpr.2016.571"},{"key":"16727_CR51","doi-asserted-by":"publisher","unstructured":"Kay W, Carreira J, Simonyan K, Zhang B, Hillier C, Vijayanarasimhan S, Viola F, Green T, Back T, Natsev P (2017) The kinetics human action video dataset. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.1705.06950. Accessed November 20, 2021","DOI":"10.48550\/arXiv.1705.06950"},{"key":"16727_CR52","doi-asserted-by":"publisher","unstructured":"Tan M, Le Q (2019) Efficientnet: rethinking model scaling for convolutional neural networks. In: international conference on machine learning. PMLR, pp 6105-6114. https:\/\/doi.org\/10.48550\/arXiv.1905.11946","DOI":"10.48550\/arXiv.1905.11946"},{"key":"16727_CR53","doi-asserted-by":"publisher","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: A large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition. IEEE, pp 248\u2013255. https:\/\/doi.org\/10.1109\/cvprw.2009.5206848","DOI":"10.1109\/cvprw.2009.5206848"},{"key":"16727_CR54","doi-asserted-by":"publisher","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint. https:\/\/doi.org\/10.48550\/arXiv.1412.6980. Accessed October 24, 2021","DOI":"10.48550\/arXiv.1412.6980"},{"key":"16727_CR55","doi-asserted-by":"publisher","unstructured":"Ketkar N (2017) Introduction to keras. In: Deep learning with Python. Springer, pp. 97\u2013111. https:\/\/doi.org\/10.1007\/978-1-4842-2766-4_7","DOI":"10.1007\/978-1-4842-2766-4_7"},{"key":"16727_CR56","doi-asserted-by":"publisher","unstructured":"Abadi M, Barham P, Chen J, Chen Z, Davis A, Dean J, Devin M, Ghemawat S, Irving G, Isard M (2016) Tensorflow: a system for large-scale machine learning. In: proceedings of the 12th USENIX conference on operating systems design and implementation. USENIX association, Savannah, GA, USA, pp 265-283. https:\/\/doi.org\/10.48550\/arXiv.1605.08695","DOI":"10.48550\/arXiv.1605.08695"},{"key":"16727_CR57","doi-asserted-by":"publisher","unstructured":"Xu Z, Yang Y, Hauptmann AG (2015) a discriminative CNN video representation for event detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. IEEE, pp 1798-1807. https:\/\/doi.org\/10.1109\/cvpr.2015.7298789","DOI":"10.1109\/cvpr.2015.7298789"},{"key":"16727_CR58","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1016\/j.neucom.2017.07.012","volume":"272","author":"J Wehrmann","year":"2018","unstructured":"Wehrmann J, Sim\u00f5es GS, Barros RC, Cavalcante VF (2018) Adult content detection in videos with convolutional and recurrent neural networks. Neurocomputing 272:432\u2013438. https:\/\/doi.org\/10.1016\/j.neucom.2017.07.012","journal-title":"Neurocomputing"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16727-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-16727-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-16727-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,8]],"date-time":"2024-03-08T06:36:57Z","timestamp":1709879817000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-16727-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,16]]},"references-count":58,"journal-issue":{"issue":"11","published-online":{"date-parts":[[2024,3]]}},"alternative-id":["16727"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-16727-6","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,16]]},"assertion":[{"value":"7 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 July 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 August 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Research involving human participants and\/or animals"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}]}}