{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T14:38:06Z","timestamp":1769092686790,"version":"3.49.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T00:00:00Z","timestamp":1659312000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T00:00:00Z","timestamp":1659312000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s11042-022-13473-z","type":"journal-article","created":{"date-parts":[[2022,8,1]],"date-time":"2022-08-01T08:04:57Z","timestamp":1659341097000},"page":"5857-5877","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Detection of helmetless motorcycle riders by video captioning using deep recurrent neural network"],"prefix":"10.1007","volume":"82","author":[{"given":"Madhuchhanda","family":"Dasgupta","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0348-371X","authenticated-orcid":false,"given":"Oishila","family":"Bandyopadhyay","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sanjay","family":"Chatterji","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,8,1]]},"reference":[{"key":"13473_CR1","unstructured":"Ba J, Mnih V, Kavukcuoglu K (2015) Multiple object 
recognition with visual attention. ICLR, arXiv:1412.7755"},{"key":"13473_CR2","unstructured":"Bahdanau D, Cho K, Bengio Y (2015) Neural machine translation by jointly learning to align and translate. International Conference on Learning Representations (ICLR), arXiv:1409.0473"},{"key":"13473_CR3","unstructured":"Banerjee S, Lavie A (June 2005) METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. https:\/\/www.aclweb.org\/anthology\/W05-0909. Association for Computational Linguistics, Ann Arbor, pp 65\u201372"},{"key":"13473_CR4","doi-asserted-by":"crossref","unstructured":"Bello I, Zoph B, Vaswani A, Shlens J, Le QV (2019) Attention augmented convolutional networks. In: Proceedings of the IEEE international conference on computer vision, pp 3286\u20133295","DOI":"10.1109\/ICCV.2019.00338"},{"key":"13473_CR5","doi-asserted-by":"crossref","unstructured":"Chen S, Jiang Y-G (2019) Motion guided spatial attention for video captioning. In: Proceedings of the AAAI conference on artificial intelligence, vol 33(01), pp 8191\u20138198","DOI":"10.1609\/aaai.v33i01.33018191"},{"issue":"3","key":"13473_CR6","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1049\/iet-its.2011.0138","volume":"6","author":"J Chiverton","year":"2012","unstructured":"Chiverton J (2012) Helmet presence classification with motorcycle detection and tracking. IET Intell Transp Syst 6(3):259\u2013269","journal-title":"IET Intell Transp Syst"},{"key":"13473_CR7","doi-asserted-by":"crossref","unstructured":"Cho K, Van Merri\u00ebnboer B, Bahdanau D, Bengio Y (2014) On the properties of neural machine translation: Encoder-decoder approaches. 
arXiv:1409.1259","DOI":"10.3115\/v1\/W14-4012"},{"key":"13473_CR8","doi-asserted-by":"crossref","unstructured":"Dasgupta M, Bandyopadhyay O, Chatterji S (2019) Automated helmet detection for multiple motorcycle riders using CNN. In: IEEE Conference on information and communication technology, pp 1\u20134","DOI":"10.1109\/CICT48419.2019.9066191"},{"key":"13473_CR9","doi-asserted-by":"crossref","unstructured":"Donahue J, Anne Hendricks L, Guadarrama S, Rohrbach M, Venugopalan S, Saenko K, Darrell T (2015) Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2625\u20132634","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"13473_CR10","doi-asserted-by":"crossref","unstructured":"E Silva RRV, Aires KRT, Veras RdMS (2014) Helmet detection on motorcyclists using image descriptors and classifiers. In: 2014 27th SIBGRAPI conference on graphics, patterns and images, IEEE, pp 141\u2013148","DOI":"10.1109\/SIBGRAPI.2014.28"},{"key":"13473_CR11","doi-asserted-by":"crossref","unstructured":"Espinosa JE, Velastin SA, Branch JW (2018) Motorcycle detection and classification in urban scenarios using a model based on faster r-cnn. ArXiv:1808.02299","DOI":"10.1049\/cp.2018.1292"},{"issue":"9","key":"13473_CR12","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TMM.2017.2729019","volume":"19","author":"L Gao","year":"2017","unstructured":"Gao L, Guo Z, Zhang H, Xu X, Shen HT (2017) Video captioning with attention-based lstm and semantic consistency. IEEE Trans Multimed 19 (9):2045\u20132055","journal-title":"IEEE Trans Multimed"},{"key":"13473_CR13","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. 
In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"13473_CR14","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"issue":"8","key":"13473_CR15","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neur Comput 9(8):1735\u20131780. https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neur Comput"},{"key":"13473_CR16","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861"},{"key":"13473_CR17","first-page":"9401","volume":"31","author":"J Hu","year":"2018","unstructured":"Hu J, Shen L, Albanie S, Sun G, Vedaldi A (2018) Gather-excite: exploiting feature context in convolutional neural networks. Adv Neur Inform Process Syst 31:9401\u20139411","journal-title":"Adv Neur Inform Process Syst"},{"key":"13473_CR18","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"13473_CR19","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2016) Squeezenet: Alexnet-level accuracy with 50x fewer parameters and<\u20090.5 mb model size. arXiv:1602.07360"},{"key":"13473_CR20","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L (2014) Deep visual-semantic alignments for generating image descriptions. 
CVPR, arXiv:cs.CV\/1412.2306","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"13473_CR21","unstructured":"Kiros R, Salakhutdinov R, Zemel R (2014) Unifying visual-semantic embeddings with multimodal neural language models. ACL, arXiv:1411.2539"},{"issue":"6","key":"13473_CR22","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"13473_CR23","unstructured":"Kunar A (2020) Object detection with ssd and mobilenet, https:\/\/aditya-kunar-52859.medium.com\/object-detection-with-ssd-and-mobilenet-aeedc5917ad0. Accessed 08 June 2021"},{"issue":"6","key":"13473_CR24","doi-asserted-by":"publisher","first-page":"5246","DOI":"10.1109\/TGRS.2020.3010106","volume":"59","author":"X Li","year":"2021","unstructured":"Li X, Zhang X, Huang W, Wang Q (2021) Truncation cross entropy loss for remote sensing image captioning. IEEE Trans Geosci Remote Sens 59(6):5246\u20135257. https:\/\/doi.org\/10.1109\/TGRS.2020.3010106","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"13473_CR25","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Yg, Berg AC (2016) Ssd: single shot multibox detector. In: European conference on computer vision. Springer, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"13473_CR26","doi-asserted-by":"publisher","unstructured":"Mallela NC, Volety R, Srinivasa PR, Nadesh RK (2021) Detection of the triple riding and speed violation on two-wheelers using deep learning algorithms. 
Multimedia Tools and Application, https:\/\/doi.org\/10.1007\/s11042-020-10126-x","DOI":"10.1007\/s11042-020-10126-x"},{"issue":"3","key":"13473_CR27","first-page":"43","volume":"13","author":"S Panesar","year":"2019","unstructured":"Panesar S, Sanjeev KS (2019) Motorcycle helmet use and its correlates in fatal crashes. Prof RK Sharma 13(3):43","journal-title":"Prof RK Sharma"},{"key":"13473_CR28","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu W-J (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the association for computational linguistics, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"13473_CR29","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"issue":"6","key":"13473_CR30","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren S, He K, Girshick R, Sun J (2016) Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"13473_CR31","unstructured":"Ruder S (2020) An overview of gradient descent optimization algorithms. arXiv:1609.04747"},{"key":"13473_CR32","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. Advances in Neural Information Processing Systems, 27"},{"key":"13473_CR33","doi-asserted-by":"crossref","unstructured":"Sun S, Kuang Z, Sheng L, Ouyang W, Zhang W (2018) Optical flow guided feature: a fast and robust motion representation for video action recognition. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1390\u20131399","DOI":"10.1109\/CVPR.2018.00151"},{"key":"13473_CR34","doi-asserted-by":"crossref","unstructured":"Venugopalan S, Rohrbach M, Donahue J, Mooney R, Darrell T, Saenko K (2015) Sequence to sequence-video to text. In: Proceedings of the IEEE international conference on computer vision, pp 4534\u20134542","DOI":"10.1109\/ICCV.2015.515"},{"key":"13473_CR35","doi-asserted-by":"crossref","unstructured":"Wang L, Xiong Y, Wang Z, Qiao Y, Lin D, Tang X, Gool LV (2016) Temporal segment networks: towards good practices for deep action recognition. In: European conference on computer vision, Springer, pp 20\u201336","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"13473_CR36","doi-asserted-by":"publisher","unstructured":"Wang Q, Huang W, Zhang X, Li X (2020) Word-sentence framework for remote sensing image captioning. IEEE Transactions on Geoscience and Remote Sensing, https:\/\/doi.org\/10.1109\/TGRS.2020.3044054","DOI":"10.1109\/TGRS.2020.3044054"},{"key":"13473_CR37","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick R, Gupta A, He K (2018) Non-local neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"key":"13473_CR38","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhudinov R, Zemel R, Bengio Y (2015) Show, attend and tell: neural image caption generation with visual attention. 
In: International conference on machine learning, pp 2048\u20132057"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13473-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-13473-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13473-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,25]],"date-time":"2023-01-25T08:29:02Z","timestamp":1674635342000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-13473-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,1]]},"references-count":38,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["13473"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-13473-z","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,8,1]]},"assertion":[{"value":"21 March 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 July 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 August 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article 
History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest\/Competing interests"}}]}}