{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T14:02:01Z","timestamp":1725976921690},"publisher-location":"Singapore","reference-count":25,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811075896"},{"type":"electronic","value":"9789811075902"}],"license":[{"start":{"date-parts":[[2018,5,9]],"date-time":"2018-05-09T00:00:00Z","timestamp":1525824000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-981-10-7590-2_10","type":"book-chapter","created":{"date-parts":[[2018,5,8]],"date-time":"2018-05-08T02:23:32Z","timestamp":1525746212000},"page":"141-151","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Natural Language Description of Surveillance Events"],"prefix":"10.1007","author":[{"given":"Sk. Arif","family":"Ahmed","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Debi Prosad","family":"Dogra","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Samarjit","family":"Kar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Partha Pratim","family":"Roy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2018,5,9]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Aradhye, H., Toderici, G., Yagnik, J.: Video2text: learning to annotate video content. In: IEEE International Conference on Data Mining Workshops, 2009. ICDMW\u201909, pp. 144\u2013151. IEEE (2009)","DOI":"10.1109\/ICDMW.2009.79"},{"key":"10_CR2","doi-asserted-by":"crossref","unstructured":"Chen, X., Lawrence, Z.C.: Mind\u2019s eye: a recurrent visual representation for image caption generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2422\u20132431 (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"10_CR3","doi-asserted-by":"crossref","unstructured":"Denkowski, M., Lavie, A.: Meteor universal: language specific translation evaluation for any target language. In: Proceedings of the Ninth Workshop on Statistical Machine Translation, pp. 376\u2013380 (2014)","DOI":"10.3115\/v1\/W14-3348"},{"issue":"11","key":"10_CR4","doi-asserted-by":"publisher","first-page":"6373","DOI":"10.1007\/s11042-015-2576-7","volume":"75","author":"DP Dogra","year":"2016","unstructured":"Dogra, D.P., Ahmed, A., Bhaskar, H.: Smart video summarization using mealy machine-based trajectory modelling for surveillance applications. Multimed. Tools Appl. 75(11), 6373\u20136401 (2016)","journal-title":"Multimed. Tools Appl."},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Donahue, J., Anne, H.L., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Guadarrama, S., Krishnamoorthy, N., Malkarnenkar, G., Venugopalan, S., Mooney, R., Darrell, T., Saenko, K.: Youtube2text: recognizing and describing arbitrary activities using semantic hierarchies and zero-shot recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2712\u20132719 (2013)","DOI":"10.1109\/ICCV.2013.337"},{"issue":"3","key":"10_CR7","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1109\/TPAMI.2014.2345390","volume":"37","author":"JF Henriques","year":"2015","unstructured":"Henriques, J.F., Caseiro, R., Martins, P., Batista, J.: High-speed tracking with kernelized correlation filters. IEEE Trans. Pattern Anal. Mach. Intell. 37(3), 583\u2013596 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10_CR8","doi-asserted-by":"crossref","first-page":"853","DOI":"10.1613\/jair.3994","volume":"47","author":"M Hodosh","year":"2013","unstructured":"Hodosh, M., Young, P., Hockenmaier, J.: Framing image description as a ranking task: data, models and evaluation metrics. J. Artif. Intell. Res. 47, 853\u2013899 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"10_CR9","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1007\/978-3-642-35795-4_21","volume-title":"Trustworthy Computing and Services","author":"Haiqi Huang","year":"2013","unstructured":"Huang, H., Lu, Y., Zhang, F., Sun, S.: A multi-modal clustering method for web videos. In: International Conference on Trustworthy Computing and Services, pp. 163\u2013169. Springer (2012)"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3128\u20133137 (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"10_CR11","unstructured":"Kiros, R., Salakhutdinov, R., Zemel, R.S.: Unifying Visual-semantic Embeddings with Multimodal Neural Language Models (2014). arXiv:1411.2539"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Krishnamoorthy, N., Malkarnenkar, G., Mooney, R.J., Saenko, K., Guadarrama, S.: Generating natural-language video descriptions using text-mined knowledge. In: AAAI, vol. 1, p. 2 (2013)","DOI":"10.1609\/aaai.v27i1.8679"},{"issue":"10","key":"10_CR13","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1162\/tacl_a_00188","volume":"2","author":"P Kuznetsova","year":"2014","unstructured":"Kuznetsova, P., Ordonez, V., Berg, T.L., Choi, Y.: Treetalk: composition and compression of trees for image descriptions. TACL 2(10), 351\u2013362 (2014)","journal-title":"TACL"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Oh, S., Hoogs, A., Perera, A., Cuntoor, N., Chen, C.C., Lee, J.T., Mukherjee, S., Aggarwal, J., Lee, H., Davis, L., et\u00a0al.: A large-scale benchmark dataset for event recognition in surveillance video. In: IEEE conference on Computer Vision and Pattern Recognition (CVPR), pp. 3153\u20133160. IEEE (2011)","DOI":"10.1109\/CVPR.2011.5995586"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, pp. 311\u2013318. Association for Computational Linguistics (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Rohrbach, M., Tandon, N., Schiele, B.: A dataset for movie description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133212 (2015)","DOI":"10.1109\/CVPR.2015.7298940"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Rohrbach, M., Qiu, W., Titov, I., Thater, S., Pinkal, M., Schiele, B.: Translating video content to natural language descriptions. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 433\u2013440 (2013)","DOI":"10.1109\/ICCV.2013.61"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence, Zitnick, C., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R., Darrell, T., Saenko, K.: Sequence to sequence-video to text. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4534\u20134542 (2015)","DOI":"10.1109\/ICCV.2015.515"},{"key":"10_CR20","unstructured":"Venugopalan, S., Xu, H., Donahue, J., Rohrbach, M., Mooney, R., Saenko, K.: Translating Videos to Natural Language Using Deep Recurrent Neural Networks (2014). arXiv:1412.4729"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"issue":"8","key":"10_CR22","doi-asserted-by":"publisher","first-page":"1191","DOI":"10.1109\/TKDE.2009.145","volume":"22","author":"S Wei","year":"2010","unstructured":"Wei, S., Zhao, Y., Zhu, Z., Liu, N.: Multimodal fusion for video searchreranking. IEEE Trans. Knowl. Data Eng. 22(8), 1191\u20131199 (2010)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10_CR23","unstructured":"Welch, G., Bishop, G.: An introduction to the Kalman filter. In: Annual Conference Computer Graphics Interactions Technology, pp. 12\u201317. ACM (2001)"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Yao, L., Torabi, A., Cho, K., Ballas, N., Pal, C., Larochelle, H., Courville, A.: Describing videos by exploiting temporal structure. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Zivkovic, Z.: Improved adaptive Gaussian mixture model for background subtraction. In: Proceedings of the 17th International Conference on Pattern Recognition, 2004. ICPR 2004, vol. 2, pp. 28\u201331. IEEE (2004)","DOI":"10.1109\/ICPR.2004.1333992"}],"container-title":["Advances in Intelligent Systems and Computing","Information Technology and Applied Mathematics"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-10-7590-2_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T16:55:22Z","timestamp":1661187322000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-10-7590-2_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,5,9]]},"ISBN":["9789811075896","9789811075902"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-981-10-7590-2_10","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2018,5,9]]}}}