{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:46:43Z","timestamp":1777657603543,"version":"3.51.4"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319117515","type":"print"},{"value":"9783319117522","type":"electronic"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-11752-2_15","type":"book-chapter","created":{"date-parts":[[2014,10,14]],"date-time":"2014-10-14T06:05:43Z","timestamp":1413266743000},"page":"184-195","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":121,"title":["Coherent Multi-sentence Video Description with Variable Level of Detail"],"prefix":"10.1007","author":[{"given":"Anna","family":"Rohrbach","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcus","family":"Rohrbach","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei","family":"Qiu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Annemarie","family":"Friedrich","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Manfred","family":"Pinkal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bernt","family":"Schiele","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,10,15]]},"reference":[{"key":"15_CR1","doi-asserted-by":"crossref","unstructured":"Das, P., Xu, C., Doell, R.F., Corso, J.: Thousand frames in just a few words: Lingual description of videos through latent topics and sparse object stitching. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2013)","DOI":"10.1109\/CVPR.2013.340"},{"key":"15_CR2","doi-asserted-by":"crossref","unstructured":"Dyer, C., Muresan, S., Resnik, P.: Generalizing word lattice translation. In: Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL) (2008)","DOI":"10.21236\/ADA482158"},{"key":"15_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-642-15561-1_2","volume-title":"Computer Vision \u2013 ECCV 2010","author":"A Farhadi","year":"2010","unstructured":"Farhadi, A., Hejrati, M., Sadeghi, M.A., Young, P., Rashtchian, C., Hockenmaier, J., Forsyth, D.: Every picture tells a story: generating sentences from images. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part IV. LNCS, vol. 6314, pp. 15\u201329. Springer, Heidelberg (2010)"},{"key":"15_CR4","doi-asserted-by":"crossref","unstructured":"Guadarrama, S., Krishnamoorthy, N., Malkarnenkar, G., Mooney, R., Darrell, T., Saenko, K.: Youtube2text: Recognizing and describing arbitrary activities using semantic hierarchies and zero-shoot recognition. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2013)","DOI":"10.1109\/ICCV.2013.337"},{"key":"15_CR5","doi-asserted-by":"crossref","unstructured":"Gupta, A., Srinivasan, P., Shi, J.B., Davis, L.: Understanding videos, constructing plots learning a visually grounded storyline model from annotated videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2009)","DOI":"10.1109\/CVPR.2009.5206492"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Khan, M.U.G., Zhang, L., Gotoh, Y.: Human focused video description. In: Proceedings of the IEEE International Conference on Computer Vision Workshops (ICCV Workshops) (2011)","DOI":"10.1109\/ICCVW.2011.6130425"},{"key":"15_CR7","unstructured":"Koehn, P., Hoang, H., Birch, A., Callison-Burch, C., Federico, M., Bertoldi, N., Cowan, B., Shen, W., Moran, C., Zens, R., Dyer, C., Bojar, O., Constantin, A., Herbst, E.: Moses: Open source toolkit for statistical machine translation. In: Annual Meeting of the Association for Computational Linguistics: Human Language Technologies (demo) (2007)"},{"key":"15_CR8","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1023\/A:1020346032608","volume":"50","author":"A Kojima","year":"2002","unstructured":"Kojima, A., Tamura, T., Fukunaga, K.: Natural language description of human activities from video images based on concept hierarchy of actions. Int. J. Comput. Vis. (IJCV) 50, 171\u2013184 (2002)","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Krishnamoorthy, N., Malkarnenkar, G., Mooney, R.J., Saenko, K., Guadarrama, S.: Generating natural-language video descriptions using text-mined knowledge. In: AAAI Conference on Artificial Intelligence (AAAI) (2013)","DOI":"10.1609\/aaai.v27i1.8679"},{"key":"15_CR10","doi-asserted-by":"crossref","unstructured":"Kulkarni, G., Premraj, V., Dhar, S., Li, S., Choi, Y., Berg, A.C., Berg, T.L.: Baby talk: Understanding and generating simple image descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2011)","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"15_CR11","unstructured":"Kuznetsova, P., Ordonez, V., Berg, A.C., Berg, T.L., Choi, Y.: Collective generation of natural image descriptions. In: Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL) (2012)"},{"key":"15_CR12","unstructured":"Mitchell, M., Dodge, J., Goyal, A., Yamaguchi, K., Stratos, K., Han, X., Mensch, A., Berg, A.C., Berg, T.L., III, H.D.: Midge: Generating image descriptions from computer vision detections. In: Proceedings of the Conference of the European Chapter of the Association for Computational Linguistics (EACL) (2012)"},{"key":"15_CR13","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1162\/tacl_a_00207","volume":"1","author":"M Regneri","year":"2013","unstructured":"Regneri, M., Rohrbach, M., Wetzel, D., Thater, S., Schiele, B., Pinkal, M.: Grounding action descriptions in videos. Trans. Assoc. Comput. Linguist. (TACL) 1, 25\u201336 (2013)","journal-title":"Trans. Assoc. Comput. Linguist. (TACL)"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Rohrbach, M., Qiu, W., Titov, I., Thater, S., Pinkal, M., Schiele, B.: Translating video content to natural language descriptions. In: IEEE International Conference on Computer Vision (ICCV) (2013)","DOI":"10.1109\/ICCV.2013.61"},{"key":"15_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1007\/978-3-642-33718-5_11","volume-title":"Computer Vision \u2013 ECCV 2012","author":"M Rohrbach","year":"2012","unstructured":"Rohrbach, M., Regneri, M., Andriluka, M., Amin, S., Pinkal, M., Schiele, B.: Script data for attribute-based recognition of composite activities. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part I. LNCS, vol. 7572, pp. 144\u2013157. Springer, Heidelberg (2012)"},{"key":"15_CR16","unstructured":"Schmidt, M.: UGM: Matlab code for undirected graphical models (2013). http:\/\/www.di.ens.fr\/~mschmidt\/Software\/UGM.html"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Senina, A., Rohrbach, M., Qiu, W., Friedrich, A., Amin, S., Andriluka, M., Pinkal, M., Schiele, B.: Coherent multi-sentence video description with variable level of detail. arXiv:1403.6173 (2014)","DOI":"10.1007\/978-3-319-11752-2_15"},{"key":"15_CR18","doi-asserted-by":"crossref","unstructured":"Siddharth, N., Barbu, A., Siskind, J.M.: Seeing what youre told: Sentence-guided activity recognition in video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2014)","DOI":"10.1109\/CVPR.2014.99"},{"key":"15_CR19","doi-asserted-by":"crossref","unstructured":"Tan, C.C., Jiang, Y.G., Ngo, C.W.: Towards textually describing complex video contents with audio-visual concept classifiers. In: ACM Multimedia (2011)","DOI":"10.1145\/2072298.2072411"},{"key":"15_CR20","unstructured":"Vedaldi, A., Fulkerson, B.: VLFeat: An open and portable library of computer vision algorithms (2008). http:\/\/www.vlfeat.org\/"},{"key":"15_CR21","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1007\/s11263-012-0594-8","volume":"103","author":"H Wang","year":"2013","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.: Dense trajectories and motion boundary descriptors for action recognition. Int. J. Comput. Vis. (IJCV) 103, 60\u201379 (2013)","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"15_CR22","unstructured":"Yu, H., Siskind, J.M.: Grounded language learning from videos described with sentences. In: Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL) (2013)"},{"key":"15_CR23","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1023\/A:1011174108613","volume":"11","author":"I Zukerman","year":"2001","unstructured":"Zukerman, I., Litman, D.: Natural language processing and user modeling: Synergies and limitations. User Model. User-Adap. Inter. 11, 129\u2013158 (2001)","journal-title":"User Model. User-Adap. Inter."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-11752-2_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T02:53:08Z","timestamp":1675997588000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-11752-2_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319117515","9783319117522"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-11752-2_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"15 October 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}