{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T04:26:08Z","timestamp":1745987168732,"version":"3.40.4"},"publisher-location":"Berlin, Heidelberg","reference-count":23,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642374463"},{"type":"electronic","value":"9783642374470"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013]]},"DOI":"10.1007\/978-3-642-37447-0_40","type":"book-chapter","created":{"date-parts":[[2013,3,27]],"date-time":"2013-03-27T08:09:04Z","timestamp":1364371744000},"page":"524-537","source":"Crossref","is-referenced-by-count":0,"title":["Unsupervised Language Learning for Discovered Visual Concepts"],"prefix":"10.1007","author":[{"given":"Prithwijit","family":"Guha","sequence":"first","affiliation":[]},{"given":"Amitabha","family":"Mukerjee","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"40_CR1","first-page":"1107","volume":"3","author":"K. Barnard","year":"2003","unstructured":"Barnard, K., Duygulu, P., Forsyth, D., de Freitas, N., Blei, D.M., Jordan, M.I.: Matching words and pictures. Journal of Machine Learning Research\u00a03, 1107\u20131135 (2003)","journal-title":"Journal of Machine Learning Research"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Feng, S., Manmatha, R., Lavrenko, V.: Multiple bernoulli relevance models for image and video annotation. In: IEEE International Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1002\u20131009 (2004)","DOI":"10.1109\/CVPR.2004.1315274"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Kulkarni, G., Premraj, V., Dhar, S., Li, S., Choi, Y., Berg, A.C., Berg, T.L.: Baby talk: Understanding and generating simple image descriptions. In: IEEE International Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1601\u20131608 (2011)","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"40_CR4","doi-asserted-by":"crossref","unstructured":"Siddiquie, B., Gupta, A.: Beyond active noun tagging: Modeling contextual interactions for multi-class active learning. In: IEEE International Conference on Computer Vision and Pattern Recognition (CVPR) (2010)","DOI":"10.1109\/CVPR.2010.5540044"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Quattoni, A., Collins, M., Darrell, T.: Learning visual representations using images with captions. In: IEEE International Conference on Computer Vision and Pattern Recognition (CVPR) (2007)","DOI":"10.1109\/CVPR.2007.383173"},{"key":"40_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/978-3-540-88693-8_12","volume-title":"Computer Vision \u2013 ECCV 2008","author":"T. Cour","year":"2008","unstructured":"Cour, T., Jordan, C., Miltsakaki, E., Taskar, B.: Movie\/Script: Alignment and Parsing of Video and Text Transcription. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008, Part IV. LNCS, vol.\u00a05305, pp. 158\u2013171. Springer, Heidelberg (2008)"},{"key":"40_CR7","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1613\/jair.790","volume":"15","author":"J.M. Siskind","year":"2001","unstructured":"Siskind, J.M.: Grounding the lexical semantics of verbs in visual perception using force dynamics and event logic. Journal of Artificial Intelligence Research\u00a015, 31\u201390 (2001)","journal-title":"Journal of Artificial Intelligence Research"},{"key":"40_CR8","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1207\/s15516709cog2601_4","volume":"26","author":"D.K. Roy","year":"2002","unstructured":"Roy, D.K., Pentland, A.P.: Learning words from sights and sounds: a computational model. Cognitive Science\u00a026, 113\u2013146 (2002)","journal-title":"Cognitive Science"},{"key":"40_CR9","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/j.artint.2005.06.007","volume":"167","author":"P. Dominey","year":"2005","unstructured":"Dominey, P., Boucher, J.: Learning to talk about events from narrated video in the construction grammar framework. Artificial Intelligence\u00a0167, 31\u201361 (2005)","journal-title":"Artificial Intelligence"},{"key":"40_CR10","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1016\/j.bandl.2009.07.001","volume":"112","author":"C. Madden","year":"2010","unstructured":"Madden, C., Hoen, M., Dominey, P.: A cognitive neuroscience perspective on embodied language for human-robot cooperation. Brain and Language\u00a0112, 180\u2013188 (2010)","journal-title":"Brain and Language"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Yu, C., Ballard, D.H.: A multimodal learning interface for grounding spoken language in sensory perceptions. ACM Transactions on Applied Perception (2004)","DOI":"10.1145\/1008722.1008727"},{"key":"40_CR12","unstructured":"Piaget, J.: The Construction of Reality in the Child. Basic Books (1994)"},{"key":"40_CR13","volume-title":"Foundations of Mind","author":"J.M. Mandler","year":"2004","unstructured":"Mandler, J.M.: Foundations of Mind. Oxford University Press, New York (2004)"},{"key":"40_CR14","volume-title":"Word and Object","author":"W.V.O. Quine","year":"1960","unstructured":"Quine, W.V.O.: Word and Object. John Wiley and Sons, New York (1960)"},{"key":"40_CR15","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1038\/35058500","volume":"2","author":"L. Itti","year":"2001","unstructured":"Itti, L., Koch, C.: Computational modeling of visual attention. Nature Reviews Neuroscience\u00a02, 194\u2013203 (2001)","journal-title":"Nature Reviews Neuroscience"},{"key":"40_CR16","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1016\/S0163-6383(99)80010-1","volume":"22","author":"J.T. Coldren","year":"1999","unstructured":"Coldren, J.T., Haaf, R.A.: Priority of processing components of visual stimuli by 6-month-old infants. Infant Behavior and Development\u00a022, 131\u2013135 (1999)","journal-title":"Infant Behavior and Development"},{"key":"40_CR17","doi-asserted-by":"crossref","unstructured":"Zivkovic, Z.: Improved adaptive gaussian mixture model for background subtraction. In: Proceedings of the 17th International Conference on Pattern Recognition, vol.\u00a02, pp. 28\u201331 (2004)","DOI":"10.1109\/ICPR.2004.1333992"},{"key":"40_CR18","doi-asserted-by":"crossref","unstructured":"Guha, P., Mukerjee, A., Subramanian, V.K.: Formulation, detection and application of occlusion states (oc-7) in the context of multiple object tracking. In: 8th IEEE International Conference on Advanced Video and Signal-Based Surveillance (AVSS), pp. 1\u20136 (2011)","DOI":"10.1109\/AVSS.2011.6027318"},{"key":"40_CR19","doi-asserted-by":"crossref","unstructured":"Nandi, S., Guha, P., Venkatesh, K.: Objects from animacy: Discovery in joint shape and haar feature space. In: Indian Conference on Vision, Graphics and Image Processing (2008)","DOI":"10.1109\/ICVGIP.2008.78"},{"key":"40_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-642-24088-1_8","volume-title":"Image Analysis and Processing \u2013 ICIAP 2011","author":"P. Guha","year":"2011","unstructured":"Guha, P., Mukerjee, A., Venkatesh, K.S.: Activity Discovery Using Compressed Suffix Trees. In: Maino, G., Foresti, G.L. (eds.) ICIAP 2011, Part II. LNCS, vol.\u00a06979, pp. 69\u201378. Springer, Heidelberg (2011)"},{"key":"40_CR21","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/3577.001.0001","volume-title":"How Children Learn the Meanings of Words","author":"P. Bloom","year":"2000","unstructured":"Bloom, P.: How Children Learn the Meanings of Words. MIT Press, Cambridge (2000)"},{"key":"40_CR22","unstructured":"Sarkar, M., Mukerjee, A.: Perceptual theory of mind: An intermediary between visual salience and noun\/verb acquisition. In: International Conference on Developmental Learning (ICDL 2006) (2006)"},{"key":"40_CR23","doi-asserted-by":"crossref","unstructured":"Mukerjee, A., Joshi, N., Mudgal, P., Srinath, S.: Bootstrapping word learning: A perception driven semantics first approach. In: IEEE International Conference on Development and Learning, vol.\u00a02, pp. 1\u20136 (2011)","DOI":"10.1109\/DEVLRN.2011.6037345"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2012"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-37447-0_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,30]],"date-time":"2025-04-30T02:16:40Z","timestamp":1745979400000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-37447-0_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013]]},"ISBN":["9783642374463","9783642374470"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-37447-0_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2013]]}}}