{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T06:49:33Z","timestamp":1775198973764,"version":"3.50.1"},"reference-count":22,"publisher":"Elsevier BV","issue":"3-4","license":[{"start":{"date-parts":[[2002,7,1]],"date-time":"2002-07-01T00:00:00Z","timestamp":1025481600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Computer Speech &amp; Language"],"published-print":{"date-parts":[[2002,7]]},"DOI":"10.1016\/s0885-2308(02)00024-4","type":"journal-article","created":{"date-parts":[[2002,10,8]],"date-time":"2002-10-08T14:34:08Z","timestamp":1034087648000},"page":"353-385","source":"Crossref","is-referenced-by-count":87,"title":["Learning visually grounded words and syntax for a scene description task"],"prefix":"10.1016","volume":"16","author":[{"given":"Deb K.","family":"Roy","sequence":"first","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/S0885-2308(02)00024-4_BIB1","doi-asserted-by":"crossref","DOI":"10.1007\/BF00849177","article-title":"Generating coherent presentations employing textual and visual material","volume":"9","author":"Andr\u00e9","year":"1995","journal-title":"Artificial Intelligence Review"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB2","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1017\/S0140525X99002149","article-title":"Perceptual symbol systems","volume":"22","author":"Barsalou","year":"1999","journal-title":"Behavioural and Brain Sciences"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB3","series-title":"The Ontogenesis of Grammar","article-title":"On two types of models of the internalization of grammars","author":"Braine","year":"1971"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB4","series-title":"Elements of Information Theory","author":"Cover","year":"1991"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB5","series-title":"Generating Referring Expressions: Constructing Descriptions in a Domain of Objects and Processes","author":"Dale","year":"1992"},{"issue":"2","key":"10.1016\/S0885-2308(02)00024-4_BIB6","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1207\/s15516709cog1902_3","article-title":"Computational interpretations of the gricean maxims in the generation of referring expressions","volume":"19","author":"Dale","year":"1995","journal-title":"Cognitive Science"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB7","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1093\/biomet\/40.3-4.237","article-title":"The population frequencies of species and the estimation of population parameters","volume":"40","author":"Good","year":"1953","journal-title":"Biometrika"},{"issue":"6","key":"10.1016\/S0885-2308(02)00024-4_BIB8","doi-asserted-by":"crossref","first-page":"3441","DOI":"10.1121\/1.412431","article-title":"On automated language acquisition","volume":"97","author":"Gorin","year":"1995","journal-title":"Journal of the Acoustic Society of America"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB9","doi-asserted-by":"crossref","first-page":"335","DOI":"10.1016\/0167-2789(90)90087-6","article-title":"The symbol grounding problem","volume":"42","author":"Harnad","year":"1990","journal-title":"Physica D"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB10","doi-asserted-by":"crossref","first-page":"175","DOI":"10.1007\/BF00849073","article-title":"VIsual TRAnslator: linking perceptions and natural language descriptions","volume":"8","author":"Herzog","year":"1994","journal-title":"Artificial Intelligence Review"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB11","series-title":"Proceedings of ACL","article-title":"Learning attribute selections for nonpronominal expressions","author":"Jordan","year":"2000"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB12","series-title":"Metaphors We Live By","author":"Lakoff","year":"1980"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB13","series-title":"Categorization and Naming in Children","author":"Markman","year":"1991"},{"issue":"2","key":"10.1016\/S0885-2308(02)00024-4_BIB14","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/5.18626","article-title":"A tutorial on hidden markov models and selected applications in speech recognition","volume":"77","author":"Rabiner","year":"1989","journal-title":"Proceedings of the IEEE"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB15","series-title":"The Human Semantic Potential","author":"Regier","year":"1996"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB16","doi-asserted-by":"crossref","unstructured":"Roy, D. (2000). Grounded speech communication. Proceedings of the International Conference on Spoken Language Processing","DOI":"10.21437\/ICSLP.2000-753"},{"issue":"1","key":"10.1016\/S0885-2308(02)00024-4_BIB17","doi-asserted-by":"crossref","DOI":"10.1075\/eoc.4.1.04roy","article-title":"Learning visually grounded words and syntax of natural spoken language","volume":"4","author":"Roy","year":"2000","journal-title":"Evolution of Communication"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB18","article-title":"Grounded spoken language acquisition: experiments in word learning","author":"Roy","journal-title":"IEEE Transactions on Multimedia"},{"issue":"1","key":"10.1016\/S0885-2308(02)00024-4_BIB19","doi-asserted-by":"crossref","first-page":"113","DOI":"10.1207\/s15516709cog2601_4","article-title":"Learning words from sights and sounds: a computational model","volume":"26","author":"Roy","year":"2002","journal-title":"Cognitive Science"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB20","first-page":"31","article-title":"Grounding the lexical semantics of verbs in visual perception using force dynamics and event logic","volume":"15","author":"Siskind","year":"2001","journal-title":"Artificial Intelligence Review"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB21","series-title":"Decision, Estimation and Classification","author":"Therrien","year":"1989"},{"key":"10.1016\/S0885-2308(02)00024-4_BIB22","unstructured":"Yoder, B. (2001). Spontaneous speech recognition using hidden markov models. Master's Thesis, Massachusetts Institute of Technology, Cambridge, MA"}],"container-title":["Computer Speech &amp; Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230802000244?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230802000244?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,4,24]],"date-time":"2023-04-24T18:32:33Z","timestamp":1682361153000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230802000244"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2002,7]]},"references-count":22,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[2002,7]]}},"alternative-id":["S0885230802000244"],"URL":"https:\/\/doi.org\/10.1016\/s0885-2308(02)00024-4","relation":{},"ISSN":["0885-2308"],"issn-type":[{"value":"0885-2308","type":"print"}],"subject":[],"published":{"date-parts":[[2002,7]]}}}