{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T10:07:47Z","timestamp":1760609267459,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,2,15]],"date-time":"2020-02-15T00:00:00Z","timestamp":1581724800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Shenzhen Science and Technology Innovation Commission","award":["JCYJ20170410172100520"],"award-info":[{"award-number":["JCYJ20170410172100520"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,2,15]]},"DOI":"10.1145\/3383972.3384072","type":"proceedings-article","created":{"date-parts":[[2020,5,26]],"date-time":"2020-05-26T18:15:22Z","timestamp":1590516922000},"page":"426-432","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Image Captioning in Chinese and Its Application for Children with Autism Spectrum Disorder"],"prefix":"10.1145","author":[{"given":"Bin","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Software &amp; Microelectronics, Peking University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lixin","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Software &amp; Microelectronics, Peking University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sifan","family":"Song","sequence":"additional","affiliation":[{"name":"Department of Mathematical Sciences, Xi'an Jiaotong-Liverpool University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lifu","family":"Chen","sequence":"additional","affiliation":[{"name":"DoGoodly International Education Center (Shenzhen) Co., Ltd. &amp; Smart Children Education Center, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zijian","family":"Jiang","sequence":"additional","affiliation":[{"name":"Shenzhen Institute of Artificial Intelligence and Robotics for Society, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaming","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Robotics and Intelligent Manufacturing, The Chinese University of Hong Kong (Shenzhen) &amp; Shenzhen Institute of Artificial Intelligence and Robotics for Society, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2020,5,26]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Autism: A Global Framework for Action. World Innovation Summit for Health (WISH), Doha, Qatar","author":"M.","year":"2016","unstructured":"Munir, K., Lavelle, T., Helm, D., Thompson, D., Prestt, J., & Azeem, M. W. , 2016 . Autism: A Global Framework for Action. World Innovation Summit for Health (WISH), Doha, Qatar . http:\/\/www.wish.org.qa\/summits\/wish-2016\/forum-reports\/ Munir, K., Lavelle, T., Helm, D., Thompson, D., Prestt, J., & Azeem, M. W., 2016. Autism: A Global Framework for Action. World Innovation Summit for Health (WISH), Doha, Qatar. http:\/\/www.wish.org.qa\/summits\/wish-2016\/forum-reports\/"},{"volume-title":"Report on the Industry Development of Autism Education and Rehabilitation in China (II)","author":"WUCAILU ASD Research Institute","key":"e_1_3_2_1_2_1","unstructured":"WUCAILU ASD Research Institute , 2017. Report on the Industry Development of Autism Education and Rehabilitation in China (II) . Huaxia Publishing House , Beijing . (in Chinese) WUCAILU ASD Research Institute, 2017. Report on the Industry Development of Autism Education and Rehabilitation in China (II). Huaxia Publishing House, Beijing. (in Chinese)"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"R. E. Accordino C. Kidd L. C. Politte C. A. Henry and C. J. McDougle \"Psychopharmacological interventions in autism spectrum disorder \" Expert opinion on pharmacotherapy vol. 17 no. 7 pp. 937--952 2016.  R. E. Accordino C. Kidd L. C. Politte C. A. Henry and C. J. McDougle \"Psychopharmacological interventions in autism spectrum disorder \" Expert opinion on pharmacotherapy vol. 17 no. 7 pp. 937--952 2016.","DOI":"10.1517\/14656566.2016.1154536"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1542\/peds.2007-2362"},{"issue":"12","key":"e_1_3_2_1_5_1","first-page":"972","article-title":"Autism spectrum disorder: primary care principles","volume":"94","author":"Sanchack K. E.","year":"2016","unstructured":"K. E. Sanchack and C. A. Thomas , \" Autism spectrum disorder: primary care principles ,\" Am Fam Physician , vol. 94 , no. 12 , pp. 972 -- 979 , 2016 . K. E. Sanchack and C. A. Thomas, \"Autism spectrum disorder: primary care principles,\" Am Fam Physician, vol. 94, no. 12, pp. 972--979, 2016.","journal-title":"Am Fam Physician"},{"issue":"6","key":"e_1_3_2_1_6_1","first-page":"1","article-title":"Mapping Robots to Therapy and Educational Objectives for Children with Autism Spectrum Disorder[J]","volume":"46","author":"C A G J","year":"2016","unstructured":"Huijnen C A G J , Lexis MAS, Jansens R, Mapping Robots to Therapy and Educational Objectives for Children with Autism Spectrum Disorder[J] . Journal of Autism & Developmental Disorders , 2016 , 46 ( 6 ): 1 -- 15 . Huijnen C A G J, Lexis MAS, Jansens R, et al. Mapping Robots to Therapy and Educational Objectives for Children with Autism Spectrum Disorder[J]. Journal of Autism & Developmental Disorders, 2016, 46(6): 1--15.","journal-title":"Journal of Autism & Developmental Disorders"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1002\/aur.1527"},{"key":"e_1_3_2_1_8_1","first-page":"57","volume-title":"Can social interaction skills be taught by a social agent? The role of a robotic mediator in autism therapy,\" in International Conference on Cognitive Technology","author":"Werry I.","year":"2001","unstructured":"I. Werry , K. Dautenhahn , B. Ogden , and W. Harwin , \" Can social interaction skills be taught by a social agent? The role of a robotic mediator in autism therapy,\" in International Conference on Cognitive Technology , 2001 : Springer , pp. 57 -- 74 . I. Werry, K. Dautenhahn, B. Ogden, and W. Harwin, \"Can social interaction skills be taught by a social agent? The role of a robotic mediator in autism therapy,\" in International Conference on Cognitive Technology, 2001: Springer, pp. 57--74."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1075\/is.15.2.14pop"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.proeng.2012.07.334"},{"volume-title":"2009 IEEE International Conference on Robotics and Automation","author":"Gouaillier D.","key":"e_1_3_2_1_11_1","unstructured":"D. Gouaillier NAO humanoid,\" in 2009 IEEE International Conference on Robotics and Automation , 2009: IEEE, pp. 769--774. D. Gouaillier et al., \"Mechatronic design of NAO humanoid,\" in 2009 IEEE International Conference on Robotics and Automation, 2009: IEEE, pp. 769--774."},{"volume-title":"Understanding how adolescents with autism respond to facial expressions in virtual reality environments,\" IEEE transactions on visualization and computer graphics","author":"Bekele E.","key":"e_1_3_2_1_12_1","unstructured":"E. Bekele , Z. Zheng , A. Swanson , J. Crittendon , Z. Warren , and N. Sarkar , \" Understanding how adolescents with autism respond to facial expressions in virtual reality environments,\" IEEE transactions on visualization and computer graphics , vol. 19 , no. 4, pp. 711--720, 2013. E. Bekele, Z. Zheng, A. Swanson, J. Crittendon, Z. Warren, and N. Sarkar, \"Understanding how adolescents with autism respond to facial expressions in virtual reality environments,\" IEEE transactions on visualization and computer graphics, vol. 19, no. 4, pp. 711--720, 2013."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.proeng.2012.07.333"},{"key":"e_1_3_2_1_14_1","first-page":"3156","article-title":"Show and tell: A neural image caption generator","author":"Vinyals O.","year":"2015","unstructured":"O. Vinyals , A. Toshev , S. Bengio , and D. Erhan , \" Show and tell: A neural image caption generator ,\" in Proceedings of the IEEE conference on computer vision and pattern recognition , 2015 , pp. 3156 -- 3164 . O. Vinyals, A. Toshev, S. Bengio, and D. Erhan, \"Show and tell: A neural image caption generator,\" in Proceedings of the IEEE conference on computer vision and pattern recognition, 2015, pp. 3156--3164.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_15_1","first-page":"1","article-title":"Going deeper with convolutions","author":"Szegedy C.","year":"2015","unstructured":"C. Szegedy , \" Going deeper with convolutions ,\" in Proceedings of the IEEE conference on computer vision and pattern recognition , 2015 , pp. 1 -- 9 . C. Szegedy et al., \"Going deeper with convolutions,\" in Proceedings of the IEEE conference on computer vision and pattern recognition, 2015, pp. 1--9.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_16_1","first-page":"2625","article-title":"Long-term recurrent convolutional networks for visual recognition and description","author":"Donahue J.","year":"2015","unstructured":"J. Donahue , \" Long-term recurrent convolutional networks for visual recognition and description ,\" in Proceedings of the IEEE conference on computer vision and pattern recognition , 2015 , pp. 2625 -- 2634 . J. Donahue et al., \"Long-term recurrent convolutional networks for visual recognition and description,\" in Proceedings of the IEEE conference on computer vision and pattern recognition, 2015, pp. 2625--2634.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_17_1","first-page":"3128","article-title":"Deep visual-semantic alignments for generating image descriptions","author":"Karpathy A.","year":"2015","unstructured":"A. Karpathy and L. Fei-Fei , \" Deep visual-semantic alignments for generating image descriptions ,\" in Proceedings of the IEEE conference on computer vision and pattern recognition , 2015 , pp. 3128 -- 3137 . A. Karpathy and L. Fei-Fei, \"Deep visual-semantic alignments for generating image descriptions,\" in Proceedings of the IEEE conference on computer vision and pattern recognition, 2015, pp. 3128--3137.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_18_1","first-page":"1889","volume-title":"Deep fragment embeddings for bidirectional image sentence mapping,\" in Advances in neural information processing systems","author":"Karpathy A.","year":"2014","unstructured":"A. Karpathy , A. Joulin , and L. F. Fei-Fei , \" Deep fragment embeddings for bidirectional image sentence mapping,\" in Advances in neural information processing systems , 2014 , pp. 1889 -- 1897 . A. Karpathy, A. Joulin, and L. F. Fei-Fei, \"Deep fragment embeddings for bidirectional image sentence mapping,\" in Advances in neural information processing systems, 2014, pp. 1889--1897."},{"key":"e_1_3_2_1_19_1","volume-title":"Aligning where to see and what to tell: image caption with region-based attention and scene factorization,\" arXiv preprint arXiv: 1506.06272","author":"Jin J.","year":"2015","unstructured":"J. Jin , K. Fu , R. Cui , F. Sha , and C. Zhang , \" Aligning where to see and what to tell: image caption with region-based attention and scene factorization,\" arXiv preprint arXiv: 1506.06272 , 2015 . J. Jin, K. Fu, R. Cui, F. Sha, and C. Zhang, \"Aligning where to see and what to tell: image caption with region-based attention and scene factorization,\" arXiv preprint arXiv: 1506.06272, 2015."},{"key":"e_1_3_2_1_20_1","first-page":"2048","volume-title":"Show, attend and tell: Neural image caption generation with visual attention,\" in International conference on machine learning","author":"Xu K.","year":"2015","unstructured":"K. Xu , \" Show, attend and tell: Neural image caption generation with visual attention,\" in International conference on machine learning , 2015 , pp. 2048 -- 2057 . K. Xu et al., \"Show, attend and tell: Neural image caption generation with visual attention,\" in International conference on machine learning, 2015, pp. 2048--2057."},{"key":"e_1_3_2_1_21_1","first-page":"4565","article-title":"Densecap: Fully convolutional localization networks for dense captioning","author":"Johnson J.","year":"2016","unstructured":"J. Johnson , A. Karpathy , and L. Fei-Fei , \" Densecap: Fully convolutional localization networks for dense captioning ,\" in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition , 2016 , pp. 4565 -- 4574 . J. Johnson, A. Karpathy, and L. Fei-Fei, \"Densecap: Fully convolutional localization networks for dense captioning,\" in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 4565--4574.","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"e_1_3_2_1_22_1","first-page":"91","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks,\" in Advances in neural information processing systems","author":"Ren S.","year":"2015","unstructured":"S. Ren , K. He , R. Girshick , and J. Sun , \" Faster r-cnn: Towards real-time object detection with region proposal networks,\" in Advances in neural information processing systems , 2015 , pp. 91 -- 99 . S. Ren, K. He, R. Girshick, and J. Sun, \"Faster r-cnn: Towards real-time object detection with region proposal networks,\" in Advances in neural information processing systems, 2015, pp. 91--99."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295748"},{"key":"e_1_3_2_1_24_1","first-page":"2193","article-title":"Dense captioning with joint inference and visual context","author":"Yang L.","year":"2017","unstructured":"L. Yang , K. Tang , J. Yang , and L.-J. Li , \" Dense captioning with joint inference and visual context ,\" in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition , 2017 , pp. 2193 -- 2202 . L. Yang, K. Tang, J. Yang, and L.-J. Li, \"Dense captioning with joint inference and visual context,\" in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 2193--2202.","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"e_1_3_2_1_25_1","volume-title":"Tensor2tensor for neural machine translation,\" arXiv preprint arXiv","author":"Vaswani A.","year":"1803","unstructured":"A. Vaswani , \" Tensor2tensor for neural machine translation,\" arXiv preprint arXiv : 1803 .07416, 2018. A. Vaswani et al., \"Tensor2tensor for neural machine translation,\" arXiv preprint arXiv: 1803.07416, 2018."},{"key":"e_1_3_2_1_26_1","volume-title":"Rinehart and Winston","author":"Mayer G. R.","year":"1977","unstructured":"G. R. Mayer and B. Sulzer-Azaroff , Applying behavior-analysis procedures with children and youth. Holt , Rinehart and Winston , 1977 . G. R. Mayer and B. Sulzer-Azaroff, Applying behavior-analysis procedures with children and youth. Holt, Rinehart and Winston, 1977."},{"key":"e_1_3_2_1_27_1","first-page":"5998","volume-title":"Attention is all you need,\" in Advances in neural information processing systems","author":"Vaswani A.","year":"2017","unstructured":"A. Vaswani , \" Attention is all you need,\" in Advances in neural information processing systems , 2017 , pp. 5998 -- 6008 . A. Vaswani et al., \"Attention is all you need,\" in Advances in neural information processing systems, 2017, pp. 5998--6008."},{"key":"e_1_3_2_1_28_1","volume-title":"Microsoft coco captions: Data collection and evaluation server,\" arXiv preprint arXiv: 1504.00325","author":"Chen X.","year":"2015","unstructured":"X. Chen , \" Microsoft coco captions: Data collection and evaluation server,\" arXiv preprint arXiv: 1504.00325 , 2015 . X. Chen et al., \"Microsoft coco captions: Data collection and evaluation server,\" arXiv preprint arXiv: 1504.00325, 2015."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_1_31_1","volume-title":"Tensorflow: Large-scale machine learning on heterogeneous distributed systems,\" arXiv preprint arXiv: 1603.04467","author":"Abadi M.","year":"2016","unstructured":"M. Abadi , \" Tensorflow: Large-scale machine learning on heterogeneous distributed systems,\" arXiv preprint arXiv: 1603.04467 , 2016 . M. Abadi et al., \"Tensorflow: Large-scale machine learning on heterogeneous distributed systems,\" arXiv preprint arXiv: 1603.04467, 2016."},{"key":"e_1_3_2_1_32_1","first-page":"675","volume-title":"Caffe: Convolutional architecture for fast feature embedding,\" in Proceedings of the 22nd ACM international conference on Multimedia","author":"Jia Y.","year":"2014","unstructured":"Y. Jia , \" Caffe: Convolutional architecture for fast feature embedding,\" in Proceedings of the 22nd ACM international conference on Multimedia , 2014 : ACM , pp. 675 -- 678 . Y. Jia et al., \"Caffe: Convolutional architecture for fast feature embedding,\" in Proceedings of the 22nd ACM international conference on Multimedia, 2014: ACM, pp. 675--678."},{"key":"e_1_3_2_1_33_1","volume-title":"Very deep convolutional networks for large-scale image recognition,\" arXiv preprint arXiv: 1409.1556","author":"Simonyan K.","year":"2014","unstructured":"K. Simonyan and A. Zisserman , \" Very deep convolutional networks for large-scale image recognition,\" arXiv preprint arXiv: 1409.1556 , 2014 . K. Simonyan and A. Zisserman, \"Very deep convolutional networks for large-scale image recognition,\" arXiv preprint arXiv: 1409.1556, 2014."},{"key":"e_1_3_2_1_34_1","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He K.","year":"2016","unstructured":"K. He , X. Zhang , S. Ren , and J. Sun , \" Deep residual learning for image recognition ,\" in Proceedings of the IEEE conference on computer vision and pattern recognition , 2016 , pp. 770 -- 778 . K. He, X. Zhang, S. Ren, and J. Sun, \"Deep residual learning for image recognition,\" in Proceedings of the IEEE conference on computer vision and pattern recognition, 2016, pp. 770--778.","journal-title":"Proceedings of the IEEE conference on computer vision and pattern recognition"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.2478\/pralin-2018-0002"}],"event":{"name":"ICMLC 2020: 2020 12th International Conference on Machine Learning and Computing","sponsor":["Shenzhen University Shenzhen University"],"location":"Shenzhen China","acronym":"ICMLC 2020"},"container-title":["Proceedings of the 2020 12th International Conference on Machine Learning and Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383972.3384072","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3383972.3384072","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:33:24Z","timestamp":1750199604000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383972.3384072"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,2,15]]},"references-count":35,"alternative-id":["10.1145\/3383972.3384072","10.1145\/3383972"],"URL":"https:\/\/doi.org\/10.1145\/3383972.3384072","relation":{},"subject":[],"published":{"date-parts":[[2020,2,15]]},"assertion":[{"value":"2020-05-26","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}