{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,31]],"date-time":"2025-12-31T00:36:35Z","timestamp":1767141395165,"version":"build-2238731810"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2021,1,12]],"date-time":"2021-01-12T00:00:00Z","timestamp":1610409600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,12]],"date-time":"2021-01-12T00:00:00Z","timestamp":1610409600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s11042-020-10292-y","type":"journal-article","created":{"date-parts":[[2021,1,12]],"date-time":"2021-01-12T17:56:57Z","timestamp":1610474217000},"page":"12525-12537","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Implementing a real-time image captioning service for scene identification using embedded system"],"prefix":"10.1007","volume":"80","author":[{"given":"He-Yen","family":"Hsieh","sequence":"first","affiliation":[]},{"given":"Sheng-An","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Jenq-Shiou","family":"Leu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,1,12]]},"reference":[{"issue":"2","key":"10292_CR1","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio Y, Simard PY, Frasconi P (1994) Learning long-term dependencies with gradient descent is difficult. IEEE Trans Neural Networks 5(2):157\u2013166","journal-title":"IEEE Trans Neural Networks"},{"key":"10292_CR2","unstructured":"Chen W, Wilson JT, Tyree S, Weinberger KQ, Chen Y (2015) Compressing neural networks with the hashing trick. In: Bach FR, Blei DM (eds) ICML, JMLR workshop and conference proceedings, vol 37, pp 2285\u20132294"},{"key":"10292_CR3","doi-asserted-by":"crossref","unstructured":"Cho K, van Merrienboer B, Gu\u0307l\u010behre \u00c7, Bahdanau D, Bougares F, Schwenk H, Bengio Y (2014) Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: Moschitti A, Pang B, Daelemans W (eds) EMNLP, pp 1724\u20131734","DOI":"10.3115\/v1\/D14-1179"},{"issue":"8","key":"10292_CR4","doi-asserted-by":"publisher","first-page":"2151","DOI":"10.1162\/NECO_a_00312","volume":"24","author":"M Denil","year":"2012","unstructured":"Denil M, Bazzani L, Larochelle H, de Freitas N (2012) Learning where to attend with deep architectures for image tracking. Neural Comput 24 (8):2151\u20132184","journal-title":"Neural Comput"},{"issue":"4","key":"10292_CR5","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1109\/TPAMI.2016.2599174","volume":"39","author":"J Donahue","year":"2017","unstructured":"Donahue J, Hendricks LA, Rohrbach M, Venugopalan S, Guadarrama S, Saenko K, Darrell T (2017) Long-term recurrent convolutional networks for visual recognition and description. IEEE Trans Pattern Anal Mach. Intell. 39 (4):677\u2013691","journal-title":"IEEE Trans Pattern Anal Mach. Intell."},{"key":"10292_CR6","unstructured":"Elliott D, Keller F (2013) Image description using visual dependency representations. In: EMNLP, pp 1292\u20131302"},{"key":"10292_CR7","doi-asserted-by":"crossref","unstructured":"Fan D, Wang W, Cheng M, Shen J (2019) Shifting more attention to video salient object detection. In: CVPR, pp 8554\u20138564","DOI":"10.1109\/CVPR.2019.00875"},{"key":"10292_CR8","doi-asserted-by":"crossref","unstructured":"Fang H, Gupta S, Iandola FN, Srivastava RK, Deng L, Dolla\u0307r P, Gao J, He X, Mitchell M, Platt JC, Zitnick CL, Zweig G (2015) From captions to visual concepts and back. In: CVPR, pp. 1473\u20131482","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"10292_CR9","doi-asserted-by":"crossref","unstructured":"Fu K, Fan D, Ji G, Zhao Q (2020) JL-DCF: Joint learning and densely-cooperative fusion framework for RGB-d salient object detection. In: CVPR, pp 3049\u20133059","DOI":"10.1109\/CVPR42600.2020.00312"},{"key":"10292_CR10","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.neucom.2019.04.062","volume":"356","author":"K Fu","year":"2019","unstructured":"Fu K, Zhao Q, Gu IY, Yang J (2019) Deepside: A general deep framework for salient object detection. Neurocomputing 356:69\u201382","journal-title":"Neurocomputing"},{"key":"10292_CR11","unstructured":"Gong Y, Liu L, Yang M, Bourdev LD (2014) Compressing deep convolutional networks using vector quantization. arXiv:1412.6115"},{"key":"10292_CR12","unstructured":"Han S, Mao H, Dally WJ (2016) Deep compression: Compressing deep neural network with pruning, trained quantization and huffman coding. In: Bengio Y, LeCun Y (eds) ICLR"},{"key":"10292_CR13","unstructured":"Hanson SJ, Pratt LY (1988) Comparing biases for minimal network construction with back-propagation. In: Touretzky DS (ed) NIPS pp 177\u2013185"},{"key":"10292_CR14","unstructured":"Hinton GE, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv:1503.02531"},{"issue":"8","key":"10292_CR15","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"10292_CR16","doi-asserted-by":"crossref","unstructured":"Howard A, Pang R, Adam H, Le QV, Sandler M, Chen B, Wang W, Chen L, Tan M, Chu G, Vasudevan V, Zhu Y (2019) Searching for mobilenetv3. In: ICCV, pp 1314\u20131324","DOI":"10.1109\/ICCV.2019.00140"},{"key":"10292_CR17","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv:1704.04861"},{"issue":"4","key":"10292_CR18","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1109\/TPAMI.2016.2598339","volume":"39","author":"A Karpathy","year":"2017","unstructured":"Karpathy A, Fei-fei L. (2017) Deep visual-semantic alignments for generating image descriptions. IEEE Trans Pattern Anal Mach Intell 39(4):664\u2013676","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10292_CR19","doi-asserted-by":"crossref","unstructured":"Khan SH, Rahmani H, Shah SAA, Bennamoun M (2018) A guide to convolutional neural networks for computer vision synthesis lectures on computer vision","DOI":"10.2200\/S00822ED1V01Y201712COV015"},{"key":"10292_CR20","unstructured":"Kiros R, Salakhutdinov R, Zemel RS (2014) Multimodal neural language models. In: ICML, pp 595\u2013603"},{"key":"10292_CR21","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1016\/j.jvcir.2019.01.024","volume":"59","author":"Y Kong","year":"2019","unstructured":"Kong Y, Huang J, Huang S, Wei Z, Wang S (2019) Learning spatiotemporal representations for human fall detection in surveillance video. J Vis Commun Image Represent 59:215\u2013230","journal-title":"J Vis Commun Image Represent"},{"issue":"12","key":"10292_CR22","doi-asserted-by":"publisher","first-page":"2891","DOI":"10.1109\/TPAMI.2012.162","volume":"35","author":"G Kulkarni","year":"2013","unstructured":"Kulkarni G, Premraj V, Ordonez V, Dhar S, Li S, Choi Y, Berg AC, Berg TL (2013) Babytalk: Understanding and generating simple image descriptions. IEEE Trans Pattern Anal Mach Intell 35(12):2891\u20132903","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10292_CR23","unstructured":"Kuznetsova P, Ordonez V, Berg AC, Berg TL, Choi Y (2012) Collective generation of natural image descriptions. In: ACL, pp 359\u2013368"},{"key":"10292_CR24","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1162\/tacl_a_00188","volume":"2","author":"P Kuznetsova","year":"2014","unstructured":"Kuznetsova P, Ordonez V, Berg TL, Choi Y (2014) TREETALK: Composition and compression of trees for image descriptions. Trans Assoc Comput Linguistics 2:351\u2013362","journal-title":"Trans Assoc Comput Linguistics"},{"key":"10292_CR25","unstructured":"Larochelle H, Hinton GE, Shawe-taylor J, Zeme RS, Culotta A (2010) Learning to combine foveal glimpses with a third-order boltzmann machine. In: Lafferty JD, Williams CKI (eds) NIPS, Curran Associates, Inc, pp 1243\u20131251"},{"key":"10292_CR26","doi-asserted-by":"crossref","unstructured":"Li H, Li C, Ding Y (2020) Fall detection based on fused saliency maps. Multimed Tools Appl 1\u201318","DOI":"10.1007\/s11042-020-09708-6"},{"key":"10292_CR27","unstructured":"Li S, Kulkarni G, Berg TL, Berg AC, Choi Y (2011) Composing simple image descriptions using web-scale n-grams. In: Goldwater S, Manning CD (eds) CoNLL, pp 220\u2013228"},{"key":"10292_CR28","unstructured":"Mao J, Xu W, Yang Y, Wang J, Yuille AL (2015) Deep captioning with multimodal recurrent neural networks (m-rnn). In: Bengio Y, LeCun Y (eds) ICLR"},{"key":"10292_CR29","unstructured":"Mitchell M, Dodge J, Goyal A, Yamaguchi K, Stratos K, Han X, Mensch AC, Berg AC, Berg TLHD III (2012) Midge: Generating image descriptions from computer vision detections. In: Daelemans W, Lapata M, Ma\u0307rquez L (eds) EACL, pp 747\u2013756"},{"key":"10292_CR30","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard AG, Zhu M, Zhmoginov A, Chen L (2018) Mobilenetv2: Inverted residuals and linear bottlenecks. In: CVPR, pp. 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"10292_CR31","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: Bengio Y, LeCun Y (eds) ICLR"},{"key":"10292_CR32","unstructured":"Sutskever I, Vinyals O, Le QV (2014) Sequence to sequence learning with neural networks. In: Ghahramani Z, Welling M, Cortes C, Lawrence ND, Weinberger KQ (eds) NIPS, pp 3104\u20133112"},{"key":"10292_CR33","unstructured":"Tang Y, Srivastava N, Salakhutdinov R (2014) Learning generative models with visual attention. In: Ghahramani Z, Welling M, Cortes C, Lawrence ND, Weinberger KQ (eds) NIPS, pp 1808\u20131816"},{"key":"10292_CR34","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell: A neural image caption generator. In: CVPR, pp 3156\u20133164","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"10292_CR35","doi-asserted-by":"crossref","unstructured":"Wu J, Leng C, Wang Y, Hu Q, Cheng J (2016) Quantized convolutional neural networks for mobile devices. In: CVPR, pp 4820\u20134828. IEEE Computer Society","DOI":"10.1109\/CVPR.2016.521"},{"key":"10292_CR36","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville AC, Salakhutdinov R, Zemel RS, Bengio Y (2015) Show, attend and tell: Neural image caption generation with visual attention. In: Bach FR, Blei DM (eds) ICML. JMLR.org, pp 2048\u20132057"},{"key":"10292_CR37","unstructured":"Yang Y, Teo CLHD III, Aloimonos Y (2011) Corpus-guided sentence generation of natural images. In: EMNLP, pp 444\u2013454"},{"key":"10292_CR38","doi-asserted-by":"crossref","unstructured":"Zhang L, Zhang J, Lin Z, Lu H, He Y (2019) Capsal: Leveraging captioning to boost semantics for salient object detection. In: CVPR, pp. 6024\u20136033","DOI":"10.1109\/CVPR.2019.00618"},{"key":"10292_CR39","doi-asserted-by":"crossref","unstructured":"Zhao J, Liu J, Fan D, Cao Y, Yang J, Cheng M (2019) Egnet: Edge guidance network for salient object detection. In: ICCV, pp 8778\u20138787","DOI":"10.1109\/ICCV.2019.00887"}],"updated-by":[{"DOI":"10.1007\/s11042-021-10867-3","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2021,3,31]],"date-time":"2021-03-31T00:00:00Z","timestamp":1617148800000}}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10292-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-10292-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-10292-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,7,30]],"date-time":"2021-07-30T08:26:23Z","timestamp":1627633583000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-10292-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,12]]},"references-count":39,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["10292"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-10292-y","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,1,12]]},"assertion":[{"value":"1 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 December 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 December 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 March 2021","order":5,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":6,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":7,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"https:\/\/doi.org\/10.1007\/s11042-021-10867-3","URL":"https:\/\/doi.org\/10.1007\/s11042-021-10867-3","order":8,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}}]}}