{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,15]],"date-time":"2025-12-15T14:16:28Z","timestamp":1765808188211,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"24","license":[{"start":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T00:00:00Z","timestamp":1676505600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T00:00:00Z","timestamp":1676505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Science and Technology Council, Taiwan","award":["MOST 110-2221-E-002-128-MY3 and MOST 110-2634-F-002-050-"],"award-info":[{"award-number":["MOST 110-2221-E-002-128-MY3 and MOST 110-2634-F-002-050-"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s11042-023-14344-x","type":"journal-article","created":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T08:02:51Z","timestamp":1676534571000},"page":"37757-37787","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Visual lifelog retrieval: humans and machines interpretation on first-person images"],"prefix":"10.1007","volume":"82","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5304-7230","authenticated-orcid":false,"given":"An-Zi","family":"Yen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min-Huan","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wei-Hong","family":"Ang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tai-Te","family":"Chu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ssu-Hao","family":"Tsai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hen-Hsen","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hsin-Hsi","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,2,16]]},"reference":[{"key":"14344_CR1","doi-asserted-by":"crossref","unstructured":"Anderson P, He X, Buehler C, Teney D, Johnson M, Gould S, Zhang L (2018) Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6077\u20136086","DOI":"10.1109\/CVPR.2018.00636"},{"key":"14344_CR2","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski P, Grave E, Joulin A, Mikolov T (2017) Enriching word vectors with subword information. Trans Assoc Comput Linguist 5:135\u2013146","journal-title":"Trans Assoc Comput Linguist"},{"issue":"1","key":"14344_CR3","first-page":"77","volume":"47","author":"M Bola\u00f1os","year":"2016","unstructured":"Bola\u00f1os M, Dimiccoli M, Radeva P (2016) Toward storytelling from visual lifelogging: an overview. IEEE Trans Hum-Mach Syst 47(1):77\u201390","journal-title":"IEEE Trans Hum-Mach Syst"},{"key":"14344_CR4","doi-asserted-by":"crossref","unstructured":"Bolanos M, Mestre R, Talavera E, Gir\u00f3-i-Nieto X, Radeva P (2015) Visual summary of egocentric photostreams by representative keyframes. In: 2015 IEEE international conference on multimedia & expo workshops (ICMEW), IEEE, pp 1\u20136","DOI":"10.1109\/ICMEW.2015.7169863"},{"key":"14344_CR5","first-page":"120, 122","volume":"25","author":"G Bradski","year":"2000","unstructured":"Bradski G (2000) The openCV library. Dr Dobb\u2019s Journal of Software Tools 25:120, 122\u2013125","journal-title":"Dr Dobb\u2019s Journal of Software Tools"},{"key":"14344_CR6","doi-asserted-by":"crossref","unstructured":"Chang C-C, Fu M-H, Huang H-H, Chen H-H (2019) An interactive approach to integrating external textual knowledge for multimodal lifelog retrieval. In: Proceedings of the ACM workshop on lifelog search challenge, pp 41\u201344","DOI":"10.1145\/3326460.3329163"},{"key":"14344_CR7","doi-asserted-by":"crossref","unstructured":"Chu T-T, Chang C-C, Yen A-Z, Huang H-H, Chen H-H (2020) Multimodal retrieval through relations between subjects and objects in lifelog images. In: Proceedings of the third annual workshop on lifelog search challenge, pp 51\u201355","DOI":"10.1145\/3379172.3391723"},{"key":"14344_CR8","doi-asserted-by":"crossref","unstructured":"Chu T-H, Huang H-H, Chen H-H (2019) Image recall on image-text intertwined lifelogs. In: 2019 IEEE\/WIC\/ACM international conference on Web Intelligence (WI), IEEE, pp 398\u2013402","DOI":"10.1145\/3350546.3352555"},{"key":"14344_CR9","doi-asserted-by":"crossref","unstructured":"Collell G, Zhang T, Moens M-F (2017) Imagined visual representations as multimodal embeddings. In: Proceedings of the AAAI conference on artificial intelligence, vol 31. pp 4378\u20134384","DOI":"10.1609\/aaai.v31i1.11155"},{"key":"14344_CR10","doi-asserted-by":"crossref","unstructured":"Cornia M, Stefanini M, Baraldi L, Cucchiara R (2020) Meshed-memory transformer for image captioning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10578\u201310587","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"14344_CR11","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) Bert: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, vol 1. (Long and Short Papers), pp 4171\u20134186"},{"issue":"5","key":"14344_CR12","doi-asserted-by":"publisher","first-page":"1948","DOI":"10.1016\/j.chb.2011.05.002","volume":"27","author":"AR Doherty","year":"2011","unstructured":"Doherty AR, Caprani N, Kalnikaite V, Gurrin C, Smeaton AF, O\u2019Connor NE, et al. (2011) Passively recognising human activities through lifelogging. Comput Hum Behav 27(5):1948\u20131958","journal-title":"Comput Hum Behav"},{"key":"14344_CR13","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.jvcir.2018.05.008","volume":"55","author":"C Fan","year":"2018","unstructured":"Fan C, Zhang Z, Crandall DJ (2018) Deepdiary: Lifelogging image captioning and summarization. J Vis Commun Image Represent 55:40\u201355","journal-title":"J Vis Commun Image Represent"},{"key":"14344_CR14","unstructured":"Fu M-H, Chang C-C, Huang H-H, Chen H-H (2019) Incorporating external textual knowledge for life event recognition and retrieval. In: Proceedings of the 14th NTCIR conference on evaluation of information access technologies, pp 61\u201371"},{"key":"14344_CR15","doi-asserted-by":"crossref","unstructured":"Fu M-H, Yen A-Z, Huang H-H, Chen H-H (2020) Incorporating semantic knowledge for visual lifelog activity recognition. In: Proceedings of the 2020 international conference on multimedia retrieval, pp 450\u2013456","DOI":"10.1145\/3372278.3390700"},{"key":"14344_CR16","unstructured":"Gurrin C, Joho H, Hopfgartner F, Zhou L, Ninh V-T, Le T-K, Albatal R, Dang-Nguyen D-T, Healy G (2019) Overview of the ntcir-14 lifelog-3 task. In: Proceedings of the 14th NTCIR conference, NII, pp 14\u201326"},{"key":"14344_CR17","doi-asserted-by":"crossref","unstructured":"Gurrin C, \u00f3r J\u00f3nsson B, Sch\u00f6ffmann K, Dang-Nguyen D-T, Loko\u010d J, Tran M-T, H\u00fcrst W, Rossetto L, Healy G (2021) Introduction to the fourth annual lifelog search challenge, lsc\u201921. In: Proc. international conference on multimedia retrieval (ICMR\u201921). ACM","DOI":"10.1145\/3460426.3470945"},{"key":"14344_CR18","doi-asserted-by":"crossref","unstructured":"Huang L, Wang W, Chen J, Wei X-Y (2019) Attention on attention for image captioning. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4634\u20134643","DOI":"10.1109\/ICCV.2019.00473"},{"key":"14344_CR19","unstructured":"Karthikeyan T, Manikandaprabhu P, Nithya S (2014) A survey on text and content based image retrieval system for image mining. Int J Eng :3"},{"key":"14344_CR20","doi-asserted-by":"crossref","unstructured":"Krishna R, Zhu Y, Groth O, Johnson J, Hata K, Kravitz J, Chen S, Kalantidis Y, Li L-J, Shamma DA, Bernstein M, Fei-fei L (2016) Visual genome: Connecting language and vision using crowdsourced dense image annotations","DOI":"10.1007\/s11263-016-0981-7"},{"key":"14344_CR21","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inf Process Syst :25"},{"key":"14344_CR22","doi-asserted-by":"crossref","unstructured":"Latif A, Rasheed A, Sajid U, Ahmed J, Ali N, Ratyal NI, Zafar B, Dar SH, Sajid M, Khalil T (2019) Content-based image retrieval and feature extraction: a comprehensive review. Math Probl Eng :2019","DOI":"10.1155\/2019\/9658350"},{"key":"14344_CR23","doi-asserted-by":"crossref","unstructured":"Le N-K, Nguyen D-H, Hoang T-H, Nguyen T-A, Truong T-D, Dinh D-T, Luong Q-A, Vo-Ho V-K, Nguyen V-T, Tran M-T (2019) Smart lifelog retrieval system with habit-based concepts and moment visualization. In: Proceedings of the ACM workshop on lifelog search challenge, pp 1\u20136","DOI":"10.1145\/3326460.3329155"},{"key":"14344_CR24","doi-asserted-by":"crossref","unstructured":"Li Y, Ouyang W, Zhou B, Wang K, Wang X (2017) Scene graph generation from objects, phrases and region captions. In: Proceedings of the IEEE international conference on computer vision, pp 1261\u20131270","DOI":"10.1109\/ICCV.2017.142"},{"key":"14344_CR25","doi-asserted-by":"crossref","unstructured":"Li J, Zhang M, Ma W, Liu Y, Ma S (2020) A multi-level interactive lifelog search engine with user feedback. In: Proceedings of the third annual workshop on lifelog search challenge, pp 29\u201335","DOI":"10.1145\/3379172.3391720"},{"key":"14344_CR26","unstructured":"Lim J-H (2017) Visualizing personal lifelog data for deeper insights at the ntcir-13 lifelog-2 task"},{"key":"14344_CR27","unstructured":"Lin C-Y (2004) ROUGE: A package for automatic evaluation of summaries. In: Text summarization branches out, Association for computational linguistics, pp 74\u201381"},{"key":"14344_CR28","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: European conference on computer vision, Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"14344_CR29","doi-asserted-by":"crossref","unstructured":"Maekawa T (2013) A sensor device for automatic food lifelogging that is embedded in home ceiling light: A preliminary investigation. In: 2013 7th international conference on pervasive computing technologies for healthcare and workshops, IEEE, pp 405\u2013407","DOI":"10.4108\/icst.pervasivehealth.2013.252128"},{"issue":"1","key":"14344_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40648-020-00181-2","volume":"7","author":"K Nakashima","year":"2020","unstructured":"Nakashima K, Iwashita Y, Kurazume R (2020) Lifelogging caption generation via fourth-person vision in a human\u2013robot symbiotic environment. ROBOMECH J 7(1):1\u201315","journal-title":"ROBOMECH J"},{"key":"14344_CR31","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning CD (2014) Glove: Global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), pp 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"14344_CR32","doi-asserted-by":"crossref","unstructured":"Poleg Y, Arora C, Peleg S (2014) Temporal segmentation of egocentric videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2537\u20132544","DOI":"10.1109\/CVPR.2014.325"},{"key":"14344_CR33","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"issue":"4","key":"14344_CR34","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2016.2587640","volume":"39","author":"O Vinyals","year":"2016","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2016) Show and tell: Lessons learned from the 2015 mscoco image captioning challenge. IEEE Trans Pattern Anal Mach Intell 39(4):652\u2013663","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"14344_CR35","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/s13735-012-0010-8","volume":"1","author":"P Wang","year":"2012","unstructured":"Wang P, Smeaton AF (2012) Semantics-based selection of everyday concepts in visual lifelogging. Int J Multimed Inf Retr 1(2):87\u2013101","journal-title":"Int J Multimed Inf Retr"},{"issue":"3","key":"14344_CR36","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1080\/09658211.2014.886703","volume":"23","author":"E Woodberry","year":"2015","unstructured":"Woodberry E, Browne G, Hodges S, Watson P, Kapur N, Woodberry K (2015) The use of a wearable camera improves autobiographical memory in patients with alzheimer\u2019s disease. Memory 23(3):340\u2013349","journal-title":"Memory"},{"key":"14344_CR37","doi-asserted-by":"crossref","unstructured":"Xiong B, Grauman K (2014) Detecting snap points in egocentric video with a web photo prior. In: European conference on computer vision, Springer, pp 282\u2013298","DOI":"10.1007\/978-3-319-10602-1_19"},{"key":"14344_CR38","doi-asserted-by":"crossref","unstructured":"Yen A-Z, Huang H-H, Chen H-H (2021) Unanswerable question correction in question answering over personal knowledge base Thirty-fifth AAAI conference on artificial intelligence (AAAI-21)","DOI":"10.1145\/3511808.3557717"},{"key":"14344_CR39","doi-asserted-by":"crossref","unstructured":"Yen A-Z, Huang H-H, Chen H-H (2021) Ten questions in lifelog mining and information recall. In: Proceedings of the 2021 international conference on multimedia retrieval, pp 511\u2013518","DOI":"10.1145\/3460426.3463607"},{"key":"14344_CR40","unstructured":"Zhang T, Kishore V, Wu F, Weinberger KQ, Artzi Y (2020) Bertscore: Evaluating text generation with bert. In: International conference on learning representations"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-14344-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-14344-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-14344-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,3]],"date-time":"2023-10-03T09:21:37Z","timestamp":1696324897000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-14344-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,16]]},"references-count":40,"journal-issue":{"issue":"24","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["14344"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-14344-x","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2023,2,16]]},"assertion":[{"value":"2 May 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 January 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}]}}