{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T15:30:08Z","timestamp":1762097408776,"version":"build-2065373602"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T00:00:00Z","timestamp":1703376000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T00:00:00Z","timestamp":1703376000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,3]]},"DOI":"10.1007\/s00521-023-09349-4","type":"journal-article","created":{"date-parts":[[2023,12,24]],"date-time":"2023-12-24T15:01:56Z","timestamp":1703430116000},"page":"4485-4501","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Foodnet: multi-scale and label dependency learning-based multi-task network for food and ingredient recognition"],"prefix":"10.1007","volume":"36","author":[{"given":"Feng","family":"Shuang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhouxian","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yong","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xia","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shidi","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,12,24]]},"reference":[{"key":"9349_CR1","doi-asserted-by":"crossref","unstructured":"Guillaumin M, Gool LV, et al. (2014) Food-101 - mining discriminative components with random forests. In: Proceeding of the 13 th European Conference Computer Vison, Springer, Cham, Switzerland, pp 446\u2013461","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"9349_CR2","doi-asserted-by":"crossref","unstructured":"He H, Kong F, Tan J (2016) DietCam: multi-view food recognition using a multikernel SVM. In: Proceedings of the IEEE journal of biomedical and health informatics, May, pp 848\u2013855","DOI":"10.1109\/JBHI.2015.2419251"},{"key":"9349_CR3","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: IEEE conference on computer vision and pattern recognition (CVPR), pp 770-778","DOI":"10.1109\/CVPR.2016.90"},{"key":"9349_CR4","doi-asserted-by":"crossref","unstructured":"Szegedy C, et al. (2014) Going deeper with convolutions. IEEE Comput Soc, pp 1-9","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"9349_CR5","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Conference on computer vision and pattern recognition (CVPR) , pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"9349_CR6","doi-asserted-by":"crossref","unstructured":"Zhang N, Donahue J, et al. (2014) Part-based r-cnns for fine-grained category detection. In: Proceedings of the ECCV. Springer , pp 834\u2013849","DOI":"10.1007\/978-3-319-10590-1_54"},{"key":"9349_CR7","doi-asserted-by":"crossref","unstructured":"Chen Y, Bai Y, Zhang W, Mei T (2019) Destruction and construction learning for fine-grained image recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 5152\u20135161","DOI":"10.1109\/CVPR.2019.00530"},{"key":"9349_CR8","doi-asserted-by":"crossref","unstructured":"Won CS (2020) Multi-scale CNN for fine-grained image recognition. In: IEEE Access , pp 116663\u2013116674","DOI":"10.1109\/ACCESS.2020.3005150"},{"key":"9349_CR9","doi-asserted-by":"crossref","unstructured":"Zhao Y, Yan K, Huang F, Li J (2021) Graph-based high-order relation discovery for fine-grained recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 15074\u201315083","DOI":"10.1109\/CVPR46437.2021.01483"},{"key":"9349_CR10","doi-asserted-by":"crossref","unstructured":"Lin T, RoyChowdhury A, Maji S (2018) Bilinear convolutional neural networks for fine-grained visual recognition. In: IEEE transactions on pattern analysis and machine intelligence, pp 1309\u20131322","DOI":"10.1109\/TPAMI.2017.2723400"},{"key":"9349_CR11","doi-asserted-by":"crossref","unstructured":"Fu J, Zheng H, Mei T (2017) Look closer to see better: recurrent attention convolutional neural network for fine-grained image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4476\u20134484","DOI":"10.1109\/CVPR.2017.476"},{"key":"9349_CR12","doi-asserted-by":"crossref","unstructured":"Yan T, Li H, Sun B, Wang Z, Luo Z (2022) Discriminative feature mining and enhancement network for low-resolution fine-grained image recognition. In: IEEE transactions on circuits and systems for video technology","DOI":"10.1109\/TCSVT.2022.3144186"},{"issue":"6","key":"9349_CR13","doi-asserted-by":"publisher","first-page":"2480","DOI":"10.1109\/TCSVT.2020.3020079","volume":"31","author":"C Liu","year":"2021","unstructured":"Liu C, Liang Y, Xue Y, Qian X, Fu J (2021) Food and ingredient joint learning for fine-grained recognition. IEEE Trans Circuits Syst Video Technol 31(6):2480\u20132493","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"9349_CR14","doi-asserted-by":"crossref","unstructured":"Zhang C, Huang Z, Liu S, Xiao J (2022) Dual-channel multi-task CNN for no-reference screen content image quality assessment. In: IEEE transactions on circuits and systems for video technology","DOI":"10.1109\/TCSVT.2022.3143321"},{"key":"9349_CR15","doi-asserted-by":"crossref","unstructured":"Chen J, Ngo CW, (2016) Deep-based ingredient recognition for cooking recipe retrieval. In: Acm on multimedia conference ACM","DOI":"10.1145\/2964284.2964315"},{"key":"9349_CR16","unstructured":"Matsuda Y, Yanai K (2012) Multiple-food recognition considering co-occurrence employing manifold ranking. In: Proceedings of the international conference on pattern recognition, pp 2017\u20132020"},{"key":"9349_CR17","doi-asserted-by":"publisher","unstructured":"Bhadane P, Ravikesh Bhaladhare P (2021) Optimized deep neuro fuzzy network based automatic approach for segmentation and food recognition. In: 2021 in 5th international conference on information systems and computer networks (ISCON), pp 1\u20134. https:\/\/doi.org\/10.1109\/ISCON52037.2021.9702370","DOI":"10.1109\/ISCON52037.2021.9702370"},{"issue":"4","key":"9349_CR18","doi-asserted-by":"publisher","first-page":"1261","DOI":"10.1109\/JBHI.2014.2308928","volume":"18","author":"MM Anthimopoulos","year":"2014","unstructured":"Anthimopoulos MM, Gianola L, Scarnato L, Diem P, Mougiakakou SG (2014) A food recognition system for diabetic patients based on an optimized bag-of-features model. IEEE J Biomed Health Inform 18(4):1261\u20131271. https:\/\/doi.org\/10.1109\/JBHI.2014.2308928","journal-title":"IEEE J Biomed Health Inform"},{"issue":"7","key":"9349_CR19","doi-asserted-by":"publisher","first-page":"1926","DOI":"10.1109\/JBHI.2020.2987943","volume":"24","author":"FPW Lo","year":"2020","unstructured":"Lo FPW, Sun Y, Qiu J, Lo B (2020) Image-based food classification and volume estimation for dietary assessment: a review. IEEE J Biomed Health Inform 24(7):1926\u20131939. https:\/\/doi.org\/10.1109\/JBHI.2020.2987943","journal-title":"IEEE J Biomed Health Inform"},{"issue":"3","key":"9349_CR20","doi-asserted-by":"publisher","first-page":"588","DOI":"10.1109\/JBHI.2016.2636441","volume":"21","author":"G Ciocca","year":"2017","unstructured":"Ciocca G, Napoletano P, Schettini R (2017) Food recognition: a new dataset, experiments, and results. IEEE J Biomed Health Inform 21(3):588\u2013598. https:\/\/doi.org\/10.1109\/JBHI.2016.2636441","journal-title":"IEEE J Biomed Health Inform"},{"issue":"7","key":"9349_CR21","doi-asserted-by":"publisher","first-page":"1926","DOI":"10.1109\/JBHI.2020.2987943","volume":"24","author":"FPW Lo","year":"2020","unstructured":"Lo FPW, Sun Y, Qiu J, Lo B (2020) Image-based food classification and volume estimation for dietary assessment: a review. IEEE J Biomed Health Inform 24(7):1926\u20131939. https:\/\/doi.org\/10.1109\/JBHI.2020.2987943","journal-title":"IEEE J Biomed Health Inform"},{"key":"9349_CR22","doi-asserted-by":"crossref","unstructured":"Lu Y, Allegra D, Anthimopoulos M, Stanco F, Farinella GM, Mougiakakou S (2018) A multi-task learning approach for meal assessment. In: Proceedings of the joint workshop on multimedia for cooking and eating activities and multimedia assisted dietary management, pp 46\u201352","DOI":"10.1145\/3230519.3230593"},{"key":"9349_CR23","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1109\/TIP.2019.2929447","volume":"29","author":"S Jiang","year":"2020","unstructured":"Jiang S, Min W, Liu L, Luo Z (2020) Multi-scale multi-view deep feature aggregation for food recognition. IEEE Trans Image Process 29:265\u2013276","journal-title":"IEEE Trans Image Process"},{"key":"9349_CR24","doi-asserted-by":"crossref","unstructured":"Jiang S, et al. (2020) Few-shot food recognition via multi-view representation learning. In: ACM transactions on multimedia computing communications and applications","DOI":"10.1145\/3391624"},{"key":"9349_CR25","first-page":"1","volume":"2019","author":"H Zhao","year":"2019","unstructured":"Zhao H, Yap K, Kot AC, Duan L, Cheung N (2019) Few-shot and many-shot fusion learning in mobile visual food recognition. IEEE Int Symp Circ Syst (ISCAS) 2019:1\u20135","journal-title":"IEEE Int Symp Circ Syst (ISCAS)"},{"key":"9349_CR26","first-page":"1252","volume":"2019","author":"L Zhang","year":"2019","unstructured":"Zhang L, Zhao J, Li S, Shi B, Duan L-Y (2019) From market to dish: multi-ingredient image recognition for personalized recipe recommendation. IEEE Int Conf Multimedia Expo (ICME) 2019:1252\u20131257","journal-title":"IEEE Int Conf Multimedia Expo (ICME)"},{"key":"9349_CR27","doi-asserted-by":"publisher","first-page":"360","DOI":"10.1016\/j.jvcir.2019.03.011","volume":"60","author":"E Aguilar","year":"2019","unstructured":"Aguilar E, Bolanos M, Radeva P (2019) Regularized uncertainty-based multi-task learning model for food analysis. J Vis Commun Image Represent 60:360\u2013370","journal-title":"J Vis Commun Image Represent"},{"key":"9349_CR28","doi-asserted-by":"crossref","unstructured":"Ege T, Yanai K (2018) Multi-task learning of dish detection and calorie estimation. In: Proceedings of the joint workshop on multimedia for cooking and eating activities and multimedia assisted dietary management, pp 53\u201358","DOI":"10.1145\/3230519.3230594"},{"key":"9349_CR29","doi-asserted-by":"crossref","unstructured":"Read J, Pfahringer B, Holmes G, Frank E (2009) Classifier chains for multi-label classification. Mach Learn Knowl Discov Databases pp 254\u2013269","DOI":"10.1007\/978-3-642-04174-7_17"},{"key":"9349_CR30","unstructured":"Nam J, Menc\u00eda EL, Kim HJ, F\u00fcrnkranz J (2017) Maximizing subset accuracy with recurrentneural networks in multi-label classification. In: Advances in neural information processing systems, pp 5419\u20135429"},{"key":"9349_CR31","doi-asserted-by":"crossref","unstructured":"Wang J, Y ang Y, Mao J, Huang Z, Huang C, Xu W (2016) Cnn-rnn: A unified framework for multi-label image classification. In: IEEE conference on computer vision and pattern recognition, pp 2285\u20132294","DOI":"10.1109\/CVPR.2016.251"},{"issue":"10","key":"9349_CR32","doi-asserted-by":"publisher","first-page":"2801","DOI":"10.1109\/TMM.2018.2812605","volume":"20","author":"J Zhang","year":"2018","unstructured":"Zhang J, Wu Q, Shen C, Zhang J, Lu J (2018) Multilabel image classification with regional latent semantic dependencies. IEEE Trans Multimedia 20(10):2801\u20132813","journal-title":"IEEE Trans Multimedia"},{"key":"9349_CR33","doi-asserted-by":"crossref","unstructured":"Host-Parasite: Graph LSTM-In-LSTM for group activity recognition. In: IEEE transactions on neural networks and learning systems (TNNLS), 32(2): 663\u2013674 (2021)","DOI":"10.1109\/TNNLS.2020.2978942"},{"key":"9349_CR34","doi-asserted-by":"crossref","unstructured":"Chen T, Xu M, Hui X, Wu H, Lin L (2019) Learning semantic-specific graph representation for multi-label image recognition. In: IEEE\/CVF international conference on computer vision (ICCV), pp 522\u2013531","DOI":"10.1109\/ICCV.2019.00061"},{"key":"9349_CR35","doi-asserted-by":"crossref","unstructured":"You R, et al. (2020) Cross-modality attention with semantic graph embedding for multi-label classification. In: The AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v34i07.6964"},{"key":"9349_CR36","unstructured":"Yu F, Vladlen K (2015) Multi-scale context aggregation by dilated convolutions. In: CVPR, arXiv preprint arXiv:1511.07122"},{"key":"9349_CR37","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2017) Pyramid scene parsing network. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2881\u20132890","DOI":"10.1109\/CVPR.2017.660"},{"key":"9349_CR38","doi-asserted-by":"crossref","unstructured":"Yang M, Yu K, Zhang C, Li Z, Yang K (2018) DenseASPP for semantic segmentation in street scenes. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3684\u20133692","DOI":"10.1109\/CVPR.2018.00388"},{"issue":"2017","key":"9349_CR39","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/j.media.2016.10.004","volume":"36","author":"K Kamnitsas","year":"2017","unstructured":"Kamnitsas K et al (2017) Efficient multi-scale 3D CNN with fully connected CRF for accurate brain lesion segmentation. Med Image Anal 36(2017):61\u201378","journal-title":"Med Image Anal"},{"key":"9349_CR40","unstructured":"Cui Z, Wenlin C, Yixin C (2016) Multi-scale convolutional neural networks for time series classification. In: Computer vision and pattern recognition, arXiv preprint arXiv:1603.06995"},{"key":"9349_CR41","doi-asserted-by":"crossref","unstructured":"Li Y, Chen Y, Wang N, Zhang Z (2019) Scale-aware trident networks for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 6054\u20136063","DOI":"10.1109\/ICCV.2019.00615"},{"key":"9349_CR42","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning C (2014) GloV e: global vectors for word representation. In: Proceedings of empirical methods in natural language processing, pp 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"9349_CR43","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. In: International conference on learning representations, pp 1\u201312"},{"key":"9349_CR44","doi-asserted-by":"crossref","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-CAM: visual explanations from deep networks via gradient-based localization. In: IEEE international conference on computer vision (ICCV), pp 618\u2013626","DOI":"10.1109\/ICCV.2017.74"},{"key":"9349_CR45","doi-asserted-by":"crossref","unstructured":"Zhu K, Wu J (2021) Residual attention: a simple but effective method for multi-label recognition. In: IEEE\/CVF international conference on computer vision (ICCV), pp 184\u2013193","DOI":"10.1109\/ICCV48922.2021.00025"},{"key":"9349_CR46","doi-asserted-by":"publisher","first-page":"1514","DOI":"10.1109\/TIP.2020.3045639","volume":"30","author":"J Chen","year":"2021","unstructured":"Chen J, Zhu B, Ngo C-W, Chua T-S, Jiang Y-G (2021) A study of multi-task and region-wise deep learning for food ingredient recognition. IEEE Trans Image Process 30:1514\u20131526","journal-title":"IEEE Trans Image Process"},{"key":"9349_CR47","doi-asserted-by":"publisher","first-page":"116663","DOI":"10.1109\/ACCESS.2020.3005150","volume":"8","author":"CS Won","year":"2020","unstructured":"Won CS (2020) Multi-scale CNN for fine-grained image recognition. IEEE Access 8:116663\u2013116674","journal-title":"IEEE Access"},{"key":"9349_CR48","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"9349_CR49","doi-asserted-by":"publisher","unstructured":"Liu Z, et al. (2022) Swin transformer V2: scaling up capacity and resolution. In: 2022 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), New Orleans, LA, USA, pp 11999\u201312009. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01170","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"9349_CR50","doi-asserted-by":"crossref","unstructured":"Qi Charles R, et al (2017) Pointnet: deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2017.16"},{"key":"9349_CR51","doi-asserted-by":"crossref","unstructured":"Zhao H, et al (2021) Point transformer. In: Proceedings of the IEEE\/CVF international conference on computer vision","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"9349_CR52","unstructured":"Kipf TN, Welling M (2016) Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-09349-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-023-09349-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-023-09349-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,17]],"date-time":"2024-02-17T10:13:37Z","timestamp":1708164817000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-023-09349-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,24]]},"references-count":52,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2024,3]]}},"alternative-id":["9349"],"URL":"https:\/\/doi.org\/10.1007\/s00521-023-09349-4","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2023,12,24]]},"assertion":[{"value":"20 November 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}