{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,26]],"date-time":"2025-09-26T13:33:58Z","timestamp":1758893638034},"reference-count":58,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s00521-024-09772-1","type":"journal-article","created":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T09:01:34Z","timestamp":1714208494000},"page":"13703-13714","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A single-stream adaptive scene layout modeling method for scene recognition"],"prefix":"10.1007","volume":"36","author":[{"given":"Qun","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feng","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyuan","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengfei","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"9772_CR1","doi-asserted-by":"publisher","first-page":"2392","DOI":"10.1109\/TMM.2021.3080076","volume":"24","author":"S Liu","year":"2022","unstructured":"Liu S, Tian G, Zhang Y, Duan P (2022) Scene recognition mechanism for service robot adapting various families: a CNN-based approach using multi-type cameras. IEEE Trans Multimedia 24:2392\u20132406. https:\/\/doi.org\/10.1109\/TMM.2021.3080076","journal-title":"IEEE Trans Multimedia"},{"doi-asserted-by":"publisher","unstructured":"Gao C, Chen J, Liu S, Wang L, Zhang Q, Wu Q (2021) Room-and-object aware knowledge reasoning for remote embodied referring expression. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 3063\u20133072 . https:\/\/doi.org\/10.1109\/CVPR46437.2021.00308","key":"9772_CR2","DOI":"10.1109\/CVPR46437.2021.00308"},{"key":"9772_CR3","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1109\/TMM.2020.3046877","volume":"24","author":"H Zeng","year":"2022","unstructured":"Zeng H, Song X, Chen G, Jiang S (2022) Amorphous region context modeling for scene recognition. IEEE Trans Multimedia 24:141\u2013151. https:\/\/doi.org\/10.1109\/TMM.2020.3046877","journal-title":"IEEE Trans Multimedia"},{"unstructured":"Javed SA, Nelakanti AK (2017) Object-level context modeling for scene classification with context-CNN. arXiv preprint arXiv:1705.04358","key":"9772_CR4"},{"key":"9772_CR5","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1109\/TIP.2019.2933728","volume":"29","author":"X Song","year":"2020","unstructured":"Song X, Jiang S, Wang B, Chen C, Chen G (2020) Image representations with spatial object-to-object relations for RGB-D scene recognition. IEEE Trans Image Process 29:525\u2013537. https:\/\/doi.org\/10.1109\/TIP.2019.2933728","journal-title":"IEEE Trans Image Process"},{"key":"9772_CR6","doi-asserted-by":"publisher","first-page":"5877","DOI":"10.1109\/TIP.2020.2986599","volume":"29","author":"G Chen","year":"2020","unstructured":"Chen G, Song X, Zeng H, Jiang S (2020) Scene recognition with prototype-agnostic scene layout. IEEE Trans Image Process 29:5877\u20135888. https:\/\/doi.org\/10.1109\/TIP.2020.2986599","journal-title":"IEEE Trans Image Process"},{"doi-asserted-by":"publisher","unstructured":"Laranjeira C, Lacerda A, Nascimento ER (2019) On modeling context from objects with a long short-term memory for indoor scene recognition. In: 2019 32nd SIBGRAPI Conference on Graphics, Patterns and Images (SIBGRAPI), pp 249\u2013256 . https:\/\/doi.org\/10.1109\/SIBGRAPI.2019.00041","key":"9772_CR7","DOI":"10.1109\/SIBGRAPI.2019.00041"},{"doi-asserted-by":"publisher","unstructured":"Zuo Z, Shuai B, Wang G, Liu X, Wang X, Wang B, Chen Y (2015) Convolutional recurrent neural networks: Learning spatial dependencies for image representation. In: 2015 IEEE conference on computer vision and pattern recognition workshops (CVPRW), pp 18\u201326 https:\/\/doi.org\/10.1109\/CVPRW.2015.7301268","key":"9772_CR8","DOI":"10.1109\/CVPRW.2015.7301268"},{"issue":"7","key":"9772_CR9","doi-asserted-by":"publisher","first-page":"2983","DOI":"10.1109\/TIP.2016.2548241","volume":"25","author":"Z Zuo","year":"2016","unstructured":"Zuo Z, Shuai B, Wang G, Liu X, Wang X, Wang B, Chen Y (2016) Learning contextual dependence with convolutional hierarchical recurrent neural networks. IEEE Trans Image Process 25(7):2983\u20132996. https:\/\/doi.org\/10.1109\/TIP.2016.2548241","journal-title":"IEEE Trans Image Process"},{"issue":"6","key":"9772_CR10","doi-asserted-by":"publisher","first-page":"1452","DOI":"10.1109\/TPAMI.2017.2723009","volume":"40","author":"B Zhou","year":"2018","unstructured":"Zhou B, Lapedriza A, Khosla A, Oliva A, Torralba A (2018) Places: a 10 million image database for scene recognition. IEEE Trans Pattern Anal Mach Intell 40(6):1452\u20131464. https:\/\/doi.org\/10.1109\/TPAMI.2017.2723009","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"unstructured":"Zhou B, Khosla A, Lapedriza A, Oliva A, Torralba A (2014) Object detectors emerge in deep scene CNNs. arXiv preprint arXiv:1412.6856","key":"9772_CR11"},{"unstructured":"Lin M, Chen Q, Yan S (2013) Network in network. arXiv preprint arXiv:1312.4400","key":"9772_CR12"},{"doi-asserted-by":"publisher","unstructured":"Quattoni A, Torralba A (2009) Recognizing indoor scenes. In: 2009 IEEE conference on computer vision and pattern recognition, pp 413\u2013420 . https:\/\/doi.org\/10.1109\/CVPR.2009.5206537","key":"9772_CR13","DOI":"10.1109\/CVPR.2009.5206537"},{"doi-asserted-by":"publisher","unstructured":"Xiao J, Hays J, Ehinger KA, Oliva A, Torralba A (2010) Sun database: large-scale scene recognition from abbey to zoo. In: 2010 IEEE computer society conference on computer vision and pattern recognition, pp 3485\u20133492 . https:\/\/doi.org\/10.1109\/CVPR.2010.5539970","key":"9772_CR14","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"9772_CR15","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3129227","author":"K Liu","year":"2021","unstructured":"Liu K, Moon S (2021) Dynamic parallel pyramid networks for scene recognition. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2021.3129227","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"doi-asserted-by":"publisher","unstructured":"Qiao Z, Yuan X, Zhuang C, Meyarian A (2021) Attention pyramid module for scene recognition. In: 2020 25th international conference on pattern recognition (ICPR), pp 7521\u20137528 . https:\/\/doi.org\/10.1109\/ICPR48806.2021.9412235","key":"9772_CR16","DOI":"10.1109\/ICPR48806.2021.9412235"},{"doi-asserted-by":"publisher","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 7132\u20137141. https:\/\/doi.org\/10.1109\/CVPR.2018.00745","key":"9772_CR17","DOI":"10.1109\/CVPR.2018.00745"},{"doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","key":"9772_CR18","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: 2017 IEEE conference on computer vision and pattern recognition (CVPR), pp. 5987\u20135995. https:\/\/doi.org\/10.1109\/CVPR.2017.634","key":"9772_CR19","DOI":"10.1109\/CVPR.2017.634"},{"doi-asserted-by":"crossref","unstructured":"Xie Y, Yan J, Kang L, Guo Y, Zhang J, Luan, X (2022) FCT: fusing CNN and transformer for scene classification. Int J Multimedia Inf Retrieval 1\u20138","key":"9772_CR20","DOI":"10.1007\/s13735-022-00252-7"},{"key":"9772_CR21","first-page":"339","volume":"76","author":"B Chen","year":"2018","unstructured":"Chen B, Li J, Wei G, Ma B (2018) A novel localized and second order feature coding network for image recognition. Lect Notes Comput Sci 76:339\u2013348","journal-title":"Lect Notes Comput Sci"},{"key":"9772_CR22","volume":"102","author":"A L\u00f3pez-Cifuentes","year":"2020","unstructured":"L\u00f3pez-Cifuentes A, Escudero-Vi\u00f1olo M, Besc\u00f3s J, Garc\u00eda-Mart\u00edn \u00c1 (2020) Semantic-aware scene recognition. Lect Notes Comput Sci 102:107256","journal-title":"Lect Notes Comput Sci"},{"key":"9772_CR23","doi-asserted-by":"publisher","first-page":"82066","DOI":"10.1109\/ACCESS.2020.2989863","volume":"8","author":"H Seong","year":"2020","unstructured":"Seong H, Hyun J, Kim E (2020) FOSNet: an end-to-end trainable deep neural network for scene recognition. IEEE Access 8:82066\u201382077. https:\/\/doi.org\/10.1109\/ACCESS.2020.2989863","journal-title":"IEEE Access"},{"issue":"4","key":"9772_CR24","doi-asserted-by":"publisher","first-page":"2055","DOI":"10.1109\/TIP.2017.2675339","volume":"26","author":"L Wang","year":"2017","unstructured":"Wang L, Guo S, Huang W, Xiong Y, Qiao Y (2017) Knowledge guided disambiguation for large-scale scene classification with multi-resolution CNNs. IEEE Trans Image Process 26(4):2055\u20132068. https:\/\/doi.org\/10.1109\/TIP.2017.2675339","journal-title":"IEEE Trans Image Process"},{"issue":"6","key":"9772_CR25","doi-asserted-by":"publisher","first-page":"1715","DOI":"10.1109\/TCSVT.2018.2848543","volume":"29","author":"N Sun","year":"2019","unstructured":"Sun N, Li W, Liu J, Han G, Wu C (2019) Fusing object semantics and deep appearance features for scene recognition. IEEE Trans Circuits Syst Video Technol 29(6):1715\u20131728. https:\/\/doi.org\/10.1109\/TCSVT.2018.2848543","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"6","key":"9772_CR26","doi-asserted-by":"publisher","first-page":"1437","DOI":"10.1109\/TPAMI.2017.2711011","volume":"40","author":"R Arandjelovic","year":"2018","unstructured":"Arandjelovic R, Gronat P, Torii A, Pajdla T, Sivic J (2018) Netvlad: CNN architecture for weakly supervised place recognition. IEEE Trans Pattern Anal Mach Intell 40(6):1437\u20131451. https:\/\/doi.org\/10.1109\/TPAMI.2017.2711011","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"doi-asserted-by":"publisher","unstructured":"Li Y, Dixit M, Vasconcelos N (2017) Deep scene image classification with the MFAFVNet. In: 2017 IEEE international conference on computer vision (ICCV), pp 5757\u20135765. https:\/\/doi.org\/10.1109\/ICCV.2017.613","key":"9772_CR27","DOI":"10.1109\/ICCV.2017.613"},{"unstructured":"Dixit MD, Vasconcelos N (2016) Object based scene representations using fisher scores of local subspace projections. Adv Neur Inf 29","key":"9772_CR28"},{"key":"9772_CR29","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1016\/j.neucom.2016.11.023","volume":"225","author":"P Tang","year":"2017","unstructured":"Tang P, Wang H, Kwong S (2017) G-ms2f: Googlenet based multi-stage feature fusion of deep CNN for scene recognition. Neurocomputing 225:188\u2013197","journal-title":"Neurocomputing"},{"key":"9772_CR30","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1016\/j.neucom.2019.01.090","volume":"338","author":"S Liu","year":"2019","unstructured":"Liu S, Tian G, Xu Y (2019) A novel scene classification model combining resnet based transfer learning and data augmentation with a filter. Neurocomputing 338:191\u2013206","journal-title":"Neurocomputing"},{"doi-asserted-by":"publisher","unstructured":"Yang S, Ramanan D (2015) Multi-scale recognition with DAG-CNNs. In: 2015 IEEE international conference on computer vision (ICCV), pp 1215\u20131223. https:\/\/doi.org\/10.1109\/ICCV.2015.144","key":"9772_CR31","DOI":"10.1109\/ICCV.2015.144"},{"key":"9772_CR32","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.116382","volume":"193","author":"PS Yee","year":"2022","unstructured":"Yee PS, Lim KM, Lee CP (2022) Deepscene: Scene classification via convolutional neural network with spatial pyramid pooling. Expert Syst Appl 193:116382","journal-title":"Expert Syst Appl"},{"issue":"12","key":"9772_CR33","doi-asserted-by":"publisher","first-page":"3102","DOI":"10.1109\/TPAMI.2019.2921960","volume":"42","author":"M Dixit","year":"2020","unstructured":"Dixit M, Li Y, Vasconcelos N (2020) Semantic fisher scores for task transfer: using objects to classify scenes. IEEE Trans Pattern Anal Mach Intell 42(12):3102\u20133118. https:\/\/doi.org\/10.1109\/TPAMI.2019.2921960","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"6","key":"9772_CR34","doi-asserted-by":"publisher","first-page":"2721","DOI":"10.1109\/TIP.2017.2686017","volume":"26","author":"X Song","year":"2017","unstructured":"Song X, Jiang S, Herranz L (2017) Multi-scale multi-feature context modeling for scene recognition in the semantic manifold. IEEE Trans Image Process 26(6):2721\u20132735. https:\/\/doi.org\/10.1109\/TIP.2017.2686017","journal-title":"IEEE Trans Image Process"},{"issue":"2","key":"9772_CR35","doi-asserted-by":"publisher","first-page":"808","DOI":"10.1109\/TIP.2016.2629443","volume":"26","author":"S Guo","year":"2017","unstructured":"Guo S, Huang W, Wang L, Qiao Y (2017) Locally supervised deep hybrid model for scene recognition. IEEE Trans Image Process 26(2):808\u2013820. https:\/\/doi.org\/10.1109\/TIP.2016.2629443","journal-title":"IEEE Trans Image Process"},{"issue":"4","key":"9772_CR36","doi-asserted-by":"publisher","first-page":"2028","DOI":"10.1109\/TIP.2017.2666739","volume":"26","author":"Z Wang","year":"2017","unstructured":"Wang Z, Wang L, Wang Y, Zhang B, Qiao Y (2017) Weakly supervised patchnets: describing and aggregating local patches for scene recognition. IEEE Trans Image Process 26(4):2028\u20132041. https:\/\/doi.org\/10.1109\/TIP.2017.2666739","journal-title":"IEEE Trans Image Process"},{"key":"9772_CR37","first-page":"474","volume":"74","author":"X Cheng","year":"2018","unstructured":"Cheng X, Lu J, Feng J, Yuan B, Zhou J (2018) Scene recognition with objectness. Lect Notes Comput Sci 74:474\u2013487","journal-title":"Lect Notes Comput Sci"},{"unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","key":"9772_CR38"},{"doi-asserted-by":"publisher","unstructured":"Miao B, Zhou L, Mian AS, Lam TL, Xu Y (2021) Object-to-scene: learning to transfer object knowledge to indoor scene recognition. In: 2021 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 2069\u20132075 . https:\/\/doi.org\/10.1109\/IROS51168.2021.9636700","key":"9772_CR39","DOI":"10.1109\/IROS51168.2021.9636700"},{"doi-asserted-by":"publisher","unstructured":"Zhou L, Cen J, Wang X, Sun Z, Lam TL, Xu Y (2021) Borm: Bayesian object relation model for indoor scene recognition. In: 2021 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 39\u201346. https:\/\/doi.org\/10.1109\/IROS51168.2021.9636024","key":"9772_CR40","DOI":"10.1109\/IROS51168.2021.9636024"},{"doi-asserted-by":"publisher","unstructured":"Pereira R, Gon\u00e7alves N, Garrote L, Barros T, Lopes A, Nunes UJ (2020) Deep-learning based global and semantic feature fusion for indoor scene classification. In: 2020 IEEE international conference on autonomous robot systems and competitions (ICARSC), pp. 67\u201373. https:\/\/doi.org\/10.1109\/ICARSC49921.2020.9096068","key":"9772_CR41","DOI":"10.1109\/ICARSC49921.2020.9096068"},{"issue":"24","key":"9772_CR42","doi-asserted-by":"publisher","first-page":"9069","DOI":"10.3390\/app10249069","volume":"10","author":"W-H Yeo","year":"2020","unstructured":"Yeo W-H, Heo Y-J, Choi Y-J, Kim B-G (2020) Place classification algorithm based on semantic segmented objects. Appl Sci 10(24):9069","journal-title":"Appl Sci"},{"key":"9772_CR43","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou B, Zhao H, Puig X, Xiao T, Fidler S, Barriuso A, Torralba A (2019) Semantic understanding of scenes through the ade20k dataset. Int J Comput Vis 127:302\u2013321","journal-title":"Int J Comput Vis"},{"unstructured":"Redmon J, Farhadi A (2018) Yolov3: an incremental improvement. arXiv preprint arXiv:1804.02767","key":"9772_CR44"},{"issue":"4","key":"9772_CR45","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2018","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2018) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848. https:\/\/doi.org\/10.1109\/TPAMI.2017.2699184","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"doi-asserted-by":"publisher","unstructured":"Caesar H, Uijlings J, Ferrari V (2018) Coco-stuff: Thing and stuff classes in context. In: 2018 IEEE\/CVF conference on computer vision and pattern recognition, pp 1209\u20131218 . https:\/\/doi.org\/10.1109\/CVPR.2018.00132","key":"9772_CR46","DOI":"10.1109\/CVPR.2018.00132"},{"key":"9772_CR47","first-page":"20","volume":"1050","author":"P Velickovic","year":"2017","unstructured":"Velickovic P, Cucurull G, Casanova A, Romero A, Lio P, Bengio Y (2017) Graph attention networks. Stat 1050:20","journal-title":"Stat"},{"unstructured":"Zhou B, Lapedriza A, Xiao J, Torralba A, Oliva A (2014) Learning deep features for scene recognition using places database. In: Advances in neural information processing systems, 27","key":"9772_CR48"},{"unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, 30","key":"9772_CR49"},{"issue":"6","key":"9772_CR50","doi-asserted-by":"publisher","first-page":"1519","DOI":"10.1109\/TMM.2019.2944241","volume":"22","author":"H Zeng","year":"2020","unstructured":"Zeng H, Song X, Chen G, Jiang S (2020) Learning scene attribute for scene recognition. IEEE Trans Multimedia 22(6):1519\u20131530. https:\/\/doi.org\/10.1109\/TMM.2019.2944241","journal-title":"IEEE Trans Multimedia"},{"doi-asserted-by":"crossref","unstructured":"Liu Y, Chen Q, Chen W, Wassell I (2018) Dictionary learning inspired deep network for scene recognition. In: Proceedings of the AAAI conference on artificial intelligence, vol 32","key":"9772_CR51","DOI":"10.1609\/aaai.v32i1.12312"},{"doi-asserted-by":"publisher","unstructured":"Qiu J, Yang Y, Wang X, Tao D (2021) Scene essence. In: 2021 IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 8318\u20138329. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00822","key":"9772_CR52","DOI":"10.1109\/CVPR46437.2021.00822"},{"unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster R-CNN: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, vol 28","key":"9772_CR53"},{"issue":"2","key":"9772_CR54","doi-asserted-by":"publisher","first-page":"386","DOI":"10.1109\/TPAMI.2018.2844175","volume":"42","author":"K He","year":"2020","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2020) Mask R-CNN. IEEE Trans Pattern Anal Mach Intell 42(2):386\u2013397. https:\/\/doi.org\/10.1109\/TPAMI.2018.2844175","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision. Springer, Berlin, pp 213\u2013229","key":"9772_CR55","DOI":"10.1007\/978-3-030-58452-8_13"},{"doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","key":"9772_CR56","DOI":"10.1109\/ICCV.2017.324"},{"unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2020) Deformable detr: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159","key":"9772_CR57"},{"unstructured":"Zhang H, Li F, Liu S, Zhang L, Su H, Zhu J, Ni LM, Shum H-Y (2022) Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605","key":"9772_CR58"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09772-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-024-09772-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-024-09772-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,9]],"date-time":"2024-08-09T18:09:49Z","timestamp":1723226989000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-024-09772-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":58,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["9772"],"URL":"https:\/\/doi.org\/10.1007\/s00521-024-09772-1","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"7 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 April 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}