{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T06:11:36Z","timestamp":1758089496700,"version":"3.44.0"},"publisher-location":"Cham","reference-count":58,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032046239"},{"type":"electronic","value":"9783032046246"}],"license":[{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T00:00:00Z","timestamp":1758067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-04624-6_13","type":"book-chapter","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T05:34:00Z","timestamp":1758000840000},"page":"218-237","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["HisDoc-DETR: Integrating Semantic Learning and\u00a0Feature Fusion for\u00a0Historical Document Layout Analysis"],"prefix":"10.1007","author":[{"given":"Kai","family":"Ding","sequence":"first","affiliation":[]},{"given":"Sheng","family":"Jian","sequence":"additional","affiliation":[]},{"given":"Lianwen","family":"Jin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,17]]},"reference":[{"key":"13_CR1","unstructured":"Nagy, G., Seth, S.C.: Hierarchical representation of optically scanned documents. In: Proceedings of the International Conference on Pattern Recognition (ICPR), Montreal, Que, Canada, pp. 347\u2013349. IEEE (1984)"},{"issue":"11","key":"13_CR2","doi-asserted-by":"publisher","first-page":"1162","DOI":"10.1109\/34.244677","volume":"15","author":"L O\u2019Gorman","year":"1993","unstructured":"O\u2019Gorman, L.: The document spectrum for page layout analysis. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 15(11), 1162\u20131173 (1993)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Okamoto, M., Takahashi, M.: A hybrid page segmentation method. In: Proceedings of the International Conference on Document Analysis and Recognition (ICDAR), Tsukuba, Japan, pp. 743\u2013746. IEEE (1993)","DOI":"10.1109\/ICDAR.1993.395630"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Xu, Y., Yin, F., Zhang, Z., et al.: Multi-task layout analysis for historical handwritten documents using fully convolutional networks. In: Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), Stockholm, Sweden, pp. 1057\u20131063. IJCAI (2018)","DOI":"10.24963\/ijcai.2018\/147"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Oliveira, S.A., Seguin, B., Kaplan, F.: DhSegment: a generic deep-learning approach for document segmentation. In: Proceedings of the International Conference on Frontiers in Handwriting Recognition (ICFHR), Niagara Falls, NY, United States, pp. 7\u201312. IEEE (2018)","DOI":"10.1109\/ICFHR-2018.2018.00011"},{"key":"13_CR6","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1007\/978-3-031-41734-4_20","volume-title":"ICDAR 2023","author":"N Rahal","year":"2023","unstructured":"Rahal, N., V\u00f6gtlin, L., Ingold, R.: Layout analysis of historical document images using a light fully convolutional network. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023. LNCS, vol. 14191, pp. 325\u2013341. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41734-4_20"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"De Nardin, A., Zottin, S., Paier, M., et al.: Efficient few-shot learning for pixel-precise handwritten document layout analysis. In: Proceedings of the Winter Conference on Applications of Computer Vision (WACV), Waikoloa, HI, United States, pp. 3680\u20133688. IEEE (2023)","DOI":"10.1109\/WACV56688.2023.00367"},{"key":"13_CR8","unstructured":"Chen, L.C., Papandreou, G., Schroff, F., et al.: Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv: 1706.05587 (2017)"},{"issue":"2","key":"13_CR9","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/S0031-3203(99)00055-2","volume":"33","author":"J Sauvola","year":"2000","unstructured":"Sauvola, J., Pietik\u00e4inen, M.: Adaptive document image binarization. Pattern Recognit. (PR) 33(2), 225\u2013236 (2000)","journal-title":"Pattern Recognit. (PR)"},{"key":"13_CR10","doi-asserted-by":"publisher","unstructured":"Ravichandra, S., Siva Sathya, S., Lourdu Marie Sophie, S.: Deep learning based document layout analysis on historical documents. In: Rout, R.R., Ghosh, S.K., Jana, P.K., Tripathy, A.K., Sahoo, J.P., Li, KC. (eds.) Advances in Distributed Computing and Machine Learning. LNCS, vol. 427, pp. 271\u2013281. Springer, Singapore (2022). https:\/\/doi.org\/10.1007\/978-981-19-1018-0_23","DOI":"10.1007\/978-981-19-1018-0_23"},{"key":"13_CR11","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement. arXiv preprint arXiv:1804.02767 (2018)"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., et al.: You only look once: unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Las Vegas, NV, United States, pp. 779\u2013788. IEEE (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., et al.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Venice, Italy, pp. 2980\u20132988. IEEE (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., et al.: FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Seoul, Korea, pp. 9627\u20139636. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"13_CR15","unstructured":"Zhu, X., Su, W., Lu, L., et al.: Deformable DETR: Deformable Transformers for end-to-end object detection. In: Proceedings of the International Conference on Learning Representations (ICLR), Virtual, Online, pp. 1\u201316. ICLR (2021)"},{"key":"13_CR16","unstructured":"Kastanas, S., Tan, S., He, Y.: Document AI: a comparative study of Transformer-Based, Graph-Based models, and convolutional neural networks for document layout analysis. arXiv preprint arXiv:2308.15517 (2023)"},{"key":"13_CR17","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1007\/978-3-031-21648-0_30","volume-title":"ICFHR 2022","author":"H Cheng","year":"2022","unstructured":"Cheng, H., Jian, C., Wu, S., et al.: SCUT-CAB: a new benchmark dataset of ancient Chinese books with complex layouts for document layout analysis. In: Porwal, U., Forn\u00e9s, A., Shafait, F. (eds.) ICFHR 2022. LNCS, vol. 13639, pp. 436\u2013451. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-21648-0_30"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Bar-Yosef, I., Hagbi, N., Kedem, K., et al.: Line segmentation for degraded handwritten historical documents. In: Proceedings of the International Conference on Document Analysis and Recognition (ICDAR), Barcelona, Spain, pp. 1161\u20131165. IEEE (2009)","DOI":"10.1109\/ICDAR.2009.191"},{"key":"13_CR19","unstructured":"Wong, K.Y., Casey, R.G., Wahl, F.M.: Document analysis system. In: Proceedings of the International Conference on Pattern Recognition (ICPR), Munich, W Ger, pp. 496\u2013500. IEEE (1990)"},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Fisher, J.L., Hinds, S.C., D\u2019Amato, D.P.: A rule-based system for document image segmentation. In: Proceedings of the International Conference on Pattern Recognition (ICPR), Atlantic City, NJ, USA, pp. 567\u2013572. IEEE (1990)","DOI":"10.1109\/ICPR.1990.118166"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Esposito, F., Malerba, D., Semeraro, G., et al.: An experimental page layout recognition system for office document automatic classification: an integrated approach for inductive generalization. In: Proceedings of the International Conference on Pattern Recognition (ICPR), Atlantic City, NJ, USA, pp. 557\u2013562. IEEE (1990)","DOI":"10.1109\/ICPR.1990.118164"},{"key":"13_CR22","doi-asserted-by":"crossref","unstructured":"Baird, H.S., Jones, S.E., Fortune, S.J.: Image segmentation by shape-directed covers. In: Proceedings of the International Conference on Pattern Recognition (ICPR), Atlantic City, NJ, USA, pp. 820\u2013825. IEEE (1990)","DOI":"10.1109\/ICPR.1990.118223"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Smith, R.W.: Hybrid page layout analysis via tab-stop detection. In: Proceedings of the International Conference on Document Analysis and Recognition (ICDAR), Barcelona, Spain, pp. 241\u2013245. IEEE (2009)","DOI":"10.1109\/ICDAR.2009.257"},{"issue":"3","key":"13_CR24","doi-asserted-by":"publisher","first-page":"370","DOI":"10.1006\/cviu.1998.0684","volume":"70","author":"K Kise","year":"1998","unstructured":"Kise, K., Sato, A., Iwata, M.: Segmentation of page images using the area Voronoi diagram. Comput. Vis. Image Underst. (CVIU) 70(3), 370\u2013382 (1998)","journal-title":"Comput. Vis. Image Underst. (CVIU)"},{"issue":"3","key":"13_CR25","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1016\/S0031-3203(02)00082-1","volume":"36","author":"Y Xiao","year":"2003","unstructured":"Xiao, Y., Yan, H.: Text region extraction in a document image based on the Delaunay tessellation. Pattern Recogn. 36(3), 799\u2013809 (2003)","journal-title":"Pattern Recogn."},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Soto, C., Yoo, S.: Visual detection with context for document layout analysis. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing and International Joint Conference on Natural Language Processing (EMNLP), Hong Kong, China, pp. 3464\u20133470. ACL (2019)","DOI":"10.18653\/v1\/D19-1348"},{"key":"13_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/978-3-030-86549-8_8","volume-title":"Document Analysis and Recognition \u2013 ICDAR 2021","author":"P Zhang","year":"2021","unstructured":"Zhang, P., et al.: VSR: a unified framework for document layout analysis combining vision, semantics and relations. In: Llad\u00f3s, J., Lopresti, D., Uchida, S. (eds.) ICDAR 2021. LNCS, vol. 12821, pp. 115\u2013130. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-86549-8_8"},{"key":"13_CR28","doi-asserted-by":"crossref","unstructured":"Cheng, H., Zhang, P., Wu, S., et al.: M6Doc: a large-scale multi-format, multi-type, multi-layout, multi-language, multi-annotation category dataset for modern document layout analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Vancouver, BC, Canada, pp. 15138\u201315147. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.01453"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Yang, X., Yumer, E., Asente, P., et al.: Learning to extract semantic structure from documents using multimodal fully convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, United States, pp. 5315\u20135324. IEEE (2017)","DOI":"10.1109\/CVPR.2017.462"},{"issue":"1","key":"13_CR30","first-page":"1","volume":"1","author":"F Kaplan","year":"2021","unstructured":"Kaplan, F., Oliveira, S.A., Clematide, S., et al.: Combining visual and textual features for semantic segmentation of historical newspapers. J. Data Mining Digit. Humanit. (JDMDH) 1(1), 1\u201326 (2021)","journal-title":"J. Data Mining Digit. Humanit. (JDMDH)"},{"key":"13_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/978-3-031-41676-7_18","volume-title":"ICDAR 2023","author":"A Banerjee","year":"2023","unstructured":"Banerjee, A., Biswas, S., Llad\u00f3s, J., et al.: SwinDocSegmenter: an end-to-end unified domain adaptive Transformer for document instance segmentation. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023. LNCS, vol. 14187, pp. 307\u2013325. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41676-7_18"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Wang, R., Fujii, Y., Popat, A.C.: Post-OCR paragraph recognition by graph convolutional networks. In: Proceedings of the Winter Conference on Applications of Computer Vision (WACV), Waikoloa, HI, United States, pp. 493\u2013502. IEEE (2022)","DOI":"10.1109\/WACV51458.2022.00259"},{"key":"13_CR33","unstructured":"Wei, S., Xu, N.: Paragraph2Graph: a GNN-based framework for layout paragraph analysis. arXiv preprint arXiv: 2304.11810 (2023)"},{"key":"13_CR34","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/978-3-031-41734-4_4","volume-title":"ICDAR 2023","author":"J Wang","year":"2023","unstructured":"Wang, J., Krumdick, M., Tong, B., et al.: A Graphical approach to document layout analysis. In: Fink, G.A., Jain, R., Kise, K., Zanibbi, R. (eds.) ICDAR 2023. LNCS, vol. 14191, pp. 53\u201369. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-41734-4_4"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, Y., Lv, T., et al.: DIT: self-supervised pre-training for document image Transformer. In: Proceedings of the ACM International Conference on Multimedia (ACM MM), Lisboa, Portugal, pp. 3530\u20133539. ACM (2022)","DOI":"10.1145\/3503161.3547911"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lv, T., Cui, L., et al.: LayoutLMv3: pre-training for document AI with unified text and image masking. In: Proceedings of the ACM International Conference on Multimedia (ACM MM), Lisboa, Portugal, pp. 4083\u20134091. ACM (2022)","DOI":"10.1145\/3503161.3548112"},{"key":"13_CR37","doi-asserted-by":"crossref","unstructured":"Da, C., Luo, C., Zheng, Q., et al.: Vision grid transformer for document layout analysis. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Paris, France, pp. 19462\u201319472. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.01783"},{"key":"13_CR38","unstructured":"Zhang, H., Li, F., Liu, S., et al.: DINO: DETR with improved denoising anchor boxes for end-to-end object detection. In: Proceedings of the International Conference on Learning Representations (ICLR), Virtual, Online, pp. 1\u201323. ICLR (2022)"},{"key":"13_CR39","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.neunet.2017.12.012","volume":"107","author":"S Elfwing","year":"2018","unstructured":"Elfwing, S., Uchibe, E., Doya, K.: Sigmoid-weighted linear units for neural network function approximation in reinforcement learning. Neural Netw. 107, 3\u201311 (2018)","journal-title":"Neural Netw."},{"key":"13_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Q., Wu, B., Zhu, P., et al.: ECA-Net: efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Virtual, Online, United States, pp. 11534\u201311542. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"13_CR41","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Virtual, Online, United States, pp. 13713\u201313722. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"13_CR42","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., et al.: Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Salt Lake City, UT, United States, pp. 8759\u20138768. IEEE (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., et al.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, United States, pp. 2117\u20132125. IEEE (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"13_CR44","doi-asserted-by":"crossref","unstructured":"Pang, J., Chen, K., Shi, J., et al.: Libra R-CNN: towards balanced learning for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Long Beach, CA, United States, pp. 821\u2013830. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00091"},{"key":"13_CR45","unstructured":"Sun, K., Zhao, Y., Jiang, B., et al.: High-resolution representations for labeling pixels and regions. arXiv preprint arXiv: 1904.04514 (2019)"},{"key":"13_CR46","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, K., Xu, R., et al.: CARAFE: content-aware reassembly of features. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Seoul, Korea, pp. 3007\u20133016. IEEE (2019)","DOI":"10.1109\/ICCV.2019.00310"},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Lin, T.Y., Le, Q.V.: NAS-FPN: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Long Beach, CA, United States, pp. 7036\u20137045. IEEE (2019)","DOI":"10.1109\/CVPR.2019.00720"},{"key":"13_CR48","doi-asserted-by":"crossref","unstructured":"Dai, X., Chen, Y., Xiao, B., et al.: Dynamic head: unifying object detection heads with attentions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Virtual, Online, United States, pp. 7373\u20137382. IEEE (2021)","DOI":"10.1109\/CVPR46437.2021.00729"},{"issue":"6","key":"13_CR49","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., et al.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"key":"13_CR50","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., et al.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Venice, Italy, pp. 2961\u20132969. IEEE (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"13_CR51","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Salt Lake City, UT, United States, pp. 6154\u20136162. IEEE (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"13_CR52","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, X., Yang, T., et al.: Anchor DETR: query design for Transformer-based detector. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI), Virtual, Online, pp. 2567\u20132575. AAAI (2022)","DOI":"10.1609\/aaai.v36i3.20158"},{"key":"13_CR53","doi-asserted-by":"crossref","unstructured":"Meng, D., Chen, X., Fan, Z., et al.: Conditional DETR for fast training convergence. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Virtual, Online, Canada, pp. 3631\u20133640. IEEE (2021)","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"13_CR54","unstructured":"Liu, S., Li, F., Zhang, H., et al.: DAB-DETR: dynamic anchor boxes are better queries for DETR. In: Proceedings of the International Conference on Learning Representations (ICLR), Virtual, Online, pp. 1\u201320. ICLR (2021)"},{"key":"13_CR55","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., et al.: DN-DETR: accelerate DETR training by introducing query denoising. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), New Orleans, LA, United States, pp. 13619\u201313627. IEEE (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"13_CR56","doi-asserted-by":"crossref","unstructured":"Chen, Q., Chen, X., Wang, J., et al.: Group DETR: fast DETR training with group-wise one-to-many assignment. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Paris, France, pp. 6633\u20136642. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00610"},{"key":"13_CR57","doi-asserted-by":"crossref","unstructured":"Jia, D., Yuan, Y., He, H., et al.: DETRs with hybrid matching. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Vancouver, BC, Canada, pp. 19702\u201319712. IEEE (2023)","DOI":"10.1109\/CVPR52729.2023.01887"},{"key":"13_CR58","doi-asserted-by":"crossref","unstructured":"Zheng, D., Dong, W., Hu, H., et al.: Less is more: focus attention for efficient DETR. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), Paris, France, pp. 6674\u20136683. IEEE (2023)","DOI":"10.1109\/ICCV51070.2023.00614"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-04624-6_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T06:03:33Z","timestamp":1758002613000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-04624-6_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,17]]},"ISBN":["9783032046239","9783032046246"],"references-count":58,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-04624-6_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,9,17]]},"assertion":[{"value":"17 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Wuhan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iapr.org\/icdar2025","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}