{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:31:43Z","timestamp":1742916703810,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":23,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785100"},{"type":"electronic","value":"9789819785117"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8511-7_27","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:03:24Z","timestamp":1730523804000},"page":"378-391","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-Modal Attention Based on 2D Structured Sequence for Table Recognition"],"prefix":"10.1007","author":[{"given":"Yiming","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yaping","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Lu","family":"Xiang","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"27_CR1","doi-asserted-by":"publisher","first-page":"291","DOI":"10.1016\/j.neucom.2018.05.080","volume":"311","author":"S Bai","year":"2018","unstructured":"Bai, S., An, S.: A survey on automatic image caption generation. Neurocomputing 311, 291\u2013304 (2018)","journal-title":"Neurocomputing"},{"key":"27_CR2","doi-asserted-by":"crossref","unstructured":"Chen, L., Huang, C., Zheng, X., Lin, J., Huang, X.J.: Tablevlm: Multi-modal pre-training for table structure recognition. In: Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2437\u20132449 (2023)","DOI":"10.18653\/v1\/2023.acl-long.137"},{"key":"27_CR3","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (voc) challenge. Int. J. Comput. Vision 88, 303\u2013338 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"27_CR4","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"27_CR5","doi-asserted-by":"crossref","unstructured":"Huang, Y., Lu, N., Chen, D., Li, Y., Xie, Z., Zhu, S., Gao, L., Peng, W.: Improving table structure recognition with visual-alignment sequential coordinate modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11134\u201311143 (2023)","DOI":"10.1109\/CVPR52729.2023.01071"},{"key":"27_CR6","doi-asserted-by":"publisher","unstructured":"Kasar, T., Barlas, P., Adam, S., Chatelain, C., Paquet, T.: Learning to detect tables in scanned document images using line information. In: 2013 12th International Conference on Document Analysis and Recognition, pp. 1185\u20131189 (2013). https:\/\/doi.org\/10.1109\/ICDAR.2013.240","DOI":"10.1109\/ICDAR.2013.240"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Lin, W., Sun, Z., Ma, C., Li, M., Wang, J., Sun, L., Huo, Q.: Tsrformer: Table structure recognition with transformers. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 6473\u20136482 (2022)","DOI":"10.1145\/3503161.3548038"},{"key":"27_CR8","doi-asserted-by":"crossref","unstructured":"Liu, H., Li, X., Gong, M., Liu, B., Wu, Y., Jiang, D., Liu, Y., Sun, X.: Grab what you need: Rethinking complex table structure recognition with flexible components deliberation. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a038, pp. 3603\u20133611 (2024)","DOI":"10.1609\/aaai.v38i4.28149"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Ly, N.T., Takasu, A.: An end-to-end multi-task learning model for image-based table recognition. arXiv preprint arXiv:2303.08648 (2023)","DOI":"10.5220\/0011685000003417"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Lysak, M., Nassar, A., Livathinos, N., Auer, C., Staar, P.: Optimized table tokenization for table structure recognition. arXiv preprint arXiv:2305.03393 (2023)","DOI":"10.1007\/978-3-031-41679-8_3"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Nassar, A., Livathinos, N., Lysak, M., Staar, P.: Tableformer: Table structure understanding with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4614\u20134623 (2022)","DOI":"10.1109\/CVPR52688.2022.00457"},{"key":"27_CR12","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1016\/j.is.2015.08.004","volume":"56","author":"M Pawlik","year":"2016","unstructured":"Pawlik, M., Augsten, N.: Tree edit distance: Robust and memory-efficient. Inf. Syst. 56, 157\u2013173 (2016)","journal-title":"Inf. Syst."},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Qasim, S.R., Mahmood, H., Shafait, F.: Rethinking table recognition using graph neural networks. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 142\u2013147. IEEE (2019)","DOI":"10.1109\/ICDAR.2019.00031"},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Qiao, L., Li, Z., Cheng, Z., Zhang, P., Pu, S., Niu, Y., Ren, W., Tan, W., Wu, F.: Lgpma: Complicated table structure recognition with local and global pyramid mask alignment. In: International Conference on Document Analysis and Recognition, pp. 99\u2013114. Springer (2021)","DOI":"10.1007\/978-3-030-86549-8_7"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Shen, H., Gao, X., Wei, J., Qiao, L., Zhou, Y., Li, Q., Cheng, Z.: Divide rows and conquer cells: Towards structure recognition for large tables. In: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI-23, International Joint Conferences on Artificial Intelligence Organization, pp. 1369\u20131377 (2023)","DOI":"10.24963\/ijcai.2023\/152"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Smock, B., Pesala, R., Abraham, R.: Pubtables-1m: Towards comprehensive table extraction from unstructured documents. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4634\u20134642 (2022)","DOI":"10.1109\/CVPR52688.2022.00459"},{"key":"27_CR17","doi-asserted-by":"publisher","unstructured":"Tensmeyer, C., Morariu, V.I., Price, B., Cohen, S., Martinez, T.: Deep splitting and merging for table structure decomposition. In: 2019 International Conference on Document Analysis and Recognition (ICDAR), pp. 114\u2013121 (2019) https:\/\/doi.org\/10.1109\/ICDAR.2019.00027","DOI":"10.1109\/ICDAR.2019.00027"},{"key":"27_CR18","unstructured":"Tupaj, S., Shi, Z., Chang, D.H.: Extracting tabular information from text files (1996), https:\/\/api.semanticscholar.org\/CorpusID:18379904"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Wang, H., Zhang, Y., Yu, X.: An overview of image caption generation methods. Comput. Intell. Neuroscience 2020 (2020)","DOI":"10.1155\/2020\/3062706"},{"key":"27_CR20","doi-asserted-by":"crossref","unstructured":"Xue, W., Yu, B., Wang, W., Tao, D., Li, Q.: Tgrnet: A table graph reconstruction network for table structure recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1295\u20131304 (2021)","DOI":"10.1109\/ICCV48922.2021.00133"},{"key":"27_CR21","unstructured":"Ye, J., Qi, X., He, Y., Chen, Y., Gu, D., Gao, P., Xiao, R.: Pingan-vcgroup\u2019s solution for icdar 2021 competition on scientific literature parsing task b: table recognition to html. arXiv preprint arXiv:2105.01848 (2021)"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Zheng, X., Burdick, D., Popa, L., Zhong, X., Wang, N.X.R.: Global table extractor (gte): A framework for joint table identification and cell structure recognition using visual context. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 697\u2013706 (2021)","DOI":"10.1109\/WACV48630.2021.00074"},{"key":"27_CR23","doi-asserted-by":"crossref","unstructured":"Zhong, X., ShafieiBavani, E., Jimeno\u00a0Yepes, A.: Image-based table recognition: data, model, and evaluation. In: European Conference on Computer Vision, pp. 564\u2013580. Springer (2020)","DOI":"10.1007\/978-3-030-58589-1_34"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8511-7_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T05:12:48Z","timestamp":1730524368000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8511-7_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9789819785100","9789819785117"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8511-7_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}