{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:41:08Z","timestamp":1778082068728,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3481534","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T10:23:20Z","timestamp":1634552600000},"page":"1084-1092","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":29,"title":["Show, Read and Reason"],"prefix":"10.1145","author":[{"given":"Hao","family":"Liu","sequence":"first","affiliation":[{"name":"Tencent YouTu Lab, Hefei, China"}]},{"given":"Xin","family":"Li","sequence":"additional","affiliation":[{"name":"Tencent YouTu Lab, Hefei, China"}]},{"given":"Bing","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent YouTu Lab, Hefei, China"}]},{"given":"Deqiang","family":"Jiang","sequence":"additional","affiliation":[{"name":"Tencent YouTu Lab, Hefei, China"}]},{"given":"Yinsong","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent YouTu Lab, Hefei, China"}]},{"given":"Bo","family":"Ren","sequence":"additional","affiliation":[{"name":"Tencent YouTu Lab, Hefei, China"}]},{"given":"Rongrong","family":"Ji","sequence":"additional","affiliation":[{"name":"Xiamen University, Hefei, China"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Thirty-Second AAAI Conference on Artificial Intelligence.","author":"Bao Junwei","year":"2018"},{"key":"e_1_3_2_1_2_1","volume-title":"Complicated Table Structure Recognition. arXiv preprint arXiv:1908.04729","author":"Chi Zewen","year":"2019"},{"key":"e_1_3_2_1_3_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018"},{"key":"e_1_3_2_1_4_1","volume-title":"et almbox","author":"Dosovitskiy Alexey","year":"2020"},{"key":"e_1_3_2_1_5_1","volume-title":"ICDAR 2019 competition on table detection and recognition (cTDaR). In 2019 International Conference on Document Analysis and Recognition (ICDAR). IEEE, 1510--1515","author":"Gao Liangcai","year":"2019"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2013.292"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_11_1","volume-title":"TABBIE: Pretrained Representations of Tabular Data. arXiv preprint arXiv:2105.02584","author":"Iida Hiroshi","year":"2021"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1045"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00220"},{"key":"e_1_3_2_1_14_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016"},{"key":"e_1_3_2_1_15_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1127"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of The 12th Language Resources and Evaluation Conference. 1918--1925","author":"Li Minghao","year":"2020"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00560"},{"key":"e_1_3_2_1_19_1","volume-title":"2020 b. GFTE: Graph-based Financial Table Extraction. arXiv preprint arXiv:2003.07560","author":"Li Yiren","year":"2020"},{"key":"e_1_3_2_1_20_1","unstructured":"Tsung-Yi Lin Piotr Doll\u00e1r Ross Girshick Kaiming He Bharath Hariharan and Serge Belongie. 2017. Feature pyramid networks for object detection. 2117--2125.  Tsung-Yi Lin Piotr Doll\u00e1r Ross Girshick Kaiming He Bharath Hariharan and Serge Belongie. 2017. Feature pyramid networks for object detection. 2117--2125."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.61"},{"key":"e_1_3_2_1_22_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019"},{"key":"e_1_3_2_1_23_1","volume-title":"Swin transformer: Hierarchical vision transformer using shifted windows. arXiv preprint arXiv:2103.14030","author":"Liu Ze","year":"2021"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/3298239.3298265"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00029"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00294"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00031"},{"key":"e_1_3_2_1_29_1","volume-title":"LGPMA: Complicated Table Structure Recognition with Local and Global Pyramid Mask Alignment. arXiv preprint arXiv:2105.06224","author":"Qiao Liang","year":"2021"},{"key":"e_1_3_2_1_30_1","volume-title":"Exploring the limits of transfer learning with a unified text-to-text transformer. arXiv preprint arXiv:1910.10683","author":"Raffel Colin","year":"2019"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58604-1_5"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2017.192"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815330.1815345"},{"key":"e_1_3_2_1_34_1","volume-title":"Ernie: Enhanced representation through knowledge integration. arXiv preprint arXiv:1904.09223","author":"Sun Yu","year":"2019"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00027"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_37_1","volume-title":"Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. arXiv preprint arXiv:2102.12122","author":"Wang Wenhai","year":"2021"},{"key":"e_1_3_2_1_38_1","volume-title":"Cvt: Introducing convolutions to vision transformers. arXiv preprint arXiv:2103.15808","author":"Wu Haiping","year":"2021"},{"key":"e_1_3_2_1_39_1","volume-title":"End-to-end deep learning for person search. arXiv preprint arXiv:1604.01850","author":"Xiao Tong","year":"2016"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403172"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2019.00125"},{"key":"e_1_3_2_1_42_1","volume-title":"Tabert: Pretraining for joint understanding of textual and tabular data. arXiv preprint arXiv:2005.08314","author":"Yin Pengcheng","year":"2020"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00074"},{"key":"e_1_3_2_1_44_1","volume-title":"Image-based table recognition: data, model, and evaluation. arXiv preprint arXiv:1911.10683","author":"Zhong Xu","year":"2019"},{"key":"e_1_3_2_1_45_1","volume-title":"Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159","author":"Zhu Xizhou","year":"2020"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3481534","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3481534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:17:35Z","timestamp":1750191455000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3481534"}},"subtitle":["Table Structure Recognition with Flexible Context Aggregator"],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":45,"alternative-id":["10.1145\/3474085.3481534","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3481534","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}