{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T17:26:10Z","timestamp":1757611570586,"version":"3.44.0"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030865481"},{"type":"electronic","value":"9783030865498"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86549-8_32","type":"book-chapter","created":{"date-parts":[[2021,9,4]],"date-time":"2021-09-04T02:05:57Z","timestamp":1630721157000},"page":"497-513","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Document Domain Randomization for Deep Learning Document Layout Extraction"],"prefix":"10.1007","author":[{"given":"Meng","family":"Ling","sequence":"first","affiliation":[]},{"given":"Jian","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Torsten","family":"M\u00f6ller","sequence":"additional","affiliation":[]},{"given":"Petra","family":"Isenberg","sequence":"additional","affiliation":[]},{"given":"Tobias","family":"Isenberg","sequence":"additional","affiliation":[]},{"given":"Michael","family":"Sedlmair","sequence":"additional","affiliation":[]},{"given":"Robert S.","family":"Laramee","sequence":"additional","affiliation":[]},{"given":"Han-Wei","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Wu","sequence":"additional","affiliation":[]},{"given":"C. Lee","family":"Giles","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,9,2]]},"reference":[{"key":"32_CR1","unstructured":"Github: Tensorpack Faster R-CNN (February 2021). https:\/\/github.com\/tensorpack\/tensorpack\/tree\/master\/examples\/FasterRCNN"},{"key":"32_CR2","doi-asserted-by":"publisher","unstructured":"Arif, S., Shafait, F.: Table detection in document images using foreground and background features. In: Proceedings of the DICTA, pp. 245\u2013252. IEEE, Piscataway (2018). https:\/\/doi.org\/10.1109\/DICTA.2018.8615795","DOI":"10.1109\/DICTA.2018.8615795"},{"key":"32_CR3","doi-asserted-by":"publisher","unstructured":"Battle, L., Duan, P., Miranda, Z., Mukusheva, D., Chang, R., Stonebraker, M.: Beagle: automated extraction and interpretation of visualizations from the web. In: Proceedings of the CHI, pp. 594:1\u2013594:8. ACM, New York (2018). https:\/\/doi.org\/10.1145\/3173574.3174168","DOI":"10.1145\/3173574.3174168"},{"issue":"12","key":"32_CR4","doi-asserted-by":"publisher","first-page":"2306","DOI":"10.1109\/TVCG.2013.234","volume":"19","author":"MA Borkin","year":"2013","unstructured":"Borkin, M.A., et al.: What makes a visualization memorable? IEEE Trans. Vis. Comput. Graph. 19(12), 2306\u20132315 (2013). https:\/\/doi.org\/10.1109\/TVCG.2013.234","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"32_CR5","doi-asserted-by":"publisher","unstructured":"Caragea, C., et al.: CiteSeerx: a scholarly big dataset. In: de Rijke, M., et al. (eds.) ECIR 2014. LNCS, vol. 8416, pp. 311\u2013322. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-06028-6_26","DOI":"10.1007\/978-3-319-06028-6_26"},{"issue":"3","key":"32_CR6","doi-asserted-by":"publisher","first-page":"713","DOI":"10.1111\/cgf.14034","volume":"39","author":"A Chatzimparmpas","year":"2020","unstructured":"Chatzimparmpas, A., Jusufi, I.: The state of the art in enhancing trust in machine learning models with the use of visualizations. Comput. Graph. Forum 39(3), 713\u2013756 (2020). https:\/\/doi.org\/10.1111\/cgf.14034","journal-title":"Comput. Graph. Forum"},{"key":"32_CR7","doi-asserted-by":"publisher","unstructured":"Chen, J., et al.: IEEE VIS figures and tables image dataset. IEEE Dataport (2020). https:\/\/doi.org\/10.21227\/4hy6-vh52. https:\/\/visimagenavigator.github.io\/","DOI":"10.21227\/4hy6-vh52"},{"key":"32_CR8","doi-asserted-by":"publisher","first-page":"3826","DOI":"10.1109\/TVCG.2021.3054916","volume":"27","author":"J Chen","year":"2021","unstructured":"Chen, J., et al.: VIS30K: a collection of figures and tables from IEEE visualization conference publications. IEEE Trans. Vis. Comput. Graph. 27, 3826\u20133833 (2021). https:\/\/doi.org\/10.1109\/TVCG.2021.3054916","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"32_CR9","doi-asserted-by":"publisher","unstructured":"Choudhury, S.R., Mitra, P., Giles, C.L.: Automatic extraction of figures from scholarly documents. In: Proceedings of the DocEng, pp. 47\u201350. ACM, New York (2015). https:\/\/doi.org\/10.1145\/2682571.2797085","DOI":"10.1145\/2682571.2797085"},{"key":"32_CR10","unstructured":"Clark, C., Divvala, S.: Looking beyond text: Extracting figures, tables and captions from computer science papers. In: Workshops at the 29th AAAI Conference on Artificial Intelligence (2015). https:\/\/aaai.org\/ocs\/index.php\/WS\/AAAIW15\/paper\/view\/10092"},{"key":"32_CR11","doi-asserted-by":"publisher","unstructured":"Clark, C., Divvala, S.: PDFFigures 2.0: mining figures from research papers. In: Proceedings of the JCDL, pp. 143\u2013152. ACM, New York (2016). https:\/\/doi.org\/10.1145\/2910896.2910904","DOI":"10.1145\/2910896.2910904"},{"key":"32_CR12","doi-asserted-by":"publisher","unstructured":"Davila, K., Setlur, S., Doermann, D., Bhargava, U.K., Govindaraju, V.: Chart mining: a survey of methods for automated chart analysis. IEEE Trans. Pattern Anal. Mach. Intell. 43 (2021, to appear). https:\/\/doi.org\/10.1109\/TPAMI.2020.2992028","DOI":"10.1109\/TPAMI.2020.2992028"},{"key":"32_CR13","doi-asserted-by":"publisher","unstructured":"Dong, X., et al.: Knowledge vault: a web-scale approach to probabilistic knowledge fusion. In: Proceedings of the KDD, pp. 601\u2013610. ACM, New York (2014). https:\/\/doi.org\/10.1145\/2623330.2623623","DOI":"10.1145\/2623330.2623623"},{"key":"32_CR14","doi-asserted-by":"publisher","unstructured":"Dosovitskiy, A., et al.: FlowNet: learning optical flow with convolutional networks. In: Proceedings of the ICCV, pp. 2758\u20132766. IEEE, Los Alamitos (2015). https:\/\/doi.org\/10.1109\/ICCV.2015.316","DOI":"10.1109\/ICCV.2015.316"},{"issue":"3","key":"32_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1167\/jov.21.3.16","volume":"21","author":"CM Funke","year":"2021","unstructured":"Funke, C.M., Borowski, J., Stosio, K., Brendel, W., Wallis, T.S., Bethge, M.: Five points to check when comparing visual perception in humans and machines. J. Vis. 21(3), 1\u201323 (2021). https:\/\/doi.org\/10.1167\/jov.21.3.16","journal-title":"J. Vis."},{"key":"32_CR16","unstructured":"Geirhos, R., Rubisch, P., Michaelis, C., Bethge, M., Wichmann, F.A., Brendel, W.: ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness (2018). https:\/\/arxiv.org\/abs\/1811.12231"},{"key":"32_CR17","doi-asserted-by":"publisher","unstructured":"Giles, C.L., Bollacker, K.D., Lawrence, S.: CiteSeer: an automatic citation indexing system. In: Proceedings of the DL, pp. 89\u201398. ACM, New York (1998). https:\/\/doi.org\/10.1145\/276675.276685","DOI":"10.1145\/276675.276685"},{"key":"32_CR18","doi-asserted-by":"publisher","unstructured":"He, D., Cohen, S., Price, B., Kifer, D., Giles, C.L.: Multi-scale multi-task FCN for semantic page segmentation and table detection. In: Proceedings of the ICDAR, pp. 254\u2013261. IEEE, Los Alamitos (2017). https:\/\/doi.org\/10.1109\/ICDAR.2017.50","DOI":"10.1109\/ICDAR.2017.50"},{"key":"32_CR19","unstructured":"James, S., Johns, E.: 3D simulation for robot arm control with deep Q-learning (2016). https:\/\/arxiv.org\/abs\/1609.03759"},{"key":"32_CR20","unstructured":"Katona, G.: Component Extraction from Scientific Publications using Convolutional Neural Networks. Master\u2019s thesis, Computer Science Department, University of Vienna, Austria (2019)"},{"issue":"1","key":"32_CR21","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. 123(1), 32\u201373 (2017). https:\/\/doi.org\/10.1007\/s11263-016-0981-7","journal-title":"Int. J. Comput. Vis."},{"key":"32_CR22","doi-asserted-by":"publisher","unstructured":"Li, M., et al.: DocBank: a benchmark dataset for document layout analysis. In: Proceedings of the COLING, pp. 949\u2013960. ICCL, Praha, Czech Republic (2020). https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.82","DOI":"10.18653\/v1\/2020.coling-main.82"},{"key":"32_CR23","doi-asserted-by":"publisher","unstructured":"Li, R., Chen, J.: Toward a deep understanding of what makes a scientific visualization memorable. In: Proceedings of the SciVis, pp. 26\u201331. IEEE, Los Alamitos (2018). https:\/\/doi.org\/10.1109\/SciVis.2018.8823764","DOI":"10.1109\/SciVis.2018.8823764"},{"key":"32_CR24","doi-asserted-by":"publisher","unstructured":"Ling, M., Chen, J.: DeepPaperComposer: a simple solution for training data preparation for parsing research papers. In: Proceedings of the EMNLP\/Scholarly Document Processing, pp. 91\u201396. ACL, Stroudsburg (2020). https:\/\/doi.org\/10.18653\/v1\/2020.sdp-1.10","DOI":"10.18653\/v1\/2020.sdp-1.10"},{"key":"32_CR25","doi-asserted-by":"publisher","unstructured":"Ling, M., et al.: Three benchmark datasets for scholarly article layout analysis. IEEE Dataport (2020). https:\/\/doi.org\/10.21227\/326q-bf39","DOI":"10.21227\/326q-bf39"},{"key":"32_CR26","doi-asserted-by":"publisher","unstructured":"Lo, K., Wang, L.L., Neumann, M., Kinney, R., Weld, D.S.: S2ORC: the semantic scholar open research corpus. In: Proceedings of the ACL, pp. 4969\u20134983. ACL, Stroudsburg (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.447","DOI":"10.18653\/v1\/2020.acl-main.447"},{"key":"32_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"473","DOI":"10.1007\/978-3-642-04346-8_62","volume-title":"Research and Advanced Technology for Digital Libraries","author":"P Lopez","year":"2009","unstructured":"Lopez, P.: GROBID: combining automatic bibliographic data recognition and term extraction for scholarship publications. In: Agosti, M., Borbinha, J., Kapidakis, S., Papatheodorou, C., Tsakonas, G. (eds.) ECDL 2009. LNCS, vol. 5714, pp. 473\u2013474. Springer, Heidelberg (2009). https:\/\/doi.org\/10.1007\/978-3-642-04346-8_62"},{"key":"32_CR28","doi-asserted-by":"publisher","unstructured":"Mayer, N., et al.: A large dataset to train convolutional networks for disparity, optical flow, and scene flow estimation. In: Proceedings of the CVPR, pp. 4040\u20134048. IEEE, Los Alamitos (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.438","DOI":"10.1109\/CVPR.2016.438"},{"key":"32_CR29","unstructured":"Poppler: Poppler. Dataset and online search (2014). https:\/\/poppler.freedesktop.org\/"},{"key":"32_CR30","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/978-3-319-03437-9_30","volume-title":"Metadata and Semantics Research","author":"P Praczyk","year":"2013","unstructured":"Praczyk, P., Nogueras-Iso, J.: A semantic approach for the annotation of figures: application to high-energy physics. In: Garoufallou, E., Greenberg, J. (eds.) MTSR 2013. CCIS, vol. 390, pp. 302\u2013314. Springer, Cham (2013). https:\/\/doi.org\/10.1007\/978-3-319-03437-9_30"},{"issue":"6","key":"32_CR31","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"32_CR32","unstructured":"Rolnick, D., Veit, A., Belongie, S., Shavit, N.: Deep learning is robust to massive label noise. arXiv preprint arXiv:1705.10694 (2017)"},{"key":"32_CR33","doi-asserted-by":"crossref","unstructured":"Sadeghi, F., Levine, S.: CAD2RL: real single-image flight without a single real image. In: Proceedings of the RSS, pp. 34:1\u201334:10. RSS Foundation (2017). https:\/\/doi.org\/10.15607\/RSS.2017.XIII.034","DOI":"10.15607\/RSS.2017.XIII.034"},{"key":"32_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1007\/978-3-319-46478-7_41","volume-title":"Computer Vision \u2013 ECCV 2016","author":"N Siegel","year":"2016","unstructured":"Siegel, N., Horvitz, Z., Levin, R., Divvala, S., Farhadi, A.: FigureSeer: parsing result-figures in research papers. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 664\u2013680. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_41"},{"key":"32_CR35","doi-asserted-by":"publisher","unstructured":"Siegel, N., Lourie, N., Power, R., Ammar, W.: Extracting scientific figures with distantly supervised neural networks. In: Proceedings of the JCDL, pp. 223\u2013232. ACM, New York (2018). https:\/\/doi.org\/10.1145\/3197026.3197040","DOI":"10.1145\/3197026.3197040"},{"key":"32_CR36","doi-asserted-by":"publisher","unstructured":"Sinha, A., et al.: An overview of Microsoft Academic Service (MAS) and applications. In: Proceedings of the WWW, pp. 243\u2013246. ACM, New York (2015). https:\/\/doi.org\/10.1145\/2740908.2742839","DOI":"10.1145\/2740908.2742839"},{"key":"32_CR37","doi-asserted-by":"publisher","unstructured":"Song, S., Lichtenberg, S.P., Xiao, J.: SUN RGB-D: a RGB-D scene understanding benchmark suite. In: Proceedings of the CVPR, pp. 567\u2013576. IEEE, Los Alamitos (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298655","DOI":"10.1109\/CVPR.2015.7298655"},{"key":"32_CR38","unstructured":"Stribling, J., Krohn, M., Aguayo, D.: SCIgen - an automatic CS paper generator (2005). Online tool: https:\/\/pdos.csail.mit.edu\/archive\/scigen\/"},{"key":"32_CR39","doi-asserted-by":"publisher","unstructured":"Tobin, J., Fong, R., Ray, A., Schneider, J., Zaremba, W., Abbeel, P.: Domain randomization for transferring deep neural networks from simulation to the real world. In: Proceedings of the IROS, pp. 23\u201330. IEEE, Piscataway (2017). https:\/\/doi.org\/10.1109\/IROS.2017.8202133","DOI":"10.1109\/IROS.2017.8202133"},{"key":"32_CR40","doi-asserted-by":"publisher","unstructured":"Tremblay, J., et al.: Training deep networks with synthetic data: bridging the reality gap by domain randomization. In: Proceedings of the CVPRW, pp. 969\u2013977. IEEE, Los Alamitos (2018). https:\/\/doi.org\/10.1109\/CVPRW.2018.00143","DOI":"10.1109\/CVPRW.2018.00143"},{"key":"32_CR41","doi-asserted-by":"publisher","unstructured":"Yang, X., Yumer, E., Asente, P., Kraley, M., Kifer, D., Lee Giles, C.: Learning to extract semantic structure from documents using multimodal fully convolutional neural networks. In: Proceedings of the CVPR, pp. 5315\u20135324. IEEE, Los Alamitos (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.462","DOI":"10.1109\/CVPR.2017.462"},{"key":"32_CR42","doi-asserted-by":"publisher","unstructured":"Zhong, X., Tang, J., Yepes, A.J.: PubLayNet: largest dataset ever for document layout analysis. In: Proceedings of the ICDAR, pp. 1015\u20131022. IEEE, Los Alamitos (2019). https:\/\/doi.org\/10.1109\/ICDAR.2019.00166","DOI":"10.1109\/ICDAR.2019.00166"}],"container-title":["Lecture Notes in Computer Science","Document Analysis and Recognition \u2013 ICDAR 2021"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86549-8_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T22:04:00Z","timestamp":1756937040000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86549-8_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030865481","9783030865498"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86549-8_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"2 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICDAR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Document Analysis and Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lausanne","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Switzerland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icdar2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iapr.org\/icdar2021","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"340","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"182","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Additionally, 13 competition reports are included.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}