{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T09:08:20Z","timestamp":1777626500587,"version":"3.51.4"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,2,15]],"date-time":"2019-02-15T00:00:00Z","timestamp":1550188800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004325","name":"AstraZeneca","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004325","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["IJDAR"],"published-print":{"date-parts":[[2019,3]]},"DOI":"10.1007\/s10032-019-00317-0","type":"journal-article","created":{"date-parts":[[2019,2,15]],"date-time":"2019-02-15T09:02:08Z","timestamp":1550221328000},"page":"55-78","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":44,"title":["A framework for information extraction from tables in biomedical literature"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2706-9676","authenticated-orcid":false,"given":"Nikola","family":"Milosevic","sequence":"first","affiliation":[]},{"given":"Cassie","family":"Gregson","sequence":"additional","affiliation":[]},{"given":"Robert","family":"Hernandez","sequence":"additional","affiliation":[]},{"given":"Goran","family":"Nenadic","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,2,15]]},"reference":[{"key":"317_CR1","first-page":"556","volume":"13","author":"B Alex","year":"2008","unstructured":"Alex, B., Grover, C., Haddow, B., Kabadjor, M., Klein, E., Matthews, M., Roebuck, S., Tobin, R., Wang, X.: Assisted curation: does text mining really help? Pac. Symp. Biocomput. 13, 556\u2013567 (2008)","journal-title":"Pac. Symp. Biocomput."},{"key":"317_CR2","volume-title":"Text Mining for Biology and Biomedicine","author":"S Ananiadou","year":"2006","unstructured":"Ananiadou, S., McNaught, J.: Text Mining for Biology and Biomedicine. Artech House, London (2006)"},{"key":"317_CR3","unstructured":"Aronson, A.R.: Effective mapping of biomedical text to the UMLS metathesaurus: the metamap program. In: Proceedings of the AMIA Symposium, American Medical Informatics Association, p. 17 (2001)"},{"key":"317_CR4","doi-asserted-by":"crossref","unstructured":"Auer, S., Bizer, C., Kobilarov, G., Lehmann, J., Cyganiak, R., Ives, Z.: Dbpedia: a nucleus for a web of open data. In: The Semantic Web, pp. 722\u2013735. Springer (2007)","DOI":"10.1007\/978-3-540-76298-0_52"},{"issue":"suppl 1","key":"317_CR5","doi-asserted-by":"publisher","first-page":"D267","DOI":"10.1093\/nar\/gkh061","volume":"32","author":"O Bodenreider","year":"2004","unstructured":"Bodenreider, O.: The unified medical language system (UMLS): integrating biomedical terminology. Nucleic Acids Res. 32(suppl 1), D267\u2013D270 (2004)","journal-title":"Nucleic Acids Res."},{"key":"317_CR6","doi-asserted-by":"crossref","unstructured":"Chavan, M.M., Shirgave, S.: A methodology for extracting head contents from meaningful tables in web pages. In: 2011 International Conference on Communication Systems and Network Technologies (CSNT), pp. 272\u2013277. IEEE (2011)","DOI":"10.1109\/CSNT.2011.66"},{"key":"317_CR7","doi-asserted-by":"crossref","unstructured":"Chen, H.-H., Tsai, S.-C., Tsai, J.-H.: Mining tables from large scale HTML texts. In: Proceedings of the 18th Conference on Computational Linguistics, vol. 1, pp. 166\u2013172. Association for Computational Linguistics (2000)","DOI":"10.3115\/990820.990845"},{"key":"317_CR8","unstructured":"Constantin, A.: Automatic structure and keyphrase analysis of scientific publications. Ph.D. thesis (2014)"},{"key":"317_CR9","doi-asserted-by":"crossref","unstructured":"Crestan, E., Pantel, P.: Web-scale knowledge extraction from semi-structured tables. In: Proceedings of the 19th International Conference on World Wide Web, pp. 1081\u20131082. ACM (2010)","DOI":"10.1145\/1772690.1772814"},{"key":"317_CR10","doi-asserted-by":"crossref","unstructured":"Dalvi, B.B., Cohen, W.W., Callan, J.: Websets: extracting sets of entities from the web using unsupervised information extraction. In: Proceedings of the Fifth ACM International Conference on Web Search and Data Mining, pp. 243\u2013252. ACM (2012)","DOI":"10.1145\/2124295.2124327"},{"issue":"1","key":"317_CR11","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.datak.2004.10.004","volume":"54","author":"DW Embley","year":"2005","unstructured":"Embley, D.W., Tao, C., Liddle, S.W.: Automating the extraction of data from html tables with unknown structure. Data Knowl. Eng. 54(1), 3\u201328 (2005)","journal-title":"Data Knowl. Eng."},{"key":"317_CR12","doi-asserted-by":"crossref","unstructured":"Gatterbauer, W., Bohunsky, P., Herzog, M., Kr\u00fcpl, B., Pollak, B.: Towards domain-independent information extraction from web tables. In: Proceedings of the 16th International Conference on World Wide Web, pp. 71\u201380. ACM (2007)","DOI":"10.1145\/1242572.1242583"},{"key":"317_CR13","unstructured":"Ghasemi-Gol, M., Szekely, P.: Tabvec: table vectors for classification of web tables (2018). ArXiv preprint \n                    arXiv:1802.06290"},{"issue":"1","key":"317_CR14","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1145\/1656274.1656278","volume":"11","author":"M Hall","year":"2009","unstructured":"Hall, M., Frank, E., Holmes, G., Pfahringer, B., Reutemann, P., Witten, I.H.: The weka data mining software: an update. ACM SIGKDD Explor. Newsl. 11(1), 10\u201318 (2009)","journal-title":"ACM SIGKDD Explor. Newsl."},{"key":"317_CR15","unstructured":"Haralick, R.M.: Document image understanding: geometric and logical layout. In: 1994 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 1994. Proceedings CVPR\u201994, pp. 385\u2013390. IEEE (1994)"},{"issue":"9","key":"317_CR16","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TKDE.2008.239","volume":"21","author":"H He","year":"2009","unstructured":"He, H., Garcia, E.A.: Learning from imbalanced data. IEEE Trans. Knowl. Data Eng. 21(9), 1263\u20131284 (2009)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"issue":"16","key":"317_CR17","doi-asserted-by":"publisher","first-page":"2196","DOI":"10.1093\/bioinformatics\/btm301","volume":"23","author":"MA Hearst","year":"2007","unstructured":"Hearst, M.A., Divoli, A., Guturu, H., Ksikes, A., Nakov, P., Wooldridge, M.A., Ye, J.: Biotext search engine: beyond abstract search. Bioinformatics 23(16), 2196\u20132197 (2007)","journal-title":"Bioinformatics"},{"key":"317_CR18","unstructured":"Hurst, M.: Layout and language: beyond simple text for information interaction-modelling the table. In: Proceedings of the 2nd International Conference on Multimodal Interfaces, Hong Kong (1999)"},{"key":"317_CR19","unstructured":"Hurst, M.F.: The interpretation of tables in texts. Ph.D. thesis (2000)"},{"issue":"2","key":"317_CR20","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1109\/TKDE.2006.19","volume":"18","author":"S-W Jung","year":"2006","unstructured":"Jung, S.-W., Kwon, H.-C.: A scalable hybrid approach for extracting head components from web tables. IEEE Trans. Knowl. Data Eng. 18(2), 174\u2013187 (2006)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"317_CR21","unstructured":"Kieninger, T.G., Strieder, B.: T-recs table recognition and validation approach. In: AAAI Fall Symposium on Using Layout for the Generation, Understanding and Retrieval of Documents (1999)"},{"issue":"7553","key":"317_CR22","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"key":"317_CR23","doi-asserted-by":"publisher","DOI":"10.4324\/9781315835976","volume-title":"Principles of Pragmatics","author":"GN Leech","year":"2016","unstructured":"Leech, G.N.: Principles of Pragmatics. Routledge, London (2016)"},{"key":"317_CR24","unstructured":"Liu, Y.: Tableseer: automatic table extraction, search, and understanding. Ph.D. thesis, The Pennsylvania State University (2009)"},{"key":"317_CR25","unstructured":"Long, V.: An agent-based approach to table recognition and interpretation. Ph.D. thesis, Macquarie University Sydney, Australia (2010)"},{"key":"317_CR26","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space (2013). ArXiv preprint \n                    arXiv:1301.3781"},{"key":"317_CR27","unstructured":"Miles, A., Bechhofer, S.: SKOS simple knowledge organization system reference. W3C recommendation 18, W3C (2009)"},{"issue":"11","key":"317_CR28","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: Wordnet: a lexical database for english. Commun. ACM 38(11), 39\u201341 (1995)","journal-title":"Commun. ACM"},{"key":"317_CR29","unstructured":"Milosevic, N.: Marvin: Semantic annotation using multiple knowledge sources (2016). ArXiv preprint \n                    arXiv:1602.00515"},{"key":"317_CR30","doi-asserted-by":"crossref","unstructured":"Milosevic, N., Gregson, C., Hernandez, R., Nenadic, G.: Disentangling structure of tables in scientific literature. In: Proceedings of the 21th International Conference on Applications of Natural Language to Information Systems (NLDB 2016). Springer (2016)","DOI":"10.1007\/978-3-319-41754-7_14"},{"key":"317_CR31","doi-asserted-by":"crossref","unstructured":"Mulwad, V., Finin, T., Joshi, A.: Semantic message passing for generating linked data from tables. In: International Semantic Web Conference, pp. 363\u2013378. Springer (2013)","DOI":"10.1007\/978-3-642-41335-3_23"},{"key":"317_CR32","unstructured":"Mulwad, V., Finin, T., Syed, Z., Joshi, A.: Using linked data to interpret tables. In: COLD, vol. 665 (2010)"},{"issue":"1","key":"317_CR33","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/34.824820","volume":"22","author":"G Nagy","year":"2000","unstructured":"Nagy, G.: Twenty years of document image analysis in PAMI. IEEE Trans. Pattern Anal. Mach. Intell. 22(1), 38\u201362 (2000)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"317_CR34","doi-asserted-by":"crossref","unstructured":"Ng, H.T., Lim, C.Y., Koo, J.L.T.: Learning to recognize tables in free text. In: Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics on Computational Linguistics. pp. 443\u2013450. Association for Computational Linguistics (1999)","DOI":"10.3115\/1034678.1034746"},{"key":"317_CR35","unstructured":"of Medicine, U.S.N.L. Medline. \n                    http:\/\/www.ncbi.nlm.nih.gov\/pubmed\n                    \n                   (2014)"},{"key":"317_CR36","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"317_CR37","doi-asserted-by":"crossref","unstructured":"Roberts, R.J.: Pubmed central: the genbank of the published literature (2001)","DOI":"10.1073\/pnas.98.2.381"},{"key":"317_CR38","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.neunet.2014.09.003","volume":"61","author":"J Schmidhuber","year":"2015","unstructured":"Schmidhuber, J.: Deep learning in neural networks: an overview. Neural Netw. 61, 85\u2013117 (2015)","journal-title":"Neural Netw."},{"key":"317_CR39","unstructured":"Silva, A.: Parts that add up to a whole: a framework for the analysis of tables. Ph.D. thesis, University of Edinburgh (2010)"},{"key":"317_CR40","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1016\/j.jbi.2013.11.002","volume":"52","author":"I Sim","year":"2014","unstructured":"Sim, I., Tu, S.W., Carini, S., Lehmann, H.P., Pollock, B.H., Peleg, M., Wittkowski, K.M.: The ontology of clinical research (OCRE): an informatics foundation for the science of clinical research. J. Biomed. Inform. 52, 78\u201391 (2014)","journal-title":"J. Biomed. Inform."},{"key":"317_CR41","unstructured":"Son, J.-W., Lee, J.-A., Park, S.-B., Song, H.-J., Lee, S.-J., Park, S.-Y.: Discriminating meaningful web tables from decorative tables using a composite kernel. In: IEEE\/WIC\/ACM International Conference on Web Intelligence and Intelligent Agent Technology, 2008. WI-IAT\u201908, vol. 1, pp. 368\u2013371. IEEE (2008)"},{"key":"317_CR42","doi-asserted-by":"crossref","unstructured":"Tengli, A., Yang, Y., Ma, N.L.: Learning table extraction from examples. In: Proceedings of the 20th International Conference on Computational Linguistics, p. 987. Association for Computational Linguistics (2004)","DOI":"10.3115\/1220355.1220497"},{"key":"317_CR43","unstructured":"United States National Library of Medicine. Medline. \n                    http:\/\/www.nlm.nih.gov\/bsd\/stats\/cit_added.html\n                    \n                   (2014)"},{"key":"317_CR44","doi-asserted-by":"crossref","unstructured":"Van\u00a0Assem, M., Rijgersberg, H., Wigham, M., Top, J.: Converting and annotating quantitative data tables. In: The Semantic Web-ISWC 2010, pp. 16\u201331. Springer (2010)","DOI":"10.1007\/978-3-642-17746-0_2"},{"key":"317_CR45","unstructured":"Wang, X., Wood, D.: Tabular abstraction for tabular editing and formatting. In: Proceedings of 3rd International Conference for Young Computer Scientists, pp. 17\u201329 (1993)"},{"key":"317_CR46","first-page":"2254","volume":"321","author":"XF Wang","year":"2013","unstructured":"Wang, X.F.: Research on information extraction based on web table structure and ontology. Appl. Mech. Mater. 321, 2254\u20132259 (2013)","journal-title":"Appl. Mech. Mater."},{"key":"317_CR47","doi-asserted-by":"crossref","unstructured":"Wang, Y., Hu, J.: A machine learning based approach for table detection on the web. In: Proceedings of the 11th International Conference on World Wide Web, pp. 242\u2013250. ACM (2002)","DOI":"10.1145\/511446.511478"},{"issue":"5","key":"317_CR48","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1007\/s10791-006-9005-5","volume":"9","author":"X Wei","year":"2006","unstructured":"Wei, X., Croft, B., McCallum, A.: Table extraction for answer retrieval. Inf. Retr. 9(5), 589\u2013611 (2006)","journal-title":"Inf. Retr."},{"key":"317_CR49","doi-asserted-by":"crossref","unstructured":"Wong, W., Martinez, D., Cavedon, L.: Extraction of named entities from tables in gene mutation literature. In: Proceedings of the Workshop on Current Trends in Biomedical Natural Language Processing, pp. 46\u201354. Association for Computational Linguistics (2009)","DOI":"10.3115\/1572364.1572371"},{"issue":"4","key":"317_CR50","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1080\/00140136808930981","volume":"11","author":"P Wright","year":"1968","unstructured":"Wright, P.: Using tabulated information. Ergonomics 11(4), 331\u2013343 (1968)","journal-title":"Ergonomics"},{"issue":"4","key":"317_CR51","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1016\/0003-6870(70)90133-X","volume":"1","author":"P Wright","year":"1970","unstructured":"Wright, P., Fox, K.: Presenting information in tables. Appl. Ergon. 1(4), 234\u2013242 (1970)","journal-title":"Appl. Ergon."},{"key":"317_CR52","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/j.jbi.2014.10.002","volume":"53","author":"R Xu","year":"2015","unstructured":"Xu, R., Wang, Q.: Combining automatic table classification and relationship extraction in extracting anticancer drug-side effect pairs from full-text articles. J. Biomed. Inform. 53, 128\u2013135 (2015)","journal-title":"J. Biomed. Inform."},{"key":"317_CR53","unstructured":"Yildiz, B., Kaiser, K., Miksch, S.: pdf2table: a method to extract table information from pdf files. In: IICAI, pp. 1773\u20131785 (2005)"}],"container-title":["International Journal on Document Analysis and Recognition (IJDAR)"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10032-019-00317-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-019-00317-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10032-019-00317-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,14]],"date-time":"2020-02-14T19:06:43Z","timestamp":1581707203000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10032-019-00317-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,15]]},"references-count":53,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,3]]}},"alternative-id":["317"],"URL":"https:\/\/doi.org\/10.1007\/s10032-019-00317-0","relation":{},"ISSN":["1433-2833","1433-2825"],"issn-type":[{"value":"1433-2833","type":"print"},{"value":"1433-2825","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,2,15]]},"assertion":[{"value":"6 February 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2018","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 February 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 February 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}