{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:38:58Z","timestamp":1763105938540,"version":"3.37.3"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T00:00:00Z","timestamp":1687392000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T00:00:00Z","timestamp":1687392000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Scientometrics"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11192-023-04774-7","type":"journal-article","created":{"date-parts":[[2023,6,22]],"date-time":"2023-06-22T06:05:34Z","timestamp":1687413934000},"page":"4349-4382","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A hybrid strategy to extract metadata from scholarly articles by utilizing support vector machine and heuristics"],"prefix":"10.1007","volume":"128","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4563-8951","authenticated-orcid":false,"given":"Muhammad","family":"Waqas","sequence":"first","affiliation":[]},{"given":"Nadeem","family":"Anjum","sequence":"additional","affiliation":[]},{"given":"Muhammad Tanvir","family":"Afzal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,6,22]]},"reference":[{"key":"4774_CR1","doi-asserted-by":"publisher","first-page":"99458","DOI":"10.1109\/ACCESS.2020.2997907","volume":"8","author":"MW Ahmed","year":"2020","unstructured":"Ahmed, M. W., & Afzal, M. T. (2020). FLAG-PDFe: Features oriented metadata extraction framework for scientific publications. IEEE Access, 8, 99458\u201399469.","journal-title":"IEEE Access"},{"key":"4774_CR2","unstructured":"Berg, \u00d8. R., Oepen, S., & Read, J. (2012). Towards high-quality text stream extraction from pdf: Technical background to the acl 2012 contributed task. In Proceedings of the ACL-2012 special workshop on rediscovering 50 years of discoveries (pp. 98\u2013103). Association for Computational Linguistics."},{"issue":"12","key":"4774_CR3","doi-asserted-by":"publisher","first-page":"9585","DOI":"10.1007\/s11192-021-04162-z","volume":"126","author":"I B\u00f6schen","year":"2021","unstructured":"B\u00f6schen, I. (2021). Software review: The jatsdecoder package\u2013extract metadata, abstract and sectioned text from niso-jats coded xml documents; insights to pubmed central\u2019s open access database. Scientometrics, 126(12), 9585\u20139601.","journal-title":"Scientometrics"},{"key":"4774_CR4","doi-asserted-by":"crossref","unstructured":"Constantin, A., Pettifer, S., & Voronkov, A. (2013). Pdfx: Fully-automated pdf-to-xml conversion of scientific literature. In Proceedings of the 2013 ACM symposium on document engineering (pp. 177\u2013180). ACM.","DOI":"10.1145\/2494266.2494271"},{"key":"4774_CR5","first-page":"661","volume":"8","author":"IG Councill","year":"2008","unstructured":"Councill, I. G., Giles, C. L., & Kan, M.-Y. (2008). Parscit: An open-source crf reference string parsing package. LREC, 8, 661\u2013667.","journal-title":"LREC"},{"key":"4774_CR6","doi-asserted-by":"crossref","unstructured":"D\u00e9jean, H. & Meunier, J.-L. (2006). A system for converting pdf documents into structured xml format. In International workshop on document analysis systems (pp. 129\u2013140). Springer.","DOI":"10.1007\/11669487_12"},{"key":"4774_CR7","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/978-3-319-46565-4_19","volume-title":"Semantic web evaluation challenge","author":"A Dimou","year":"2016","unstructured":"Dimou, A., Di Iorio, A., Lange, C., & Vahdati, S. (2016). Semantic publishing challenge\u2014Assessing the quality of scientific output in its ecosystem. In A. Dimou, A. Di Iorio, C. Lange, & S. Vahdati (Eds.), Semantic web evaluation challenge (pp. 243\u2013254). Springer."},{"key":"4774_CR8","doi-asserted-by":"crossref","unstructured":"Do, H. H. N., Chandrasekaran, M. K., Cho, P. S., & Kan, M. Y. (2013). Extracting and matching authors and affiliations in scholarly documents. In Proceedings of the 13th ACM\/IEEE-CS joint conference on digital libraries (pp. 219\u2013228). ACM.","DOI":"10.1145\/2467696.2467703"},{"key":"4774_CR9","doi-asserted-by":"crossref","unstructured":"Granitzer, M., Hristakeva, M., Jack, K., & Knight, R. (2012). A comparison of metadata extraction techniques for crowdsourced bibliographic metadata management. In Proceedings of the 27th annual ACM symposium on applied computing (pp. 962\u2013964). ACM.","DOI":"10.1145\/2245276.2245462"},{"issue":"3","key":"4774_CR10","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1087\/20100308","volume":"23","author":"AE Jinha","year":"2010","unstructured":"Jinha, A. E. (2010). Article 50 million: An estimate of the number of scholarly articles in existence. Learned Publishing, 23(3), 258\u2013263.","journal-title":"Learned Publishing"},{"key":"4774_CR11","volume-title":"The stm report. An overview of scientific and scholarly publishing","author":"R Johnson","year":"2018","unstructured":"Johnson, R., Watkinson, A., & Mabe, M. (2018). The stm report. An overview of scientific and scholarly publishing (5th ed.). STM Association.","edition":"5"},{"issue":"4","key":"4774_CR12","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1162\/coli.2006.32.4.485","volume":"32","author":"T Kiss","year":"2006","unstructured":"Kiss, T., & Strunk, J. (2006). Unsupervised multilingual sentence boundary detection. Computational Linguistics, 32(4), 485\u2013525.","journal-title":"Computational Linguistics"},{"issue":"3\u20134","key":"4774_CR13","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/s00799-014-0115-1","volume":"14","author":"S Klampfl","year":"2014","unstructured":"Klampfl, S., Granitzer, M., Jack, K., & Kern, R. (2014). Unsupervised document structure analysis of digital scientific articles. International Journal on Digital Libraries, 14(3\u20134), 83\u201399.","journal-title":"International Journal on Digital Libraries"},{"issue":"1","key":"4774_CR14","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/PL00013570","volume":"4","author":"S Klink","year":"2001","unstructured":"Klink, S., & Kieninger, T. (2001). Rule-based document structure understanding with a fuzzy combination of layout and textual features. International Journal on Document Analysis and Recognition, 4(1), 18\u201326.","journal-title":"International Journal on Document Analysis and Recognition"},{"key":"4774_CR15","unstructured":"Lin, Y., Michel, J.-B., Aiden, E. L., Orwant, J., Brockman, W., & Petrov, S. (2022). Syntactic annotations for the google books ngram corpus."},{"key":"4774_CR28","doi-asserted-by":"crossref","unstructured":"Luong, M. T., Nguyen, T. D., & Kan, M. Y. (2012). Logical structure recovery in scholarly articles with rich document features. In Multimedia storage and retrieval innovations for digital library systems (pp. 270\u2013292). IGI Global.","DOI":"10.4018\/978-1-4666-0900-6.ch014"},{"issue":"1","key":"4774_CR16","doi-asserted-by":"publisher","first-page":"25","DOI":"10.2174\/2213275911666180627093515","volume":"11","author":"K Ma","year":"2018","unstructured":"Ma, K. (2018). Automatic literature metadata extraction from datacite services. Recent Patents on Computer Science, 11(1), 25\u201331.","journal-title":"Recent Patents on Computer Science"},{"issue":"1","key":"4774_CR17","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1186\/1751-0473-7-7","volume":"7","author":"C Ramakrishnan","year":"2012","unstructured":"Ramakrishnan, C., Patnia, A., Hovy, E., & Burns, G. A. (2012). Layout-aware text extraction from full-text pdf of scientific articles. Source Code for Biology and Medicine, 7(1), 7.","journal-title":"Source Code for Biology and Medicine"},{"issue":"12","key":"4774_CR18","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1038\/nrg3337","volume":"13","author":"D Rebholz-Schuhmann","year":"2012","unstructured":"Rebholz-Schuhmann, D., Oellrich, A., & Hoehndorf, R. (2012). Text-mining solutions for biomedical research: Enabling integrative biology. Nature Reviews Genetics, 13(12), 829\u2013839.","journal-title":"Nature Reviews Genetics"},{"issue":"4","key":"4774_CR19","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1007\/s10032-015-0253-z","volume":"18","author":"K Santosh","year":"2015","unstructured":"Santosh, K. (2015). g-dice: Graph mining-based document information content exploitation. International Journal on Document Analysis and Recognition, 18(4), 337\u2013355.","journal-title":"International Journal on Document Analysis and Recognition"},{"issue":"3","key":"4774_CR20","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1007\/s10032-016-0267-1","volume":"19","author":"X Su","year":"2016","unstructured":"Su, X., Gao, G., Wei, H., & Bao, F. (2016). A knowledge-based recognition system for historical Mongolian documents. International Journal on Document Analysis and Recognition, 19(3), 221\u2013235.","journal-title":"International Journal on Document Analysis and Recognition"},{"key":"4774_CR21","doi-asserted-by":"crossref","unstructured":"Tkaczyk, D., Bolikowski, L., Czeczko, A., & Rusek, K. (2012). A modular metadata extraction system for born-digital articles. In 2012 10th IAPR international workshop on document analysis systems (DAS) (pp. 11\u201316). IEEE.","DOI":"10.1109\/DAS.2012.4"},{"issue":"4","key":"4774_CR22","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/s10032-015-0249-8","volume":"18","author":"D Tkaczyk","year":"2015","unstructured":"Tkaczyk, D., Szostek, P., Fedoryszak, M., Dendek, P. J., & Bolikowski, \u0141. (2015). Cermine: Automatic extraction of structured metadata from scientific literature. International Journal on Document Analysis and Recognition, 18(4), 317\u2013335.","journal-title":"International Journal on Document Analysis and Recognition"},{"key":"4774_CR23","doi-asserted-by":"crossref","unstructured":"Tsai, C.-T., Kundu, G., & Roth, D. (2013). Concept-based analysis of scientific literature. In Proceedings of the 22nd ACM international conference on conference on information & knowledge management (pp. 1733\u20131738). ACM.","DOI":"10.1145\/2505515.2505613"},{"key":"4774_CR24","doi-asserted-by":"crossref","unstructured":"Tuarob, S., Bhatia, S., Mitra, P., & Giles, C. L. (2013). Automatic detection of pseudocodes in scholarly documents using machine learning. In 2013 12th international conference on document analysis and recognition (pp. 738\u2013742). IEEE.","DOI":"10.1109\/ICDAR.2013.151"},{"issue":"10","key":"4774_CR25","doi-asserted-by":"publisher","first-page":"1881","DOI":"10.1109\/TKDE.2019.2913376","volume":"32","author":"S Tuarob","year":"2020","unstructured":"Tuarob, S., Kang, S. W., Wettayakorn, P., Pornprasit, C., Sachati, T., Hassan, S.-U., & Haddawy, P. (2020). Automatic classification of algorithm citation functions in scientific literature. IEEE Transactions on Knowledge and Data Engineering, 32(10), 1881\u20131896. https:\/\/doi.org\/10.1109\/TKDE.2019.2913376","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"issue":"1","key":"4774_CR26","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1145\/959242.959249","volume":"5","author":"T Washio","year":"2003","unstructured":"Washio, T., & Motoda, H. (2003). State of the art of graph-based data mining. Acm Sigkdd Explorations Newsletter, 5(1), 59\u201368.","journal-title":"Acm Sigkdd Explorations Newsletter"},{"issue":"3","key":"4774_CR27","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1609\/aimag.v36i3.2601","volume":"36","author":"J Wu","year":"2015","unstructured":"Wu, J., Williams, K. M., Chen, H.-H., Khabsa, M., Caragea, C., Tuarob, S., Ororbia, A. G., Jordan, D., Mitra, P., & Giles, C. L. (2015). Citeseerx: AI in a digital library search engine. AI Magazine, 36(3), 35\u201348.","journal-title":"AI Magazine"}],"container-title":["Scientometrics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-023-04774-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11192-023-04774-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-023-04774-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,12]],"date-time":"2023-07-12T14:11:33Z","timestamp":1689171093000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11192-023-04774-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,22]]},"references-count":28,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["4774"],"URL":"https:\/\/doi.org\/10.1007\/s11192-023-04774-7","relation":{},"ISSN":["0138-9130","1588-2861"],"issn-type":[{"type":"print","value":"0138-9130"},{"type":"electronic","value":"1588-2861"}],"subject":[],"published":{"date-parts":[[2023,6,22]]},"assertion":[{"value":"29 August 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 June 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}