{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T02:59:05Z","timestamp":1772765945199,"version":"3.50.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T00:00:00Z","timestamp":1746144000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T00:00:00Z","timestamp":1746144000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-025-04000-6","type":"journal-article","created":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T19:09:20Z","timestamp":1746212960000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A Comprehensive Metadata Framework for Preservation and Accessibility of Digital News and Educational Resource Management"],"prefix":"10.1007","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4656-1041","authenticated-orcid":false,"given":"Muzammil","family":"Khan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huma","family":"Rani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sana","family":"Ullah","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Arif Ur","family":"Rahman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,2]]},"reference":[{"key":"4000_CR1","unstructured":"Khan M. Using text processing techniques for linking news stories for digital preservation. PhD thesis, Faculty of Computer Science, Preston University Kohat, Islamabad Campus, HEC Pakistan (2018)"},{"key":"4000_CR2","unstructured":"Shepard C. Google\u2019s index size revealed: 400 billion docs ( changing). https:\/\/zyppy.com\/seo\/google-index-size\/, 06th Oct, 2024 and Accessed on 28th Feb, 2025 (2025)"},{"key":"4000_CR3","unstructured":"Size W. The size of the world wide web (the internet). https:\/\/www.worldwidewebsize.com\/, Accessed on 28th Feb, 2025 (2025)"},{"issue":"2","key":"4000_CR4","first-page":"68","volume":"33","author":"BF Lavoie","year":"2004","unstructured":"Lavoie BF. The open archival information system reference model: introductory guide. Microform Digit Rev. 2004;33(2):68\u201381.","journal-title":"Microform Digit Rev"},{"key":"4000_CR5","unstructured":"Guta M. Small business trends. Blog, https:\/\/smallbiztrends.com\/2019\/02\/web-hosting-uptime-statistics.html, Accessed on 28th Feb, 2025 (Feb 15, 2019)"},{"key":"4000_CR6","doi-asserted-by":"crossref","unstructured":"Ntoulas A, Cho J, Olston C. What\u2019s new on the web? the evolution of the web from a search engine perspective. In: Proceedings of the 13th International Conference on World Wide Web, pp. 1\u201312 (2004)","DOI":"10.1145\/988672.988674"},{"key":"4000_CR7","unstructured":"Lyman P. Archiving the world wide web. Building a national strategy for digital preservation: Issues in digital media archiving, 38\u201351 (2002)"},{"key":"4000_CR8","doi-asserted-by":"crossref","unstructured":"Pleiss DC. Preserving contemporary newspapers: A new survey of state libraries in the usa. IFLA Journal, 0340\u20130361 (2024)","DOI":"10.1177\/03400352241286174"},{"key":"4000_CR9","doi-asserted-by":"crossref","unstructured":"Khan M, Rahman AU, Ullah M, Naseem R. The role of named entities in linking news articles during preservation. In: International Conference on the Sciences of Electronics, Technologies of Information and Telecommunications, pp. 50\u201358 (2018). Springer","DOI":"10.1007\/978-3-030-21005-2_5"},{"key":"4000_CR10","doi-asserted-by":"crossref","unstructured":"Khan M, Rahman AU, Awan MD, Alam SM. Normalizing digital news-stories for preservation. In: Digital Information Management (ICDIM), 2016 Eleventh International Conference On, pp. 85\u201390 (2016). IEEE","DOI":"10.1109\/ICDIM.2016.7829785"},{"key":"4000_CR11","unstructured":"Riley J. Understanding metadata. Official of NISO, https:\/\/www.niso.org\/publications\/understanding-metadata, Accessed on 28th Feb, 2025 (2017)"},{"issue":"3","key":"4000_CR12","first-page":"209","volume":"4","author":"VB Dashrath","year":"2014","unstructured":"Dashrath VB. Role of metadata in digital resource management. Int J Digital Library Serv. 2014;4(3):209\u20132017.","journal-title":"Int J Digital Library Serv"},{"issue":"3\u20134","key":"4000_CR13","first-page":"17","volume":"40","author":"J Greenberg","year":"2005","unstructured":"Greenberg J. Understanding metadata and metadata schemes. Catalog Classif Quart. 2005;40(3\u20134):17\u201336.","journal-title":"Catalog Classif Quart"},{"key":"4000_CR14","unstructured":"Greenberg J. Dublin core: History, key concepts, and evolving context (part one). In: Slide Presentation on Dc-2010 International Conference on Dublin Core and Metadata Applications Pittsburgh, PA (2010)"},{"key":"4000_CR15","unstructured":"Habib DP, Balliot RL. How to search the world wide web: a tutorial for beginners and non-experts (2000)"},{"issue":"2","key":"4000_CR16","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/j.aci.2017.05.006","volume":"14","author":"M Harran","year":"2018","unstructured":"Harran M, Farrelly W, Curran K. A method for verifying integrity & authenticating digital media. Appl Comput Inform. 2018;14(2):145\u201358.","journal-title":"Appl Comput Inform"},{"issue":"4","key":"4000_CR17","first-page":"1082","volume":"8","author":"M McClelland","year":"2002","unstructured":"McClelland M, McArthur D, Giersch S, Geisler G. Challenges for service providers when importing metadata in digital libraries. D-Lib Mag. 2002;8(4):1082\u20139873.","journal-title":"D-Lib Mag"},{"key":"4000_CR18","unstructured":"Khan M, Rahman AU. Digital news story preservation framework. In: Digital Libraries: Providing Quality Information: 17th International Conference on Asia-Pacific Digital Libraries, ICADL 2015, Seoul, Korea, December 9-12, 2015. Proceedings, vol. 9469, p. 350 (2015). Springer"},{"issue":"1","key":"4000_CR19","first-page":"71","volume":"38","author":"M Khan","year":"2019","unstructured":"Khan M, Rahman AU. A systematic approach towards web preservation. Inf Technol Libr. 2019;38(1):71\u201390.","journal-title":"Inf Technol Libr"},{"issue":"1","key":"4000_CR20","first-page":"140","volume":"32","author":"M Khan","year":"2017","unstructured":"Khan M, Rahman AU, Awan MD. Exploring the digital world of newspaper archives. Sci Technol J Portugal. 2017;32(1):140\u201364.","journal-title":"Sci Technol J Portugal"},{"key":"4000_CR21","doi-asserted-by":"crossref","unstructured":"Khan M, Rahman AU, Awan MD. Term-based approach for linking digital news stories. In: Italian Research Conference on Digital Libraries, pp. 127\u2013138 (2018). Springer","DOI":"10.1007\/978-3-319-73165-0_13"},{"key":"4000_CR22","doi-asserted-by":"crossref","unstructured":"Khan M, Khan SS, Ahmad A, Rahman AU. The role of news title for linking during preservation process in digital archives. Library Hi Tech (2020)","DOI":"10.1108\/LHT-07-2020-0157"},{"key":"4000_CR23","doi-asserted-by":"crossref","unstructured":"Khan M, Rahman AU, Ahmad A, Khan SS. A content-based technique for linking dual language news articles in an archive. J Inf Sci 0165551520937614 (2020)","DOI":"10.1177\/0165551520937614"},{"key":"4000_CR24","doi-asserted-by":"publisher","unstructured":"Syed MA, Rahman AU, Khan M. Quantifying the use of english words in urdu news-stories. In: 2019 International Conference on Asian Language Processing (IALP), pp. 1\u20136 (2019). https:\/\/doi.org\/10.1109\/IALP48816.2019.9037734","DOI":"10.1109\/IALP48816.2019.9037734"},{"issue":"7","key":"4000_CR25","doi-asserted-by":"publisher","first-page":"4435","DOI":"10.3390\/app13074435","volume":"13","author":"M Khan","year":"2023","unstructured":"Khan M, Khan SS, Alharbi Y, Alferaidi A, Alharbi TS, Yadav K. The role of transliterated words in linking bilingual news articles in an archive. Appl Sci. 2023;13(7):4435.","journal-title":"Appl Sci"},{"issue":"4","key":"4000_CR26","doi-asserted-by":"publisher","first-page":"215824402312013","DOI":"10.1177\/21582440231201368","volume":"13","author":"M Khan","year":"2023","unstructured":"Khan M, Alharbi Y, Alferaidi A, Alharbi TS, Yadav K. Metadata for efficient management of digital news articles in multilingual news archives. SAGE Open. 2023;13(4):21582440231201370.","journal-title":"SAGE Open"},{"issue":"15","key":"4000_CR27","doi-asserted-by":"publisher","first-page":"8566","DOI":"10.3390\/app13158566","volume":"13","author":"M Khan","year":"2023","unstructured":"Khan M, Ullah K, Alharbi Y, Alferaidi A, Alharbi TS, Yadav K, Alsharabi N, Ahmad A. Understanding the research challenges in low-resource language and linking bilingual news articles in multilingual news archive. Appl Sci. 2023;13(15):8566.","journal-title":"Appl Sci"},{"key":"4000_CR28","unstructured":"DCMI: Dublin core metadata initiative (dcmi). Official Website http:\/\/dublincore.org\/ (Accessed on 28th Feb, 2025) (2021)"},{"key":"4000_CR29","unstructured":"DCMI: The dublin core metadata initiative (dcmi). Official Website https:\/\/www.ifla.org\/best-practice-for-national-bibliographic-agencies-in-a-digital-age\/node\/8822 (Accessed on 28th Feb, 2025) (2025)"},{"key":"4000_CR30","doi-asserted-by":"crossref","unstructured":"Weibel S. Dublin core metadata for resource discovery. OCLC Online Computer Library Center, In. https:\/\/www.ietf.org\/rfc\/rfc2413.txt(Accessed on 28th Feb, 2025) (1998)","DOI":"10.17487\/rfc2413"},{"key":"4000_CR31","unstructured":"NISO-Press: Understanding metadata. National Information Standards, NISO (2004)"},{"key":"4000_CR32","unstructured":"MODS: Metadata object description schema (mods). Official Website http:\/\/www.loc.gov\/standards\/mods\/ (Accessed on 28th Feb, 2025) (2025)"},{"key":"4000_CR33","unstructured":"MODS: Metadata object description schema (mods). Official Website https:\/\/www.loc.gov\/standards\/mods\/mods-overview.html (Accessed on 28th Feb, 2025) (2025)"},{"key":"4000_CR34","doi-asserted-by":"crossref","unstructured":"McCallum SH. An introduction to the metadata object description schema (mods). Library hi tech (2004)","DOI":"10.1108\/07378830410524521"},{"key":"4000_CR35","unstructured":"MODS: Mods overview. Official Website https:\/\/www.loc.gov\/standards\/mods\/mods-overview.html (Accessed on 28th Feb, 2025) (2025)"},{"key":"4000_CR36","first-page":"8","volume":"3","author":"CA Lee","year":"2010","unstructured":"Lee CA. Open archival information system (oais) reference model. Encycl Libr Inf Sci. 2010;3:8.","journal-title":"Encycl Libr Inf Sci"},{"key":"4000_CR37","doi-asserted-by":"publisher","first-page":"093","DOI":"10.1093\/database\/baae093","volume":"2024","author":"OO Amusat","year":"2024","unstructured":"Amusat OO, Hegde H, Mungall CJ, Giannakou A, Byers NP, Gunter D, Fagnan K, Ramakrishnan L. Automated annotation of scientific texts for ml-based keyphrase extraction and validation. Database. 2024;2024:093.","journal-title":"Database"},{"key":"4000_CR38","unstructured":"Bouabdallah A, Gavilan J, Gerbl J, Patumcharoenpol P. Multimodal approach for metadata extraction from german scientific publications. arXiv preprint arXiv:2111.05736 (2021)"},{"key":"4000_CR39","doi-asserted-by":"crossref","unstructured":"Tareque\u00a0Shohan F, Tafseer\u00a0Nayeem M, Islam S, Ubaida\u00a0Akash A, Joty S. Xl-headtags: Leveraging multimodal retrieval augmentation for the multilingual generation of news headlines and tags. arXiv e-prints, 2406 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.771"},{"key":"4000_CR40","unstructured":"Huang AY, Nair A, Goh ZR, Liu T. Web archives metadata generation with gpt-4o: Challenges and insights. arXiv preprint arXiv:2411.05409 (2024)"},{"key":"4000_CR41","doi-asserted-by":"publisher","first-page":"22778","DOI":"10.1109\/ACCESS.2024.3363879","volume":"12","author":"S Pongpaichet","year":"2024","unstructured":"Pongpaichet S, Sukosit B, Duangtanawat C, Jamjongdamrongkit J, Mahacharoensuk C, Matangkarat K, Singhajan P, Noraset T, Tuarob S. Camelon: A system for crime metadata extraction and spatiotemporal visualization from online news articles. IEEE Access. 2024;12:22778\u2013802.","journal-title":"IEEE Access"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04000-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-025-04000-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04000-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,2]],"date-time":"2025-05-02T19:09:27Z","timestamp":1746212967000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-025-04000-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,2]]},"references-count":41,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2025,6]]}},"alternative-id":["4000"],"URL":"https:\/\/doi.org\/10.1007\/s42979-025-04000-6","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,5,2]]},"assertion":[{"value":"1 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The author(s) declared no potential Conflict of interest with respect to the research, authorship, and\/or publication of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human or animal participants performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}}],"article-number":"437"}}