{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,4]],"date-time":"2026-02-04T16:34:27Z","timestamp":1770222867382,"version":"3.49.0"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2016,7,16]],"date-time":"2016-07-16T00:00:00Z","timestamp":1468627200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"International Internet Preservation Consortium","award":["NONE"],"award-info":[{"award-number":["NONE"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2016,9]]},"DOI":"10.1007\/s00799-016-0184-4","type":"journal-article","created":{"date-parts":[[2016,7,16]],"date-time":"2016-07-16T06:23:24Z","timestamp":1468650204000},"page":"223-238","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Web archive profiling through CDX summarization"],"prefix":"10.1007","volume":"17","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8267-3326","authenticated-orcid":false,"given":"Sawood","family":"Alam","sequence":"first","affiliation":[]},{"given":"Michael L.","family":"Nelson","sequence":"additional","affiliation":[]},{"given":"Herbert","family":"Van de Sompel","sequence":"additional","affiliation":[]},{"given":"Lyudmila L.","family":"Balakireva","sequence":"additional","affiliation":[]},{"given":"Harihar","family":"Shankar","sequence":"additional","affiliation":[]},{"given":"David S. H.","family":"Rosenthal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2016,7,16]]},"reference":[{"key":"184_CR1","unstructured":"Alam, S., Cartledge, C.L., Nelson, M.L.: Support for various HTTP methods on the web. Tech. Rep (2014). arXiv:1405.2330"},{"key":"184_CR2","unstructured":"Alam, S., Kreymer, I., Nelson, M.L.: Object resource stream (ORS) and CDX-JSON (CDXJ) draft (2015). https:\/\/github.com\/oduwsdl\/ORS"},{"key":"184_CR3","doi-asserted-by":"crossref","unstructured":"Alam, S., Nelson, M.L., Van de Sompel, H., Balakireva, L., Shankar, H., Rosenthal, D.S.H.: Web archive profiling through CDX summarization. In: Proceedings of 19th international conference on theory and practice of digital libraries. TPDL 2015, vol. 9316, pp. 3\u201314. Pozna\u0144, Poland (2015)","DOI":"10.1007\/978-3-319-24592-8_1"},{"issue":"3\u20134","key":"184_CR4","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s00799-014-0111-5","volume":"14","author":"Y AlNoamany","year":"2014","unstructured":"AlNoamany, Y., AlSum, A., Weigle, M.C., Nelson, M.L.: Who and what links to the Internet Archive. Int. J. Digit. Librar. 14(3\u20134), 101\u2013115 (2014)","journal-title":"Int. J. Digit. Librar."},{"key":"184_CR5","first-page":"60","volume":"2013","author":"A AlSum","year":"2013","unstructured":"AlSum, A., Weigle, M.C., Nelson, M.L., Van de Sompel, H.: Profiling web archive coverage for top-level domain and content language. Proc. Int. Conf. Theory Pract. Digit. Librar. TPDL 2013, 60\u201371 (2013)","journal-title":"Proc. Int. Conf. Theory Pract. Digit. Librar. TPDL"},{"issue":"3\u20134","key":"184_CR6","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1007\/s00799-014-0118-y","volume":"14","author":"A AlSum","year":"2014","unstructured":"AlSum, A., Weigle, M.C., Nelson, M.L., Van de Sompel, H.: Profiling web archive coverage for top-level domain and content language. Int. J. Digit. Librar. 14(3\u20134), 149\u2013166 (2014)","journal-title":"Int. J. Digit. Librar."},{"key":"184_CR7","unstructured":"Ben-Kiki, O., Evans, C., Ingy d\u00f6t Net: YAML Ain\u2019t Markup Language (YAML $$^{{\\rm TM}}$$ TM ) Version 1.2 (2009). http:\/\/www.yaml.org\/spec\/1.2\/spec.html"},{"key":"184_CR8","doi-asserted-by":"publisher","unstructured":"Bornand, N.J., Balakireva, L., Van de Sompel, H.: Routing memento requests using binary classifiers. In: Proceedings of the 16th ACM\/IEEE-CS on joint conference on digital libraries, JCDL \u201916, pp. 63\u201372 (2016). doi: 10.1145\/2910896.2910899","DOI":"10.1145\/2910896.2910899"},{"key":"184_CR9","doi-asserted-by":"crossref","unstructured":"Crockford, D.: The application\/json media type for javascript object notation (JSON). RFC 4627 (2006)","DOI":"10.17487\/rfc4627"},{"key":"184_CR10","doi-asserted-by":"crossref","unstructured":"Deutsch, P.: GZIP file format specification version 4.3. RFC 1952 (1996)","DOI":"10.17487\/rfc1952"},{"issue":"5","key":"184_CR11","doi-asserted-by":"publisher","first-page":"702","DOI":"10.1002\/asi.20524","volume":"58","author":"L Egghe","year":"2007","unstructured":"Egghe, L.: Untangling Herdan\u2019s law and Heaps\u2019 law: mathematical and informetric arguments. J. Am. Soc. Inf. Sci. Technol. 58(5), 702\u2013709 (2007)","journal-title":"J. Am. Soc. Inf. Sci. Technol."},{"issue":"2","key":"184_CR12","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1145\/253262.253299","volume":"26","author":"L Gravano","year":"1997","unstructured":"Gravano, L., Chang, C.C.K., Garc\u00eda-Molina, H., Paepcke, A.: STARTS: stanford proposal for internet meta-searching. SIGMOD Rec. 26(2), 207\u2013218 (1997). doi: 10.1145\/253262.253299","journal-title":"SIGMOD Rec."},{"key":"184_CR13","unstructured":"Internet archive: CDX file format (2003). http:\/\/archive.org\/web\/researcher\/cdx_file_format.php"},{"key":"184_CR14","unstructured":"Internet archive: archive-it -web archiving services for libraries and archives (2006). https:\/\/www.archive-it.org\/"},{"key":"184_CR15","unstructured":"ISO 28500: WARC (Web ARChive) file format (2009). http:\/\/www.digitalpreservation.gov\/formats\/fdd\/fdd000236.shtml"},{"key":"184_CR16","doi-asserted-by":"publisher","unstructured":"Liu, L.: Query routing in large-scale digital library systems. In: 15th International Conference on Data Engineering, 1999. Proceedings, pp. 154\u2013163 (1999). doi: 10.1109\/ICDE.1999.754918","DOI":"10.1109\/ICDE.1999.754918"},{"issue":"1","key":"184_CR17","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1145\/505282.505284","volume":"34","author":"W Meng","year":"2002","unstructured":"Meng, W., Yu, C., Liu, K.L.: Building efficient and effective metasearch engines. ACM Comput. Surv. (CSUR) 34(1), 48\u201389 (2002)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"184_CR18","unstructured":"Mozilla Foundation: Public Suffix List (2015). https:\/\/publicsuffix.org\/"},{"key":"184_CR19","doi-asserted-by":"crossref","unstructured":"Sanderson, R.: Global web archive integration with memento. In: Proceedings of the 12th ACM\/IEEE-CS joint conference on digital libraries, pp. 379\u2013380. ACM, New York (2012)","DOI":"10.1145\/2232817.2232900"},{"key":"184_CR20","unstructured":"Sanderson, R., Van de Sompel, H., Nelson, M.L.: IIPC memento aggregator experiment (2012). http:\/\/www.netpreserve.org\/sites\/default\/files\/resources\/Sanderson.pdf"},{"key":"184_CR21","unstructured":"Sigursson, K., Stack, M., Ranitovic, I.: Heritrix user manual: sort-friendly URI reordering transform (2006). http:\/\/crawler.archive.org\/articles\/user_manual\/glossary.html#surt"},{"key":"184_CR22","unstructured":"Sporny, M., Kellogg, G., Lanthaler, M.: A JSON-based serialization for linked data. W3C Recommendation (2014)"},{"key":"184_CR23","unstructured":"Stanford University Libraries: Stanford Web Archive Portal (2013). https:\/\/swap.stanford.edu\/"},{"key":"184_CR24","doi-asserted-by":"crossref","unstructured":"Sugiura, A., Etzioni, O.: Query routing for web search engines: architecture and experiments. Comput. Netw. 33(1), 417\u2013429 (2000)","DOI":"10.1016\/S1389-1286(00)00059-1"},{"issue":"2","key":"184_CR25","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1109\/TKDE.2013.13","volume":"26","author":"T Tran","year":"2014","unstructured":"Tran, T., Zhang, L.: Keyword query routing. IEEE Trans. Knowl. Data Eng. 26(2), 363\u2013375 (2014)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"184_CR26","doi-asserted-by":"publisher","unstructured":"UK Web Archive: Crawled URL Index JISC UK Web Domain Dataset (1996\u20132013) (2014). doi: 10.5259\/ukwa.ds.2\/cdx\/1","DOI":"10.5259\/ukwa.ds.2\/cdx\/1"},{"key":"184_CR27","doi-asserted-by":"crossref","unstructured":"Van de Sompel, H., Nelson, M.L., Sanderson, R.: HTTP framework for time-based access to resource states\u2014Memento. RFC 7089 (2013)","DOI":"10.17487\/rfc7089"},{"key":"184_CR28","unstructured":"Weka: Attribute-relation file format (ARFF) (2009). http:\/\/weka.wikispaces.com\/ARFF"}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-016-0184-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00799-016-0184-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-016-0184-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-016-0184-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,11]],"date-time":"2019-09-11T03:23:08Z","timestamp":1568172188000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00799-016-0184-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,7,16]]},"references-count":28,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2016,9]]}},"alternative-id":["184"],"URL":"https:\/\/doi.org\/10.1007\/s00799-016-0184-4","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"value":"1432-5012","type":"print"},{"value":"1432-1300","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,7,16]]},"assertion":[{"value":"10 January 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 July 2016","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 July 2016","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2016","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}