{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:49:24Z","timestamp":1759938564638,"version":"3.40.4"},"reference-count":59,"publisher":"Springer Science and Business Media LLC","issue":"3-4","license":[{"start":{"date-parts":[[2014,6,27]],"date-time":"2014-06-27T00:00:00Z","timestamp":1403827200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Digit Libr"],"published-print":{"date-parts":[[2014,8]]},"DOI":"10.1007\/s00799-014-0118-y","type":"journal-article","created":{"date-parts":[[2014,6,26]],"date-time":"2014-06-26T13:39:15Z","timestamp":1403789955000},"page":"149-166","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":32,"title":["Profiling web archive coverage for top-level domain and content language"],"prefix":"10.1007","volume":"14","author":[{"given":"Ahmed","family":"AlSum","sequence":"first","affiliation":[]},{"given":"Michele C.","family":"Weigle","sequence":"additional","affiliation":[]},{"given":"Michael L.","family":"Nelson","sequence":"additional","affiliation":[]},{"given":"Herbert","family":"Van de Sompel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,6,27]]},"reference":[{"key":"118_CR1","unstructured":"ISO 639\u20133. URL http:\/\/www-01.sil.org\/iso639-3\/ . Accessed 30 Oct 2013"},{"key":"118_CR2","doi-asserted-by":"crossref","unstructured":"Ainsworth, S.G., AlSum, A., SalahEldeen, H., Weigle, M.C., Nelson, M.L.: How much of the Web is Archived? In: Proceedings of the 11th annual international ACM\/IEEE Joint Conference on Digital libraries, JCDL \u201911, pp. 133\u2013136 (2011)","DOI":"10.1145\/1998076.1998100"},{"key":"118_CR3","doi-asserted-by":"crossref","unstructured":"AlNoamany, Y., AlSum, A., Weigle, M.C., Nelson, M.L.: Who and what links to the internet archive. In: Proceedings of the 17th International Conference on Theory and Practice of Digital Libraries, TPDL \u201913, pp. 346\u2013357 (2013)","DOI":"10.1007\/978-3-642-40501-3_35"},{"key":"118_CR4","doi-asserted-by":"crossref","unstructured":"AlNoamany, Y., Weigle, M.C., Nelson, M.L.: Access patterns for robots and humans in web archives. In: Proceedings of the 13th ACM\/IEEE-CS Joint Conference on Digital Libraries, pp. 339\u2013348 (2013)","DOI":"10.1145\/2467696.2467722"},{"key":"118_CR5","first-page":"60","volume-title":"Proceeding of the 17th International Conference of Theory of Practice of Digital Libraries, TPDL 2013","author":"A AlSum","year":"2013","unstructured":"AlSum, A., Weigle, M., Nelson, M., Sompel, H.: Profiling Web Archive Coverage for Top-Level Domain and Content Language. In: Aalberg, T., Papatheodorou, C., Dobreva, M., Tsakonas, G., Farrugia, C. (eds.) Proceeding of the 17th International Conference of Theory of Practice of Digital Libraries, TPDL 2013, pp. 60\u201371. Springer, Berlin Heidelberg (2013)"},{"issue":"2","key":"118_CR6","doi-asserted-by":"crossref","first-page":"179","DOI":"10.18352\/lq.7987","volume":"20","author":"S Aubry","year":"2010","unstructured":"Aubry, S.: Introducing web archives as a new library service: the experience of the national library of France. LIBER Q. 20(2), 179\u2013199 (2010)","journal-title":"LIBER Q."},{"key":"118_CR7","volume-title":"Modern Information Retrieval: The Concepts and Technology Behind Search","author":"R Baeza-Yates","year":"2011","unstructured":"Baeza-Yates, R., Riberio-Neto, B.: Modern Information Retrieval: The Concepts and Technology Behind Search, 2nd edn. Addison-Wesley Professional, London (2011)","edition":"2"},{"issue":"1","key":"118_CR8","first-page":"1082","volume":"12","author":"S Bailey","year":"2006","unstructured":"Bailey, S., Thompson, D.: UKWAC building the UK\u2019s first public web archive. D-Lib Mag. 12(1), 1082\u20139873 (2006)","journal-title":"D-Lib Mag."},{"key":"118_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1007\/11880561_26","volume-title":"String Processing and Information Retrieval SE-26","author":"M Baillie","year":"2006","unstructured":"Baillie, M., Azzopardi, L., Crestani, F.: Adaptive query-based sampling of distributed collections. In: Crestani, F., Ferragina, P., Sanderson, M. (eds.) String Processing and Information Retrieval SE-26. Lecture Notes in Computer Science, vol. 4209, pp. 316\u2013328. Springer, Berlin Heidelberg (2006)"},{"issue":"5","key":"118_CR10","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1145\/1411509.1411514","volume":"55","author":"Z Bar-Yossef","year":"2008","unstructured":"Bar-Yossef, Z., Gurevich, M.: Random sampling from a search engine\u2019s index. J. ACM (JACM) 55(5), 24 (2008)","journal-title":"J. ACM (JACM)"},{"key":"118_CR11","unstructured":"Braden, R.: RFC 1123-Requirements for Internet Hosts-Application and Support (1989). URL http:\/\/www.ietf.org\/rfc\/rfc1123.txt"},{"key":"118_CR12","doi-asserted-by":"crossref","DOI":"10.29085\/9781856049009","volume-title":"Archiving Websites: A Practical Guide for Information Management Professionals","author":"A Brown","year":"2006","unstructured":"Brown, A.: Archiving Websites: A Practical Guide for Information Management Professionals, 1st edn. Facet, London (2006)","edition":"1"},{"key":"118_CR13","volume-title":"Archiving Websites. General Considerations and Strategies","author":"N Br\u00fcgger","year":"2005","unstructured":"Br\u00fcgger, N.: Archiving Websites. General Considerations and Strategies, 1st edn. The Center for Internet Research, Aarhus N (2005)","edition":"1"},{"key":"118_CR14","doi-asserted-by":"crossref","unstructured":"Brunelle, J.F., Nelson, M.L.: An evaluation of caching policies for Memento Timemaps. In: Proceedings of the 13th ACM\/IEEE-CS Joint Conference on Digital Libraries. JCDL \u201913, pp. 267\u2013276. ACM Press, New York (2013)","DOI":"10.1145\/2467696.2467717"},{"key":"118_CR15","first-page":"127","volume-title":"Advances in Information Retrieval SE-5, The Information Retrieval Series","author":"J Callan","year":"2000","unstructured":"Callan, J.: Distributed information retrieval. In: Croft, W. (ed.) Advances in Information Retrieval SE-5, The Information Retrieval Series, vol. 7, pp. 127\u2013150. Springer, New York (2000)"},{"issue":"2","key":"118_CR16","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1145\/382979.383040","volume":"19","author":"J Callan","year":"2001","unstructured":"Callan, J., Connell, M.: Query-based sampling of text databases. ACM Trans. Inform. Syst. 19(2), 97\u2013130 (2001)","journal-title":"ACM Trans. Inform. Syst."},{"issue":"2","key":"118_CR17","doi-asserted-by":"publisher","first-page":"479","DOI":"10.1145\/304181.304224","volume":"28","author":"J Callan","year":"1999","unstructured":"Callan, J., Connell, M., Du, A.: Automatic discovery of language models for text databases. ACM SIGMOD Record 28(2), 479\u2013490 (1999)","journal-title":"ACM SIGMOD Record"},{"key":"118_CR18","doi-asserted-by":"crossref","unstructured":"Callan, J.P., Lu, Z., Croft, W.B.: Searching distributed collections with inference networks. In: Proceedings of the 18th annual international ACM SIGIR conference on Research and development in information retrieval. SIGIR \u201995, pp. 21\u201328. ACM Press, New York (1995)","DOI":"10.1145\/215206.215328"},{"key":"118_CR19","doi-asserted-by":"crossref","unstructured":"Chakrabarti, S., Joshi, M.M., Punera, K., Pennock, D.M.: The structure of broad topics on the web. In: Proceedings of the 11th international conference on World Wide Web. WWW \u201902, pp. 251\u2013260. ACM Press, New York (2002)","DOI":"10.1145\/511446.511480"},{"key":"118_CR20","unstructured":"Chen, K., Chen, Y., Ting, P.: Developing national Taiwan university web archiving system. In: Proceedings of 8th International Web Archiving Workshop, IWAW \u201908 (2008)"},{"key":"118_CR21","unstructured":"Clausen, L.R.: Overview of the Netarkivet web archiving system. In: Proceedings of 6th International Web Archiving Workshop, IWAW \u201906 (2006)"},{"key":"118_CR22","doi-asserted-by":"crossref","unstructured":"Craswell, N., Bailey, P., Hawking, D.: Server selection on the World Wide Web. In: Proceedings of the fifth ACM conference on Digital libraries. DL \u201900, pp. 37\u201346. ACM Press, New York (2000)","DOI":"10.1145\/336597.336628"},{"key":"118_CR23","doi-asserted-by":"crossref","unstructured":"D\u2019Souza, D.J., Thom, J.A., Zobel, J.: A comparison of techniques for selecting text collections. In: Proceedings of 11th Australasian Database Conference, ADC 2000, pp. 28\u201332 (2000)","DOI":"10.1109\/ADC.2000.819810"},{"key":"118_CR24","unstructured":"Gomes, D., Nogueira, A., Miranda, J.a., Costa, M.: Introducing the Portuguese web archive initiative. In: Proceedings of 8th International Web Archiving Workshop, IWAW \u201908 (2008)"},{"issue":"2","key":"118_CR25","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1145\/191843.191869","volume":"23","author":"L Gravano","year":"1994","unstructured":"Gravano, L., Garc\u00eda-Molina, H., Tomasic, A.: The effectiveness of GIOSS for the text database discovery problem. ACM SIGMOD Record 23(2), 126\u2013137 (1994)","journal-title":"ACM SIGMOD Record"},{"issue":"2","key":"118_CR26","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1145\/320248.320252","volume":"24","author":"L Gravano","year":"1999","unstructured":"Gravano, L., Garc\u00eda-Molina, H., Tomasic, A.: GlOSS: text-source discovery over the Internet. ACM Trans. Database Syst. 24(2), 229\u2013264 (1999)","journal-title":"ACM Trans. Database Syst."},{"key":"118_CR27","unstructured":"Grotke, A.: IIPC 2008 Member Profile Survey Results. Tech. rep., International Internet Preservation Consortium Publications (2008). URL http:\/\/www.netpreserve.org\/resources\/2008-iipc-member-profile-survey-results"},{"key":"118_CR28","doi-asserted-by":"crossref","unstructured":"Gulli, A., Signorini, A.: The indexable web is more than 11.5 billion pages. In: International World Wide Web Conference, pp. 902\u2013903 (2005)","DOI":"10.1145\/1062745.1062789"},{"key":"118_CR29","unstructured":"Heslop, H., Davis, S., Wilson, A.: An Approach to the Preservation of Digital Records. Tech. rep., National Archives of Australia (2002). URL http:\/\/www.naa.gov.au\/Images\/An-approach-Green-Paper_tcm16-47161"},{"issue":"3","key":"118_CR30","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1016\/j.datak.2006.08.008","volume":"62","author":"CA Heuser","year":"2007","unstructured":"Heuser, C.A., Mecca, G., Raunich, S., Pappalardo, A.: A new algorithm for clustering search results. Data Knowl. Eng. 62(3), 504\u2013522 (2007)","journal-title":"Data Knowl. Eng."},{"key":"118_CR31","doi-asserted-by":"crossref","unstructured":"Ipeirotis, P.G., Gravano, L.: Distributed search over the hidden web: hierarchical database sampling and selection. In: Proceeding of the 28th Very-Large Database conference, VLDB \u201902, pp. 394\u2013405 (2002)","DOI":"10.1016\/B978-155860869-6\/50042-1"},{"issue":"2","key":"118_CR32","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1145\/376284.375671","volume":"30","author":"PG Ipeirotis","year":"2001","unstructured":"Ipeirotis, P.G., Gravano, L., Sahami, M.: Probe, count, and classify. ACM SIGMOD Record 30(2), 67\u201378 (2001)","journal-title":"ACM SIGMOD Record"},{"key":"118_CR33","unstructured":"Kavcic-colic, A., Grobelnik, M.: Archiving the Slovenian Web : Recent Experiences. In: Proceedings of 4th International Web Archiving Workshop, IWAW \u201904 (2004)"},{"issue":"1","key":"118_CR34","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/TPDS.2004.1264782","volume":"15","author":"R Losee","year":"2004","unstructured":"Losee, R., Church, L.: Information retrieval with distributed databases: analytic models of performance. IEEE Transactions on Parallel and Distributed Systems 15(1), 18\u201327 (2004)","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"118_CR35","doi-asserted-by":"crossref","unstructured":"Lu, J., Callan, J.: Federated search of text-based digital libraries in hierarchical peer-to-peer networks. In: Proceedings of 27th European Conference on Information Retrieval Research, ECIR \u201905, pp. 52\u201366 (2005)","DOI":"10.1007\/978-3-540-31865-1_5"},{"key":"118_CR36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-46332-0","volume-title":"Web Archiving","author":"J Masan\u00e8s","year":"2006","unstructured":"Masan\u00e8s, J.: Web Archiving. Springer, Berlin, Heidelberg (2006)"},{"issue":"1","key":"118_CR37","doi-asserted-by":"publisher","first-page":"48","DOI":"10.1145\/505282.505284","volume":"34","author":"W Meng","year":"2002","unstructured":"Meng, W., Yu, C., Liu, K.L.: Building efficient and effective metasearch engines. ACM Comput. Surv. 34(1), 48\u201389 (2002)","journal-title":"ACM Comput. Surv."},{"key":"118_CR38","first-page":"67b","volume":"3","author":"G Monroe","year":"2002","unstructured":"Monroe, G., French, J., Powell, A.: Obtaining language models of web collections using query-based sampling techniques. Hawaii Int. Conf. Syst. Sci. 3, 67b (2002)","journal-title":"Hawaii Int. Conf. Syst. Sci."},{"key":"118_CR39","doi-asserted-by":"crossref","unstructured":"Niu, J.: An overview of web archiving. D-Lib Mag. 18(3\/4) (2012)","DOI":"10.1045\/march2012-niu1"},{"key":"118_CR40","doi-asserted-by":"crossref","unstructured":"Niu, J.: Functionalities of web archives. D-Lib Mag. 18(3\/4) (2012)","DOI":"10.1045\/march2012-niu2"},{"key":"118_CR41","unstructured":"Phillips, A., Davis, M.: RFC 5646-Tags for Identifying Languages (2009). URL http:\/\/tools.ietf.org\/html\/rfc5646"},{"issue":"4","key":"118_CR42","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1145\/944012.944016","volume":"21","author":"AL Powell","year":"2003","unstructured":"Powell, A.L., French, J.C.: Comparing the performance of collection selection algorithms. ACM Trans. Inform. Syst. 21(4), 412\u2013456 (2003)","journal-title":"ACM Trans. Inform. Syst."},{"key":"118_CR43","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/978-1-4614-1981-5_12","volume-title":"Economics of Information Security and Privacy III SE-12","author":"S Preibusch","year":"2013","unstructured":"Preibusch, S., Bonneau, J.: The privacy landscape: product differentiation on data collection. In: Schneier, B. (ed.) Economics of Information Security and Privacy III SE-12, pp. 263\u2013283. Springer, New York (2013)"},{"key":"118_CR44","unstructured":"Rossi, A.: Fixing Broken Links on the Internet (2013). URL http:\/\/blog.archive.org\/2013\/10\/25\/fixing-broken-links\/"},{"key":"118_CR45","unstructured":"Sanderson, R.: Memento Tools: Proxy Scripts (2010). URL http:\/\/www.mementoweb.org\/tools\/proxy\/"},{"key":"118_CR46","unstructured":"Sanderson, R., Shankar, H., AlSum, A.: Memento aggregator source code (2010). URL https:\/\/code.google.com\/p\/memento-server"},{"issue":"2","key":"118_CR47","first-page":"90","volume":"41","author":"R Shiozaki","year":"2009","unstructured":"Shiozaki, R., Eisenschitz, T.: Role and justification of web archiving by national libraries: a questionnaire survey. J. Libr. Inform. Sci. 41(2), 90\u2013107 (2009)","journal-title":"J. Libr. Inform. Sci."},{"key":"118_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/11610113_7","volume-title":"Frontiers of WWW Research and Development-APWeb 2006 SE-7","author":"M Shokouhi","year":"2006","unstructured":"Shokouhi, M., Scholer, F., Zobel, J.: Sample sizes for query probing in uncooperative distributed information retrieval. In: Zhou, X., Li, J., Shen, H., Kitsuregawa, M., Zhang, Y. (eds.) Frontiers of WWW Research and Development-APWeb 2006 SE-7. Lecture Notes in Computer Science, vol. 3841, pp. 63\u201375. Springer, Berlin Heidelberg (2006)"},{"issue":"1","key":"118_CR49","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1561\/1500000010","volume":"5","author":"M Shokouhi","year":"2011","unstructured":"Shokouhi, M., Si, L.: Federated search. Found. Trends Inform. Retrieval 5(1), 1\u2013102 (2011)","journal-title":"Found. Trends Inform. Retrieval"},{"key":"118_CR50","doi-asserted-by":"crossref","unstructured":"Si, L., Callan, J.: Modeling search engine effectiveness for federated search. In: Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval. SIGIR \u201905, pp. 83\u201392. ACM Press, New York (2005)","DOI":"10.1145\/1076034.1076051"},{"key":"118_CR51","doi-asserted-by":"crossref","unstructured":"Stirling, P., Illien, G., Sanz, P., Sepetjan, S.: The state of e-legal deposit in France: looking back at five years of putting new legislation into practice and envisioning the future. In: World Library and Information Congress: 77th IFLA General Conference and Assembly (2011)","DOI":"10.1177\/0340035211435323"},{"key":"118_CR52","doi-asserted-by":"crossref","unstructured":"Thomas, P., Hawking, D.: Evaluating sampling methods for uncooperative collections. In: Proceedings of the 30th annual international ACM SIGIR conference on Research and development in information retrieval, SIGIR \u201907, pp. 503\u2013512 (2007)","DOI":"10.1145\/1277741.1277828"},{"key":"118_CR53","unstructured":"Tofel, B.: \u2018Wayback\u2019 for Accessing Web Archives. In: Proceedings of 7th International Web Archiving Workshop, IWAW \u201907 (2007)"},{"key":"118_CR54","unstructured":"Van de Sompel, H., Nelson, M.L., Sanderson, R.: RFC 7089-HTTP framework for time-based access to resource states-Memento (2013). URL http:\/\/tools.ietf.org\/html\/rfc7089"},{"key":"118_CR55","unstructured":"Van de Sompel, H., Nelson, M.L., Sanderson, R., Balakireva, L.L., Ainsworth, S., Shankar, H.: Memento: Time Travel for the Web. Tech. Rep. arXiv:0911.1112 (2009)"},{"key":"118_CR56","unstructured":"Van de Sompel, H., Sanderson, R., Nelson, M.L., Balakireva, L.L., Shankar, H., Ainsworth, S., Sompel, H.V.D.: An HTTP-based versioning mechanism for linked data. In: Proceedings of the Linked Data on the Web Workshop, LDOW 2010 (2010)"},{"key":"118_CR57","unstructured":"Vlcek, I.: Identification and archiving of the Czech web outside the national domain. In: Proceedings of 8th International Web Archiving Workshop, IWAW \u201908 (2008)"},{"key":"118_CR58","unstructured":"Yan, H., Huang, L., Chen, C., Xie, Z.: A new data storage and service model of China web. In: Proceedings of 4th International Web Archiving Workshop, IWAW \u201904 (2004)"},{"issue":"2","key":"118_CR59","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1111\/j.0824-7935.2005.00271.x","volume":"21","author":"H Zhuge","year":"2005","unstructured":"Zhuge, H., Liu, J., Feng, L., Sun, X., He, C.: Query routing in a peer-to-peer semantic link network. Comput. Intell. 21(2), 197\u2013216 (2005)","journal-title":"Comput. Intell."}],"container-title":["International Journal on Digital Libraries"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0118-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00799-014-0118-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0118-y","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00799-014-0118-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T13:33:13Z","timestamp":1746279193000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00799-014-0118-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014,6,27]]},"references-count":59,"journal-issue":{"issue":"3-4","published-print":{"date-parts":[[2014,8]]}},"alternative-id":["118"],"URL":"https:\/\/doi.org\/10.1007\/s00799-014-0118-y","relation":{},"ISSN":["1432-5012","1432-1300"],"issn-type":[{"type":"print","value":"1432-5012"},{"type":"electronic","value":"1432-1300"}],"subject":[],"published":{"date-parts":[[2014,6,27]]},"assertion":[{"value":"30 October 2013","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2014","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2014","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 June 2014","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}