{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,28]],"date-time":"2025-09-28T12:48:42Z","timestamp":1759063722913},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642030789"},{"type":"electronic","value":"9783642030796"}],"license":[{"start":{"date-parts":[[2009,1,1]],"date-time":"2009-01-01T00:00:00Z","timestamp":1230768000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2009]]},"DOI":"10.1007\/978-3-642-03079-6_15","type":"book-chapter","created":{"date-parts":[[2009,7,23]],"date-time":"2009-07-23T11:40:53Z","timestamp":1248349253000},"page":"185-200","source":"Crossref","is-referenced-by-count":10,"title":["Frequent Itemset Mining for Clustering Near Duplicate Web Documents"],"prefix":"10.1007","author":[{"given":"Dmitry I.","family":"Ignatov","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergei O.","family":"Kuznetsov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"issue":"5","key":"15_CR1","doi-asserted-by":"publisher","first-page":"434","DOI":"10.3844\/jcssp.2006.434.440","volume":"2","author":"A.M. Hasnah","year":"2006","unstructured":"Hasnah, A.M.: A New Filtering Algorithm for Duplicate Document Based on Concept Analysis. Journal of Computer Science\u00a02(5), 434\u2013440 (2006)","journal-title":"Journal of Computer Science"},{"issue":"10","key":"15_CR2","doi-asserted-by":"publisher","first-page":"1282","DOI":"10.1093\/bioinformatics\/btl099","volume":"22","author":"S. Barkow","year":"2006","unstructured":"Barkow, S., Bleuler, S., Prelic, A., Zimmermann, P., Zitzler, E.: BicAT: a biclustering analysis toolbox. Bioinformatics\u00a022(10), 1282\u20131283 (2006)","journal-title":"Bioinformatics"},{"key":"15_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"615","DOI":"10.1007\/978-3-540-24775-3_73","volume-title":"Advances in Knowledge Discovery and Data Mining","author":"J. Besson","year":"2004","unstructured":"Besson, J., Robardet, C., Boulicaut, J.-F.: Constraint-based mining of formal concepts in transactional data. In: Dai, H., Srikant, R., Zhang, C. (eds.) PAKDD 2004. LNCS, vol.\u00a03056, pp. 615\u2013624. Springer, Heidelberg (2004)"},{"key":"15_CR4","unstructured":"Borgelt, C.: Efficient Implementations of Apriori and Eclat. In: Proc. Workshop on Frequent Itemset Mining Implementations Proceedings of the IEEE ICDM Workshop on Frequent Itemset Mining Implementations (FIMI 2003) (2003)"},{"key":"15_CR5","unstructured":"Broder, A.: On the resemblance and containment of documents. In: Proc. Compression and Complexity of Sequences (SEQS: Sequences 1997)"},{"key":"15_CR6","doi-asserted-by":"crossref","unstructured":"Broder, A., Charikar, M., Frieze, A.M., Mitzenmacher, M.: Min-Wise Independent Permutations. In: Proc. STOC, pp. 327\u2013336 (1998)","DOI":"10.1145\/276698.276781"},{"key":"15_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/3-540-45123-4_1","volume-title":"Combinatorial Pattern Matching","author":"A. Broder","year":"2000","unstructured":"Broder, A.: Identifying and filtering near-duplicate documents. In: Giancarlo, R., Sankoff, D. (eds.) CPM 2000. LNCS, vol.\u00a01848, pp. 1\u201310. Springer, Heidelberg (2000)"},{"key":"15_CR8","unstructured":"Burdick, D., et al.: MAFIA: A Performance Study of mining Maximal Frequent Itemsets. In: Proc. Workshop on Frequent Itemset Mining Implementations Proceedings of the IEEE ICDM Workshop on Frequent Itemset Mining Implementations (FIMI 2003) (2003)"},{"key":"15_CR9","doi-asserted-by":"crossref","unstructured":"Cho, J., Shivakumar, N., Garcia-Molina, H.: Finding replicated web collections. In: Proc. SIGMOD Conference, pp. 355\u2013366 (2000)","DOI":"10.1145\/342009.335429"},{"issue":"2","key":"15_CR10","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1145\/506309.506311","volume":"20","author":"A. Chowdhury","year":"2002","unstructured":"Chowdhury, A., Frieder, O., Grossman, D.A., McCabe, M.C.: Collection statistics for fast duplicate document detection. ACM Transactions on Information Systems\u00a020(2), 171\u2013191 (2002)","journal-title":"ACM Transactions on Information Systems"},{"key":"15_CR11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-59830-2","volume-title":"Formal Concept Analysis: Mathematical Foundations","author":"B. Ganter","year":"1999","unstructured":"Ganter, B., Wille, R.: Formal Concept Analysis: Mathematical Foundations. Springer, Heidelberg (1999)"},{"key":"15_CR12","doi-asserted-by":"crossref","unstructured":"Goethals, B., Zaki, M.: Advances in Frequent Itemset Mining Implementations: Introduction to FIMI 2003. In: Proceedings of the IEEE ICDM Workshop on Frequent Itemset Mining Implementations, FIMI 2003 (2003)","DOI":"10.1145\/1007730.1007744"},{"key":"15_CR13","unstructured":"Grahne, G., Zhu, J.: Efficiently Using Prefix-trees in Mining Frequent Itemsets. In: Proc. FIMI 2003 Workshop (2003)"},{"key":"15_CR14","doi-asserted-by":"crossref","unstructured":"Haveliwala, T.H., Gionis, A., Klein, D., Indyk, P.: Evaluating Strategies for Similarity Search on the Web. In: Proc. WWW 2002, Honolulu, pp. 432\u2013442 (2002)","DOI":"10.1145\/511446.511502"},{"key":"15_CR15","volume-title":"Proc. of the 11th International World Wide Web Conference, WWW 2002","author":"S. Ilyinsky","year":"2002","unstructured":"Ilyinsky, S., Kuzmin, M., Melkov, A., Segalovich, I.: An efficient method to detect duplicates of Web documents with the use of inverted index. In: Proc. of the 11th International World Wide Web Conference, WWW 2002, Honolulu, Hawaii, USA, May 7-11. ACM, New York (2002)"},{"key":"15_CR16","unstructured":"Cluto, G.K.: A Clustering Toolkit. University of Minnesota, Department of Computer Science Minneapolis, MN 55455, Technical Report: 02-017, November 28 (2003)"},{"key":"15_CR17","doi-asserted-by":"crossref","unstructured":"Kolcz, A., Chowdhury, A., Alspector, J.: Improved Robustness of Signature-Based Near-Replica Detection via Lexicon Randomization. In: Kim, W., Kohavi, R., Gehrke, J., DuMouchel, W. (eds.) Proc. KDD 2004, Seattle, pp. 605\u2013610 (2004)","DOI":"10.1145\/1014052.1014127"},{"key":"15_CR18","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1080\/09528130210164170","volume":"14","author":"S.O. Kuznetsov","year":"2002","unstructured":"Kuznetsov, S.O., Obiedkov, S.A.: Comparing Performance of Algorithms for Generating Concept Lattices. Journal of Experimental and Theoretical Artificial Intelligence\u00a014, 189\u2013216 (2002)","journal-title":"Journal of Experimental and Theoretical Artificial Intelligence"},{"key":"15_CR19","unstructured":"Liu, G., Lu, H., Yu, J.X., Wei, W., Xiao, X.: AFOPT: An Efficient Implementation of Pattern Growth Approach. In: Proceedings of the IEEE ICDM Workshop on Frequent Itemset Mining Implementations (FIMI 2003) (2003)"},{"key":"15_CR20","series-title":"LNAI","doi-asserted-by":"publisher","first-page":"372","DOI":"10.1007\/978-3-540-24651-0_31","volume-title":"Concept Lattices","author":"D. Merwe van der","year":"2004","unstructured":"van der Merwe, D., Obiedkov, S.A., Kourie, D.: AddIntent: A New Incremental Algorithm for Constructing Concept Lattices. In: Eklund, P. (ed.) ICFCA 2004. LNCS (LNAI), vol.\u00a02961, pp. 372\u2013385. Springer, Heidelberg (2004)"},{"issue":"1","key":"15_CR21","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/S0306-4379(99)00003-4","volume":"24","author":"N. Pasquier","year":"1999","unstructured":"Pasquier, N., Bastide, Y., Taouil, R., Lakhal, L.: Efficient Mining of Association Rules Using Closed Itemset Lattices. Inform. Syst.\u00a024(1), 25\u201346 (1999)","journal-title":"Inform. Syst."},{"key":"15_CR22","volume-title":"Machine Learning and Applications","author":"M. Potthast","year":"2007","unstructured":"Potthast, M., Stein, B.: New Issues in Near-duplicate Detection, in Data Analysis. In: Machine Learning and Applications. Springer, Heidelberg (2007)"},{"key":"15_CR23","unstructured":"Pugh, W., Henzinger, M.: Detecting duplicate and near-duplicate files, United States Patent 6658423 (December 2, 2003)"},{"key":"15_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"204","DOI":"10.1007\/10704656_13","volume-title":"The World Wide Web and Databases","author":"N. Shivakumar","year":"1999","unstructured":"Shivakumar, N., Garcia-Molina, H.: Finding near-replicas of documents on the web. In: Atzeni, P., Mendelzon, A.O., Mecca, G. (eds.) WebDB 1998. LNCS, vol.\u00a01590, pp. 204\u2013212. Springer, Heidelberg (1999)"},{"key":"15_CR25","doi-asserted-by":"crossref","unstructured":"Xiao, C., Wang, W., Lin, X., Yu, J.X.: Efficient similarity joins for near duplicate detection. In: WWW 2008: Proceeding of the 17th international conference on World Wide Web, Beijing, China, pp. 131\u2013140 (2008)","DOI":"10.1145\/1367497.1367516"},{"key":"15_CR26","doi-asserted-by":"publisher","first-page":"311","DOI":"10.1023\/B:MACH.0000027785.44527.d6","volume":"55","author":"Y. Zhao","year":"2004","unstructured":"Zhao, Y., Karypis, G.: Empirical and Theoretical Comparisons of Selected Criterion Functions for Document Clustering. Machine Learning\u00a055, 311\u2013331 (2004)","journal-title":"Machine Learning"},{"key":"15_CR27","unstructured":"http:\/\/company.yandex.ru\/academic\/grant\/datasets_description.xml"}],"container-title":["Lecture Notes in Computer Science","Conceptual Structures: Leveraging Semantic Technologies"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-03079-6_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,21]],"date-time":"2019-05-21T13:06:51Z","timestamp":1558444011000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-03079-6_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"ISBN":["9783642030789","9783642030796"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-03079-6_15","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2009]]}}}