{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T04:53:25Z","timestamp":1778734405774,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2000,7,1]],"date-time":"2000-07-01T00:00:00Z","timestamp":962409600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2000,7,1]],"date-time":"2000-07-01T00:00:00Z","timestamp":962409600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Information Retrieval"],"published-print":{"date-parts":[[2000,7]]},"DOI":"10.1023\/a:1009953814988","type":"journal-article","created":{"date-parts":[[2002,12,23]],"date-time":"2002-12-23T00:25:11Z","timestamp":1040603111000},"page":"127-163","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":952,"title":["Automating the Construction of Internet Portals with Machine Learning"],"prefix":"10.1007","volume":"3","author":[{"given":"Andrew Kachites","family":"McCallum","sequence":"first","affiliation":[]},{"given":"Kamal","family":"Nigam","sequence":"additional","affiliation":[]},{"given":"Jason","family":"Rennie","sequence":"additional","affiliation":[]},{"given":"Kristie","family":"Seymore","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"266400_CR1","unstructured":"Baker D, Hofmann T, McCallum A and Yang Y (1999) A hierarchical probabilistic model for novelty detection in text. Tech. Rep., Just Research. 
http:\/\/www.cs.cmu.edu\/~mccallum."},{"key":"266400_CR2","first-page":"1","volume":"3","author":"LE Baum","year":"1972","unstructured":"Baum LE (1972) An inequality and associated maximization technique in statistical estimation of probabilistic functions of a Markov process. Inequalities, 3:1-8.","journal-title":"Inequalities"},{"key":"266400_CR3","volume-title":"Dynamic Programming","author":"RE Bellman","year":"1957","unstructured":"Bellman RE (1957) Dynamic Programming. Princeton University Press, Princeton, NJ."},{"key":"266400_CR4","doi-asserted-by":"crossref","unstructured":"Bikel DM, Miller S, Schwartz R and Weischedel R (1997) Nymble: A high-performance learning name-finder. In: Proceedings of the Fifth Conference on Applied Natural Language Processing (ANLP-97), pp. 194-201.","DOI":"10.3115\/974557.974586"},{"key":"266400_CR5","doi-asserted-by":"crossref","unstructured":"Blum A and Mitchell T (1998) Combining labeled and unlabeled data with co-training. In: Proceedings of the 11th Annual Conference on Computational Learning Theory (COLT '98), pp. 92-100.","DOI":"10.1145\/279943.279962"},{"key":"266400_CR6","unstructured":"Boyan J, Freitag D and Joachims T (1996) A machine learning architecture for optimizing web search engines. In: AAAI-96 Workshop on Internet-Based Information Systems."},{"key":"266400_CR7","unstructured":"Chakrabarti S, van der Berg M and Dom B (1999) Focused crawling: A new approach to topic-specific Web resource discovery. In: Proceedings of 8th International World Wide Web Conference (WWW8)."},{"key":"266400_CR8","unstructured":"Chang H, Cohn D and McCallum A (1999) Creating customized authority lists. http:\/\/www.cs.cmu.edu\/~mccallum."},{"key":"266400_CR9","unstructured":"Chen SF and Goodman JT (1998) An empirical study of smoothing techniques for language modeling. Tech. Rep. 
TR-10-98, Computer Science Group, Harvard University."},{"key":"266400_CR10","doi-asserted-by":"crossref","unstructured":"Cho J, Garcia-Molina H and Page L (1998) Efficient crawling through URL ordering. In: Proceedings of the Seventh World-Wide Web Conference (WWW7).","DOI":"10.1016\/S0169-7552(98)00108-1"},{"key":"266400_CR11","doi-asserted-by":"crossref","unstructured":"Cohen W (1998) A web-based information system that reasons with structured collections of text. In: Proceedings of the Second International Conference on Autonomous Agents (Agents '98), pp. 400-407.","DOI":"10.1145\/280765.280870"},{"key":"266400_CR12","doi-asserted-by":"crossref","unstructured":"Cohen W and Fan W (1999) Learning page-independent heuristics for extracting data from web pages. In: AAAI Spring Symposium on Intelligent Agents in Cyberspace.","DOI":"10.1016\/S1389-1286(99)00047-X"},{"key":"266400_CR13","unstructured":"Craven M, DiPasquo D, Freitag D, McCallum A, Mitchell T, Nigam K and Slattery S (1998) Learning to extract symbolic knowledge from the World Wide Web. In: Proceedings of the Fifteenth National Conference on Artificial Intelligence (AAAI-98), pp. 509-516."},{"issue":"1","key":"266400_CR14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster AP, Laird NM and Rubin DB (1977) Maximum likelihood from incomplete data via the EM algorithm. Journal of the Royal Statistical Society, Series B, 39(1):1-38.","journal-title":"Journal of the Royal Statistical Society"},{"key":"266400_CR15","unstructured":"Freitag D and McCallum A (1999) Information extraction with HMMs and shrinkage. In: Proceedings of the AAAI-99 Workshop on Machine Learning for Information Extraction."},{"key":"266400_CR16","doi-asserted-by":"crossref","unstructured":"Giles CL, Bollacker KD and Lawrence S (1998) CiteSeer: An autonomous citation indexing system. 
In: Digital Libraries 98-Third ACM Conference on Digital Libraries, pp. 89-98.","DOI":"10.1145\/276675.276685"},{"key":"266400_CR17","unstructured":"Hofmann T and Puzicha J (1998) Statistical models for co-occurrence data. Tech. Rep. AI Memo 1625, Artificial Intelligence Laboratory, MIT."},{"key":"266400_CR18","unstructured":"Joachims T, Freitag D and Mitchell T (1997) Webwatcher: A tour guide for the World Wide Web. In: Proceedings of the Fifteenth International Joint Conference on Artificial Intelligence (IJCAI-97), pp. 770-777."},{"key":"266400_CR19","doi-asserted-by":"crossref","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"LP Kaelbling","year":"1996","unstructured":"Kaelbling LP, Littman ML and Moore AW (1996) Reinforcement learning: A survey. Journal of Artificial Intelligence Research, 4:237-285.","journal-title":"Journal of Artificial Intelligence Research"},{"key":"266400_CR20","unstructured":"Kearns M, Mansour Y and Ng A (2000) Approximate planning in large POMDPs via reusable trajectories. In: Advances in Neural Information Processing Systems 12. The MIT Press."},{"key":"266400_CR21","doi-asserted-by":"crossref","unstructured":"Kleinberg J (1999) Authoritative sources in a hyperlinked environment. Journal of the ACM, 46.","DOI":"10.1145\/324133.324140"},{"key":"266400_CR22","doi-asserted-by":"crossref","first-page":"225","DOI":"10.1016\/0885-2308(92)90019-Z","volume":"6","author":"J Kupiec","year":"1992","unstructured":"Kupiec J (1992) Robust part-of-speech tagging using a hidden Markov model. Computer Speech and Language, 6:225-242.","journal-title":"Computer Speech and Language"},{"issue":"6","key":"266400_CR23","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1109\/2.769447","volume":"32","author":"S Lawrence","year":"1999","unstructured":"Lawrence S, Giles CL and Bollacker K. (1999) Digital libraries and autonomous citation indexing. 
IEEE Computer, 32(6), 67-71.","journal-title":"IEEE Computer"},{"key":"266400_CR24","unstructured":"Leek TR (1997) Information extraction using hidden Markov models. Master's Thesis, UC San Diego."},{"key":"266400_CR25","doi-asserted-by":"crossref","unstructured":"Lewis DD (1998) Naive (Bayes) at forty: The independence assumption in information retrieval. In: Machine Learning: ECML-98, Tenth European Conference on Machine Learning, pp. 4-15.","DOI":"10.1007\/BFb0026666"},{"key":"266400_CR26","unstructured":"McCallum A and Nigam K (1998) A comparison of event models for naive Bayes text classification. In: AAAI-98 Workshop on Learning for Text Categorization. http:\/\/www.cs.cmu.edu\/~mccallum."},{"key":"266400_CR27","unstructured":"McCallum A, Rosenfeld R, Mitchell T and Ng A (1998) Improving text classification by shrinkage in a hierarchy of classes. In: Machine Learning: Proceedings of the Fifteenth International Conference (ICML '98), pp. 359-367."},{"key":"266400_CR28","volume-title":"Mixture Models","author":"G McLachlan","year":"1988","unstructured":"McLachlan G and Basford K (1988) Mixture Models. Marcel Dekker, New York."},{"key":"266400_CR29","unstructured":"Menczer F (1997) ARACHNID: Adaptive retrieval agents choosing heuristic neighborhoods for information discovery. In: Machine Learning: Proceedings of the Fourteenth International Conference (ICML '97), pp. 227-235."},{"issue":"2","key":"266400_CR30","first-page":"155","volume":"20","author":"B Merialdo","year":"1994","unstructured":"Merialdo B (1994) Tagging english text with a probabilistic model. Computational Linguistics, 20(2):155-171.","journal-title":"Computational Linguistics"},{"key":"266400_CR31","volume-title":"Machine Learning","author":"TM Mitchell","year":"1997","unstructured":"Mitchell TM (1997) Machine Learning. 
McGraw-Hill, New York."},{"issue":"1","key":"266400_CR32","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1006\/csla.1994.1001","volume":"8","author":"H Ney","year":"1994","unstructured":"Ney H, Essen U and Kneser R (1994) On structuring probabilistic dependencies in stochastic language modeling. Computer Speech and Language, 8(1):1-38.","journal-title":"Computer Speech and Language"},{"key":"266400_CR33","doi-asserted-by":"crossref","unstructured":"Nigam K, McCallum A, Thrun S and Mitchell T (2000) Text classification from labeled and unlabeled documents using EM. Machine Learning, 39.","DOI":"10.1023\/A:1007692713085"},{"issue":"2","key":"266400_CR34","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner LR (1989) A tutorial on hidden Markov models and selected applications in speech recognition. Proceedings of the IEEE, 77(2):257-286.","journal-title":"Proceedings of the IEEE"},{"key":"266400_CR35","unstructured":"Riloff E and Jones R (1999) Learning dictionaries for information extraction using multi-level boot-strapping. In: Proceedings of the Sixteenth National Conference on Artificial Intelligence (AAAI-99), pp. 474-479."},{"key":"266400_CR36","unstructured":"Stolcke A, Shriberg E, Bates R, Coccaro N, Jurafsky D, Martin R, Meteer M, Ries K, Taylor P and Ess-Dykema CV (1998) Dialog act modeling for conversational speech. In: AAAI Spring Symposium on Applying Machine Learning to Discourse Processing, pp. 98-105."},{"key":"266400_CR37","first-page":"9","volume":"3","author":"RS Sutton","year":"1988","unstructured":"Sutton RS (1988) Learning to predict by the methods of temporal differences. Machine Learning, 3:9-44.","journal-title":"Machine Learning"},{"key":"266400_CR38","unstructured":"Tesauro G and Galperin GR (1997) On-line policy improvement using monte-carlo search. In: Advances in Neural Information Processing Systems 9, The MIT Press, pp. 
1068-1074."},{"issue":"4","key":"266400_CR39","doi-asserted-by":"crossref","first-page":"275","DOI":"10.3233\/IDA-1997-1405","volume":"1","author":"L Torgo","year":"1997","unstructured":"Torgo L and Gama J (1997) Regression using classification algorithms. Intelligent Data Analysis, 1(4):275-292.","journal-title":"Intelligent Data Analysis"},{"key":"266400_CR40","doi-asserted-by":"crossref","first-page":"260","DOI":"10.1109\/TIT.1967.1054010","volume":"IT-13","author":"AJ Viterbi","year":"1967","unstructured":"Viterbi AJ (1967) Error bounds for convolutional codes and an asymptotically optimum decoding algorithm. IEEE Transactions on Information Theory, IT-13, 260-269.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"4","key":"266400_CR41","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1145\/273035.273057","volume":"41","author":"IH Witten","year":"1998","unstructured":"Witten IH, Nevill-Manning C, McNab R and Cunningham SJ (1998) A public digital library based on full-text retrieval: Collections and experience. Communications of the ACM, 41(4):71-75.","journal-title":"Communications of the ACM"},{"key":"266400_CR42","unstructured":"Yamron J, Carp I, Gillick L, Lowe S and van Mulbregt, P. (1998) A hidden Markov model approach to text segmentation and event tracking. In: Proceedings of International Conference on Acoustics, Speech and Signal Processing (ICASSP-98), Seattle, Washington."},{"key":"266400_CR43","doi-asserted-by":"crossref","unstructured":"Yarowsky D (1995) Unsupervised word sense disambiguation rivaling supervised methods. In: Proceedings of the 33rd Annual Meeting of the Association for Computational Linguistics (ACL-95), pp. 
189-196.","DOI":"10.3115\/981658.981684"}],"container-title":["Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1009953814988.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1009953814988\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1009953814988.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T12:18:13Z","timestamp":1748348293000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1009953814988"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2000,7]]},"references-count":43,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2000,7]]}},"alternative-id":["266400"],"URL":"https:\/\/doi.org\/10.1023\/a:1009953814988","relation":{},"ISSN":["1386-4564","1573-7659"],"issn-type":[{"value":"1386-4564","type":"print"},{"value":"1573-7659","type":"electronic"}],"subject":[],"published":{"date-parts":[[2000,7]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}