{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T19:58:43Z","timestamp":1773863923535,"version":"3.50.1"},"reference-count":58,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2009,6,10]],"date-time":"2009-06-10T00:00:00Z","timestamp":1244592000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Scientometrics"],"published-print":{"date-parts":[[2010,2]]},"DOI":"10.1007\/s11192-009-0046-6","type":"journal-article","created":{"date-parts":[[2009,6,9]],"date-time":"2009-06-09T13:56:31Z","timestamp":1244555791000},"page":"289-306","source":"Crossref","is-referenced-by-count":70,"title":["Exploring the feasibility and accuracy of Latent Semantic Analysis based text mining techniques to detect similarity between patent documents and scientific publications"],"prefix":"10.1007","volume":"82","author":[{"given":"Tom","family":"Magerman","sequence":"first","affiliation":[]},{"given":"Bart","family":"Van Looy","sequence":"additional","affiliation":[]},{"given":"Xiaoyan","family":"Song","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,6,10]]},"reference":[{"key":"46_CR1","unstructured":"Atherton, P., & Borko, H. (1965). A test of factor-analytically derived automated classification methods. AIP Report AIP-DRP 65-l."},{"key":"46_CR2","doi-asserted-by":"crossref","unstructured":"Azoulay, P., Ding, W., & Stuart, T. (2006). The impact of academic patenting on the rate, quality and direction of (public) research. NBER Working Paper No. 11917. Cambridge MA: National Bureau of Economic Research.","DOI":"10.3386\/w11917"},{"key":"46_CR3","doi-asserted-by":"crossref","first-page":"665","DOI":"10.1007\/1-4020-2755-9_31","volume-title":"Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems","author":"E Bassecoulard","year":"2004","unstructured":"Bassecoulard, E., & Zitt, M. (2004). Patents and publications: The lexical connection. In H. F. Moed, W. Gl\u00e4nzel, & U. Schmoch (Eds.), Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems (pp. 665\u2013694). Dordrecht: Kluwer Academic Publishers."},{"key":"46_CR100","volume-title":"Modern information retrieval","author":"R Baeza-Yates","year":"1999","unstructured":"Baeza-Yates, R., & Ribeiro-Neto, B. (1999). Modern information retrieval. New York: ACM Press."},{"key":"46_CR4","volume-title":"Survey of text mining","year":"2003","unstructured":"Berry, M. W. (Ed.). (2003). Survey of text mining. New York: Springer."},{"key":"46_CR5","unstructured":"Berry, M. W., & Browne, M. (1999). Understanding search engines: Mathematical modeling and text retrieval. Philadelphia: Society for Industrial and Applied Mathematics."},{"key":"46_CR6","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D. M., Ng, A. Y., & Jordan, M. I. (2003). Latent Dirichlet allocation. Journal of Machine Learning Research, 3, 993\u20131022.","journal-title":"Journal of Machine Learning Research"},{"key":"46_CR7","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1145\/321160.321165","volume":"10","author":"H Borko","year":"1963","unstructured":"Borko, H., & Bemick, M. D. (1963). Automatic document classification. Journal of the ACM, 10, 151\u2013162.","journal-title":"Journal of the ACM"},{"key":"46_CR8","unstructured":"Calderini, M., Franzoni, C., & Vezzulli, A. (2005). If star scientists do not patent: An event history analysis of scientific eminence and the decision to patent in the academic world. CESPRI Working Paper No. 169."},{"issue":"2","key":"46_CR9","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1177\/053901883022002003","volume":"22","author":"M Callon","year":"1983","unstructured":"Callon, M., Courtial, J. P., Turner, W. A., & Bauin, S. (1983). From translations to problematic networks\u2014an introduction to co-word analysis. Social Science Information Sur Les Sciences Sociales, 22(2), 191\u2013235.","journal-title":"Social Science Information Sur Les Sciences Sociales"},{"key":"46_CR10","first-page":"607","volume-title":"Annual review of psychology","author":"JD Carroll","year":"1980","unstructured":"Carroll, J. D., & Arabie, P. (1980). Multidimensional scaling. In M. R. Rosenzweig & L. W. Porter (Eds.), Annual review of psychology (Vol. 31, pp. 607\u2013649). Palo Alto, CA: Annual Reviews, Inc."},{"issue":"3","key":"46_CR12","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1007\/BF02016875","volume":"31","author":"JP Courtial","year":"1994","unstructured":"Courtial, J. P. (1994). A coword analysis of Scientometrics. Scientometrics, 31(3), 251\u2013260.","journal-title":"Scientometrics"},{"issue":"6","key":"46_CR13","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester, S., Dumais, S., Furnas, G., Landauer, T., & Harshman, R. (1990). Indexing by latent semantic analysis. Journal of the American Society for Information Science, 41(6), 391\u2013407.","journal-title":"Journal of the American Society for Information Science"},{"key":"46_CR14","doi-asserted-by":"crossref","unstructured":"Eckart, C., & Young, G. (1936). The approximation of one matrix by another of lower rank. Psychometrika, I, 211\u2013218.","DOI":"10.1007\/BF02288367"},{"key":"46_CR15","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/0048-7333(94)90024-8","volume":"23","author":"EC Engelsman","year":"1994","unstructured":"Engelsman, E. C., & van Raan, A. F. J. (1994). A patent based cartography of technology. Research Policy, 23, 1\u201326.","journal-title":"Research Policy"},{"key":"46_CR16","unstructured":"European Commission. (2003). Third European Report on S&T Indicators."},{"key":"46_CR17","unstructured":"Fabrizio, K. R., & DiMinin, A. (2005). Commercializing the laboratory: Faculty patenting and the open science environment. Working paper."},{"key":"46_CR18","unstructured":"Gl\u00e4nzel, W., et al. (2004). Biotechnology: An analysis of patents and publications. Report Steunpunt O&O Statistics ( www.steunpuntoos.be )."},{"issue":"6","key":"46_CR19","doi-asserted-by":"crossref","first-page":"1548","DOI":"10.1016\/j.ipm.2005.03.021","volume":"41","author":"P Glenisson","year":"2005","unstructured":"Glenisson, P., Gl\u00e4nzel, W., Janssens, F., & De Moor, B. (2005a). Combining full-text and bibliometric information in mapping scientific disciplines. Information Processing & Management, 41(6), 1548\u20131572.","journal-title":"Information Processing & Management"},{"issue":"1","key":"46_CR20","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1007\/s11192-005-0208-0","volume":"63","author":"P Glenisson","year":"2005","unstructured":"Glenisson, P., Gl\u00e4nzel, W., & Persson, O. (2005b). Combining full-text and bibliometric indicators: A pilot study. Scientometrics, 63(1), 163\u2013180.","journal-title":"Scientometrics"},{"issue":"9","key":"46_CR21","first-page":"4","volume":"7","author":"P Grzybek","year":"2004","unstructured":"Grzybek, P., & Kelih, E. (2004). Anton S. Budilovic (1846\u20131908): A forerunner of quantitative linguistics in Russia? Glottometrics, 7(9), 4\u201397.","journal-title":"Glottometrics"},{"key":"46_CR22","volume-title":"Association for computing machine\u2019s ninth conference on research and development in information retrieval","author":"D Harman","year":"1986","unstructured":"Harman, D. (1986). An experimental study of the factors important in document ranking. In F. Rabbit (Ed.), Association for computing machine\u2019s ninth conference on research and development in information retrieval. New York: Association for Computing Machines."},{"key":"46_CR23","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1002\/(SICI)1097-4571(199101)42:1<7::AID-ASI2>3.0.CO;2-P","volume":"42","author":"D Harman","year":"1991","unstructured":"Harman, D. (1991). Hew effective is suffixing? Journal of the American Society for Information Science, 42, 7\u201315.","journal-title":"Journal of the American Society for Information Science"},{"issue":"3","key":"46_CR24","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1111\/j.1467-9310.1986.tb01305.x","volume":"16","author":"D Hicks","year":"1986","unstructured":"Hicks, D., Martin, B. R., & Irvine, J. (1986). Bibliometric techniques for monitoring performance in technologically oriented research: The case of integrated-optics. R&D Management, 16(3), 211\u2013223.","journal-title":"R&D Management"},{"issue":"2","key":"46_CR25","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1007\/BF02093627","volume":"37","author":"S Hinze","year":"1996","unstructured":"Hinze, S., & Grupp, H. (1996). Mapping of R&D structures in transdisciplinary areas: New biotechnology in food sciences. Scientometrics, 37(2), 313\u2013335.","journal-title":"Scientometrics"},{"key":"46_CR26","unstructured":"Hofmann, T. (1999). Probabilistic latent semantic indexing. In Proceedings of the Twenty-Second Annual International SIGIR Conference (pp. 50\u201357). New York: ACM Press."},{"issue":"6","key":"46_CR27","doi-asserted-by":"crossref","first-page":"1614","DOI":"10.1016\/j.ipm.2006.03.025","volume":"42","author":"F Janssens","year":"2006","unstructured":"Janssens, F., Leta, J., Gl\u00e4nzel, W., & De Moor, B. (2006). Towards mapping library and information science. Information Processing and Management, 42(6), 1614\u20131642.","journal-title":"Information Processing and Management"},{"key":"46_CR28","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1016\/0020-0271(71)90051-9","volume":"7","author":"N Jardin","year":"1971","unstructured":"Jardin, N., & van Rijsbergen, C. J. (1971). The use of hierarchic clustering in information retrieval. Information Storage and Retrieval, 7, 217\u2013240.","journal-title":"Information Storage and Retrieval"},{"key":"46_CR29","unstructured":"Krovets, B. (1995). Word sense disambiguation for large text databases. Ph. D. Thesis. Department of Computer Science, University of Massachusetts Amherst."},{"key":"46_CR101","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1037\/0033-295X.104.2.211","volume":"104","author":"TK Landauer","year":"1997","unstructured":"Landauer, T. K., & Dumais, S. T. (1997). A solution to Plato's problem: The latent semantic analysis theory of acquisition, induction, and representation of knowledge. Psychological Review, 104, 211\u2013240.","journal-title":"Psychological Review"},{"key":"46_CR30","doi-asserted-by":"crossref","first-page":"177","DOI":"10.1177\/016555158100300403","volume":"3","author":"M Lennon","year":"1981","unstructured":"Lennon, M., Pierce, D. S., Tarry, B. D., & Willett, P. (1981). An evaluation of some conflation algorithms for information retrieval. Journal of Information Science, 3, 177\u2013183.","journal-title":"Journal of Information Science"},{"key":"46_CR31","first-page":"187","volume-title":"Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems","author":"E Leopold","year":"2004","unstructured":"Leopold, E., May, M., & Paa\u00df, G. (2004). Data mining and text mining for science & technology research. In H. F. Moed, W. Gl\u00e4nzel, & U. Schmoch (Eds.), Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems (pp. 187\u2013213). Dordrecht: Kluwer Academic Publishers."},{"issue":"11","key":"46_CR32","doi-asserted-by":"crossref","first-page":"991","DOI":"10.1002\/asi.20045","volume":"55","author":"L Leydesdorff","year":"2004","unstructured":"Leydesdorff, L. (2004). The university-industry knowledge relationship: analyzing patents and the science base of technologies. Journal of the American Society for Information Science and Technology, 55(11), 991\u20131001.","journal-title":"Journal of the American Society for Information Science and Technology"},{"key":"46_CR33","volume-title":"Foundations of statistical natural language processing","author":"CD Manning","year":"2000","unstructured":"Manning, C. D., & Sch\u00fctze, H. (2000). Foundations of statistical natural language processing. Cambridge: MIT Press."},{"issue":"2","key":"46_CR34","doi-asserted-by":"crossref","first-page":"151","DOI":"10.1023\/A:1005692621105","volume":"48","author":"M Meyer","year":"2000","unstructured":"Meyer, M. (2000). Patent citations in a novel field of technology: What can they tell about interactions of emerging communities of science and technology? Scientometrics, 48(2), 151\u2013178.","journal-title":"Scientometrics"},{"issue":"3","key":"46_CR35","doi-asserted-by":"crossref","first-page":"545","DOI":"10.1007\/s11192-006-0129-6","volume":"68","author":"M Meyer","year":"2006","unstructured":"Meyer, M. (2006). Knowledge integrators or weak links? An exploratory comparison of patenting researchers with their non-inventing peers in nano-science and technology. Scientometrics, 68(3), 545\u2013560.","journal-title":"Scientometrics"},{"key":"46_CR36","volume-title":"Information extraction: Algorithms and prospects in a retrieval context (The Information Retrieval Series 21)","author":"MF Moens","year":"2006","unstructured":"Moens, M. F. (2006). Information extraction: Algorithms and prospects in a retrieval context (The Information Retrieval Series 21). New York: Springer."},{"key":"46_CR37","doi-asserted-by":"crossref","unstructured":"Murray, F. & Stern, S. (2005). Do formal intellectual property rights hinder the free flow of scientific knowledge? An empirical test of the anti-commons hypothesis. NBER Working Paper No. 11465. Cambridge, MA: National Bureau of Economic Research.","DOI":"10.3386\/w11465"},{"key":"46_CR38","unstructured":"National Science Foundation (NSF). (2006). Science and Engineering Indicators."},{"issue":"4","key":"46_CR39","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1016\/0048-7333(94)90007-8","volume":"23","author":"ECM Noyons","year":"1994","unstructured":"Noyons, E. C. M., van Raan, A. F. J., Grupp, H., & Schmoch, U. (1994). Exploring the science and technology interface\u2013inventor author relations in laser medicine. Research Policy, 23(4), 443\u2013457.","journal-title":"Research Policy"},{"key":"46_CR40","doi-asserted-by":"crossref","unstructured":"Ossorio, P. G. (1966). Classification space: A multivariate procedure for automatic document indexing and retrieval. Multivariate Behavior Research, 1, 479\u2013524.","DOI":"10.1207\/s15327906mbr0104_6"},{"issue":"3","key":"46_CR41","doi-asserted-by":"crossref","first-page":"130","DOI":"10.1108\/eb046814","volume":"14","author":"MF Porter","year":"1980","unstructured":"Porter, M. F. (1980). An algorithm for suffix stripping. Program, 14(3), 130\u2013137.","journal-title":"Program"},{"key":"46_CR42","unstructured":"Porter, M. F. (2001). Snowball: A language for stemming algorithms. ( www.snowball.tartarus.org\/texts\/introduction.html )."},{"key":"46_CR43","doi-asserted-by":"crossref","first-page":"587","DOI":"10.1007\/1-4020-2755-9_28","volume-title":"Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems","author":"AL Porter","year":"2004","unstructured":"Porter, A. L., & Newman, N. C. (2004). Patent profiling for competitive advantage. In H. F. Moed, W. Gl\u00e4nzel, & U. Schmoch (Eds.), Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems (pp. 587\u2013612). Dordrecht: Kluwer Academic Publishers."},{"key":"46_CR44","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1007\/978-3-642-86467-4_3","volume-title":"Dynamics of science-based innovation","author":"V Rabeharisoa","year":"1992","unstructured":"Rabeharisoa, V. (1992). A special mediation between science and technology: When inventors publish scientific articles in fuel cells. In H. Grupp (Ed.), Dynamics of science-based innovation (pp. 45\u201372). Berlin: Springer."},{"key":"46_CR45","volume-title":"Automatic information organization and retrieval","author":"G Salton","year":"1968","unstructured":"Salton, G. (1968). Automatic information organization and retrieval. New York: McGraw-Hill."},{"key":"46_CR46","volume-title":"Introduction to modern information retrieval","author":"G Salton","year":"1983","unstructured":"Salton, G., & McGill, M. J. (1983). Introduction to modern information retrieval. New York: McGraw Hill."},{"issue":"11","key":"46_CR47","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton, G., Wong, A., & Yang, C. S. (1975). A Vector Space Model for Automatic Indexing. Communications of the ACM, 18(11), 613\u2013620.","journal-title":"Communications of the ACM"},{"key":"46_CR48","first-page":"9","volume-title":"Information retrieval research","author":"G Salton","year":"1981","unstructured":"Salton, G., & Wu, H. (1981). A term weighting model based on utility theory. In R. N. Oddy, S. E. Robertson, C. J. van Rijsbergen, & R. W. Williams (Eds.), Information retrieval research (pp. 9\u201322). Boston: Butterworths."},{"key":"46_CR49","first-page":"717","volume-title":"Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems","author":"U Schmoch","year":"2004","unstructured":"Schmoch, U. (2004). The technological output of scientific institutions. In H. F. Moed, W. Gl\u00e4nzel, & U. Schmoch (Eds.), Handbook of quantitative science and technology research. The use of publication and patent statistics in studies of S&T systems (pp. 717\u2013731). Dordrecht: Kluwer Academic Publishers."},{"key":"46_CR50","volume-title":"Automatic keyword classification for information Retrieval","author":"K Sparck Jones","year":"1971","unstructured":"Sparck Jones, K. (1971). Automatic keyword classification for information Retrieval. London: Buttersworth."},{"issue":"4","key":"46_CR51","doi-asserted-by":"crossref","first-page":"596","DOI":"10.1016\/j.respol.2006.02.003","volume":"35","author":"B Looy Van","year":"2006","unstructured":"Van Looy, B., Callaert, J., & Debackere, K. (2006). Publication and patent behavior of academic researchers: Conflicting, reinforcing or merely co-existing? Research Policy, 35(4), 596\u2013608.","journal-title":"Research Policy"},{"key":"46_CR52","doi-asserted-by":"crossref","first-page":"425","DOI":"10.1016\/j.respol.2003.09.004","volume":"33","author":"B Looy Van","year":"2004","unstructured":"Van Looy, B., Ranga, M., Callaert, J., Debackere, K., & Zimmermann, E. (2004). Combining entrepreneurial and scientific performance in academia: Towards a compounded and reciprocal Matthew Effect? Research Policy, 33, 425\u2013441.","journal-title":"Research Policy"},{"key":"46_CR53","unstructured":"van Rijsbergen, C. J., Robertson, S. E., & Porter, M. F. (1980). New models in probabilistic information retrieval. London: British Library (British Library Research and Development Report, No. 5587)."},{"key":"46_CR54","unstructured":"Vandromme, D., Magerman, T., Song, X., Van Looy, B., Hoskens, M., Glenisson, P., Thijs, B., Vertomme, J., De Moor, B., & Duflou, J. (2006). A comparative analysis of distance measures and text mining methods supporting domain studies. Paper presented at the Ninth STI indicator conference, Leuven, 2006."},{"issue":"1","key":"46_CR102","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1145\/195705.195717","volume":"13","author":"SKM Wong","year":"1995","unstructured":"Wong, S. K. M., & Yao, Y. Y. (1995). On modeling information retrieval with probabilistic inference. ACM Transactions on Information Systems, 13(1), 69\u201399.","journal-title":"ACM Transactions on Information Systems"},{"key":"46_CR55","unstructured":"Wyllys, R. E. (1975). Measuring scientific prose with rank-frequency (\u2018\u2018Zipf\u2019\u2019) curves: A new use for an old phenomenon. Proceedings of the American Society for Information Science, 12, 30\u201331."},{"key":"46_CR56","volume-title":"Human behavior and the principle of least effort: An introduction to human ecology","author":"GK Zipf","year":"1949","unstructured":"Zipf, G. K. (1949). Human behavior and the principle of least effort: An introduction to human ecology. Cambridge: Addison-Wesley."}],"container-title":["Scientometrics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-009-0046-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11192-009-0046-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-009-0046-6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,19]],"date-time":"2020-05-19T12:21:02Z","timestamp":1589890862000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11192-009-0046-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,6,10]]},"references-count":58,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2010,2]]}},"alternative-id":["46"],"URL":"https:\/\/doi.org\/10.1007\/s11192-009-0046-6","relation":{},"ISSN":["0138-9130","1588-2861"],"issn-type":[{"value":"0138-9130","type":"print"},{"value":"1588-2861","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,6,10]]}}}