r:document_classification
This is an old revision of the document!
Document Classification
# Document classification walkthrough with the tm package.
#
# Cleans the `crude` corpus, builds term-document matrices with raw term
# frequency and with tf-idf weighting, then lists frequent terms and the terms
# most correlated with "oil".
#
# Lines starting with `#>` are the output recorded in the original session.
# Variable names (`c`, `x`, `y`, `freqTerms`) are kept from that session; note
# that `c` shadows base::c as a variable, but calls to c() still resolve to the
# function.

library(tm)
data("crude") # sample corpus used throughout; the session assumed it was loaded

# ---- Pass 1: lower-case and strip digits only ------------------------------

c <- crude
c <- tm_map(c, content_transformer(tolower))
# The original session applied removeNumbers twice in a row; once is enough.
c <- tm_map(c, content_transformer(removeNumbers))

x <- TermDocumentMatrix(c)
inspect(x[1:10, 1:10])
#> <<TermDocumentMatrix (terms: 10, documents: 10)>>
#> Non-/sparse entries: 7/93
#> Sparsity           : 93%
#> Maximal term length: 10
#> Weighting          : term frequency (tf)
#>
#>             Docs
#> Terms        127 144 191 194 211 236 237 242 246 248
#>   -/,          0   0   0   0   0   0   0   4   0   0
#>   -foot        0   0   0   0   0   0   0   0   0   0
#>   -hour        0   0   0   0   0   0   0   0   0   0
#>   -member      0   0   0   0   0   0   0   1   0   0
#>   -nation      0   0   0   0   0   1   0   0   0   0
#>   "(it)        0   0   0   0   0   0   1   0   0   0
#>   "demand      0   1   0   0   0   0   0   0   0   0
#>   "expansion   0   0   0   0   0   0   0   0   0   0
#>   "for         0   0   0   0   0   0   1   0   0   0
#>   "growth      0   0   0   0   0   0   1   0   0   0

# The first terms still carry punctuation, so strip it and rebuild the matrix.
c <- tm_map(c, content_transformer(removePunctuation))

x <- TermDocumentMatrix(c)
inspect(x[1:10, 1:10])
#> <<TermDocumentMatrix (terms: 10, documents: 10)>>
#> Non-/sparse entries: 13/87
#> Sparsity           : 87%
#> Maximal term length: 9
#> Weighting          : term frequency (tf)
#>
#>            Docs
#> Terms       127 144 191 194 211 236 237 242 246 248
#>   abdulaziz   0   0   0   0   0   0   0   0   5   0
#>   ability     0   2   0   0   0   3   0   0   0   0
#>   able        0   0   0   0   0   0   0   0   0   0
#>   about       0   1   0   0   1   0   1   0   2   2
#>   above       0   2   0   0   0   3   0   0   0   2
#>   abroad      0   0   0   0   0   1   0   0   0   0
#>   accept      0   0   0   0   0   0   0   0   0   0
#>   accord      0   0   0   0   0   0   0   0   0   5
#>   according   0   0   0   0   0   0   0   0   0   0
#>   across      0   0   0   0   0   0   0   0   0   0

# ---- Pass 2: restart from the raw corpus, full cleaning pipeline -----------
# Order used here: lower-case -> remove punctuation -> remove numbers.
# The recorded output is identical to the matrix above.

c <- crude
c <- tm_map(c, content_transformer(tolower))
c <- tm_map(c, content_transformer(removePunctuation))
c <- tm_map(c, content_transformer(removeNumbers))

x <- TermDocumentMatrix(c)
inspect(x[1:10, 1:10])
#> <<TermDocumentMatrix (terms: 10, documents: 10)>>
#> Non-/sparse entries: 13/87
#> Sparsity           : 87%
#> Maximal term length: 9
#> Weighting          : term frequency (tf)
#>
#>            Docs
#> Terms       127 144 191 194 211 236 237 242 246 248
#>   abdulaziz   0   0   0   0   0   0   0   0   5   0
#>   ability     0   2   0   0   0   3   0   0   0   0
#>   able        0   0   0   0   0   0   0   0   0   0
#>   about       0   1   0   0   1   0   1   0   2   2
#>   above       0   2   0   0   0   3   0   0   0   2
#>   abroad      0   0   0   0   0   1   0   0   0   0
#>   accept      0   0   0   0   0   0   0   0   0   0
#>   accord      0   0   0   0   0   0   0   0   0   5
#>   according   0   0   0   0   0   0   0   0   0   0
#>   across      0   0   0   0   0   0   0   0   0   0

# ---- tf-idf weighting -------------------------------------------------------
# The weighted matrix must be built from the corpus `c`, not from the existing
# matrix `x`. The original session first tried
#   y <- TermDocumentMatrix(x, control = list(weighting = weightTfIdf))
# which failed with:
#> Error in UseMethod("TermDocumentMatrix", x) :
#>   no applicable method for 'TermDocumentMatrix' applied to an object of class "c('TermDocumentMatrix', 'simple_triplet_matrix')"

y <- TermDocumentMatrix(c, control = list(weighting = weightTfIdf))
inspect(y[1:10, 1:10])
#> <<TermDocumentMatrix (terms: 10, documents: 10)>>
#> Non-/sparse entries: 13/87
#> Sparsity           : 87%
#> Maximal term length: 9
#> Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
#>
#>            Docs
#> Terms       127         144 191 194       211        236
#>   abdulaziz   0 0.000000000   0   0 0.0000000 0.00000000
#>   ability     0 0.015079700   0   0 0.0000000 0.02268204
#>   able        0 0.000000000   0   0 0.0000000 0.00000000
#>   about       0 0.003641675   0   0 0.0181086 0.00000000
#>   above       0 0.012792992   0   0 0.0000000 0.01924250
#>   abroad      0 0.000000000   0   0 0.0000000 0.01193903
#>   accept      0 0.000000000   0   0 0.0000000 0.00000000
#>   accord      0 0.000000000   0   0 0.0000000 0.00000000
#>   according   0 0.000000000   0   0 0.0000000 0.00000000
#>   across      0 0.000000000   0   0 0.0000000 0.00000000
#>            Docs
#> Terms              237 242        246         248
#>   abdulaziz 0.00000000   0 0.08575254 0.000000000
#>   ability   0.00000000   0 0.00000000 0.000000000
#>   able      0.00000000   0 0.00000000 0.000000000
#>   about     0.00378776   0 0.01049149 0.009408741
#>   above     0.00000000   0 0.00000000 0.016526179
#>   abroad    0.00000000   0 0.00000000 0.000000000
#>   accept    0.00000000   0 0.00000000 0.000000000
#>   accord    0.00000000   0 0.00000000 0.048700455
#>   according 0.00000000   0 0.00000000 0.000000000
#>   across    0.00000000   0 0.00000000 0.000000000

# ---- Frequent terms and associations (on the raw-frequency matrix x) --------

freqTerms <- findFreqTerms(x, lowfreq = 10)
freqTerms
#>  [1] "about"      "and"        "are"        "barrel"
#>  [5] "barrels"    "bpd"        "but"        "crude"
#>  [9] "dlrs"       "for"        "from"       "government"
#> [13] "has"        "industry"   "its"        "kuwait"
#> [17] "last"       "market"     "meeting"    "minister"
#> [21] "mln"        "new"        "not"        "official"
#> [25] "oil"        "one"        "opec"       "pct"
#> [29] "price"      "prices"     "production" "reuter"
#> [33] "said"       "saudi"      "sheikh"     "that"
#> [37] "the"        "they"       "this"       "was"
#> [41] "were"       "will"       "with"       "world"
#> [45] "would"

# Terms whose correlation with "oil" is at least 0.7.
findAssocs(x, "oil", corlimit = 0.7)
#> $oil
#>      opec     named   clearly      late    prices    trying
#>      0.87      0.81      0.79      0.79      0.79      0.79
#>       who    winter   markets      said  analysts agreement
#>      0.79      0.79      0.78      0.78      0.77      0.76
#> emergency      that     above      they    buyers     fixed
#>      0.74      0.74      0.73      0.73      0.71      0.71
#>   through
#>      0.70
r/document_classification.1481678347.txt.gz · Last modified: 2016/12/14 09:49 by hkimscil