k-Nearest Neighbors (kNN)
Evan Jung January 18, 2019
1. The Concept of KNN
What is kNN? The name is short for k-Nearest Neighbors. So what is k? k is simply the number of neighbors that "vote" on the test example's class. If k = 1, a test example is given the same label as the single closest example in the training set. If k = 3, the labels of the three closest training examples are checked and the most common label (i.e., one occurring at least twice) is assigned, and so on for larger values of k.
kNN measures similarity with distance: for example, two traffic signs can be compared color by color.
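As a minimal sketch of that idea (the two vectors below are made-up RGB values, not rows from the dataset), the Euclidean distance between two signs' color features looks like this:

# hypothetical RGB feature vectors for two signs (illustrative values only)
sign_a <- c(r1 = 155, g1 = 228, b1 = 251)
sign_b <- c(r1 = 142, g1 = 217, b1 = 242)

# Euclidean distance: the smaller the value, the more similar the signs
sqrt(sum((sign_a - sign_b)^2))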
# the basic calling pattern from the class package
library(class)
pred <- knn(train = training_data, test = testing_data, cl = training_labels)
# packages used to import and explore the dataset
library(dplyr)
library(readr)
url <- "https://assets.datacamp.com/production/course_2906/datasets/knn_traffic_signs.csv"
signs <- read_csv(url)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   sample = col_character(),
##   sign_type = col_character()
## )
## See spec(...) for full column specifications.
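The structure printed below appears to come from dplyr's glimpse() (the call itself is not shown in the original post):

glimpse(signs)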
## Observations: 206
## Variables: 51
## $ id        <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ sample    <chr> "train", "train", "train", "train", "train", "train"...
## $ sign_type <chr> "pedestrian", "pedestrian", "pedestrian", "pedestria...
## $ r1        <dbl> 155, 142, 57, 22, 169, 75, 136, 118, 149, 13, 123, 1...
## $ g1        <dbl> 228, 217, 54, 35, 179, 67, 149, 105, 225, 34, 124, 1...
## $ b1        <dbl> 251, 242, 50, 41, 170, 60, 157, 69, 241, 28, 107, 13...
## $ r2        <dbl> 135, 166, 187, 171, 231, 131, 200, 244, 34, 5, 83, 3...
## $ g2        <dbl> 188, 204, 201, 178, 254, 89, 203, 245, 45, 21, 61, 4...
## $ b2        <dbl> 101, 44, 68, 26, 27, 53, 107, 67, 1, 11, 26, 37, 26,...
## $ r3        <dbl> 156, 142, 51, 19, 97, 214, 150, 132, 155, 123, 116, ...
## $ g3        <dbl> 227, 217, 51, 27, 107, 144, 167, 123, 226, 154, 124,...
## $ b3        <dbl> 245, 242, 45, 29, 99, 75, 134, 12, 238, 140, 115, 12...
## $ r4        <dbl> 145, 147, 59, 19, 123, 156, 171, 138, 147, 21, 67, 4...
## $ g4        <dbl> 211, 219, 62, 27, 147, 169, 218, 123, 222, 46, 67, 5...
## $ b4        <dbl> 228, 242, 65, 29, 152, 190, 252, 85, 242, 41, 52, 49...
## $ r5        <dbl> 166, 164, 156, 42, 221, 67, 171, 254, 170, 36, 70, 1...
## $ g5        <dbl> 233, 228, 171, 37, 236, 50, 158, 254, 191, 60, 53, 1...
## $ b5        <dbl> 245, 229, 50, 3, 117, 36, 108, 92, 113, 26, 26, 141,...
## $ r6        <dbl> 212, 84, 254, 217, 205, 37, 157, 241, 26, 75, 26, 60...
## $ g6        <dbl> 254, 116, 255, 228, 225, 36, 186, 240, 37, 108, 26, ...
## $ b6        <dbl> 52, 17, 36, 19, 80, 42, 11, 108, 12, 44, 21, 18, 20,...
## $ r7        <dbl> 212, 217, 211, 221, 235, 44, 26, 254, 34, 13, 52, 9,...
## $ g7        <dbl> 254, 254, 226, 235, 254, 42, 35, 254, 45, 27, 45, 13...
## $ b7        <dbl> 11, 26, 70, 20, 60, 44, 10, 99, 19, 25, 27, 17, 20, ...
## $ r8        <dbl> 188, 155, 78, 181, 90, 192, 180, 108, 221, 133, 117,...
## $ g8        <dbl> 229, 203, 73, 183, 110, 131, 211, 106, 249, 163, 109...
## $ b8        <dbl> 117, 128, 64, 73, 9, 73, 236, 27, 184, 126, 83, 33, ...
## $ r9        <dbl> 170, 213, 220, 237, 216, 123, 129, 135, 226, 83, 110...
## $ g9        <dbl> 216, 253, 234, 234, 236, 74, 109, 123, 246, 125, 74,...
## $ b9        <dbl> 120, 51, 59, 44, 66, 22, 73, 40, 59, 19, 12, 12, 18,...
## $ r10       <dbl> 211, 217, 254, 251, 229, 36, 161, 254, 30, 13, 98, 2...
## $ g10       <dbl> 254, 255, 255, 254, 255, 34, 190, 254, 40, 27, 70, 1...
## $ b10       <dbl> 3, 21, 51, 2, 12, 37, 10, 115, 34, 25, 26, 11, 20, 2...
## $ r11       <dbl> 212, 217, 253, 235, 235, 44, 161, 254, 34, 9, 20, 28...
## $ g11       <dbl> 254, 255, 255, 243, 254, 42, 190, 254, 44, 23, 21, 2...
## $ b11       <dbl> 19, 21, 44, 12, 60, 44, 6, 99, 35, 18, 20, 19, 13, 1...
## $ r12       <dbl> 172, 158, 66, 19, 163, 197, 187, 138, 241, 85, 113, ...
## $ g12       <dbl> 235, 225, 68, 27, 168, 114, 215, 123, 255, 128, 76, ...
## $ b12       <dbl> 244, 237, 68, 29, 152, 21, 236, 85, 54, 21, 14, 12, ...
## $ r13       <dbl> 172, 164, 69, 20, 124, 171, 141, 118, 205, 83, 106, ...
## $ g13       <dbl> 235, 227, 65, 29, 117, 102, 142, 105, 229, 125, 69, ...
## $ b13       <dbl> 244, 237, 59, 34, 91, 26, 140, 75, 46, 19, 9, 12, 13...
## $ r14       <dbl> 172, 182, 76, 64, 188, 197, 189, 131, 226, 85, 102, ...
## $ g14       <dbl> 228, 228, 84, 61, 205, 114, 171, 124, 246, 128, 67, ...
## $ b14       <dbl> 235, 143, 22, 4, 78, 21, 140, 5, 59, 21, 6, 12, 13, ...
## $ r15       <dbl> 177, 171, 82, 211, 125, 123, 214, 106, 235, 85, 106,...
## $ g15       <dbl> 235, 228, 93, 222, 147, 74, 221, 94, 252, 128, 69, 4...
## $ b15       <dbl> 244, 196, 17, 78, 20, 22, 201, 53, 67, 21, 9, 11, 18...
## $ r16       <dbl> 22, 164, 58, 19, 160, 180, 188, 101, 237, 83, 43, 60...
## $ g16       <dbl> 52, 227, 60, 27, 183, 107, 211, 91, 254, 125, 29, 45...
## $ b16       <dbl> 53, 237, 60, 29, 187, 26, 227, 59, 53, 19, 11, 18, 1...
# drop the id and sample columns
signs2 <- signs[, -c(1:2)]

# sample one sign to classify
next_sign <- signs2[sample(NROW(signs2), 1), ]
next_sign <- next_sign[, -1]  # drop the sign_type label
str(next_sign)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 48 variables:
##  $ r1 : num 179
##  $ g1 : num 195
##  $ b1 : num 188
##  $ r2 : num 67
##  $ g2 : num 22
##  $ b2 : num 24
##  $ r3 : num 65
##  $ g3 : num 21
##  $ b3 : num 23
##  $ r4 : num 28
##  $ g4 : num 35
##  $ b4 : num 28
##  $ r5 : num 106
##  $ g5 : num 115
##  $ b5 : num 109
##  $ r6 : num 99
##  $ g6 : num 115
##  $ b6 : num 109
##  $ r7 : num 102
##  $ g7 : num 121
##  $ b7 : num 115
##  $ r8 : num 91
##  $ g8 : num 77
##  $ b8 : num 74
##  $ r9 : num 91
##  $ g9 : num 90
##  $ b9 : num 85
##  $ r10: num 107
##  $ g10: num 118
##  $ b10: num 113
##  $ r11: num 85
##  $ g11: num 78
##  $ b11: num 73
##  $ r12: num 67
##  $ g12: num 22
##  $ b12: num 24
##  $ r13: num 146
##  $ g13: num 158
##  $ b13: num 150
##  $ r14: num 68
##  $ g14: num 25
##  $ b14: num 26
##  $ r15: num 68
##  $ g15: num 25
##  $ b15: num 26
##  $ r16: num 68
##  $ g16: num 82
##  $ b16: num 76
# extract the class labels
sign_types <- signs2$sign_type

# classify the sampled sign against the rest (k = 1 by default)
knn(train = signs2[-1], test = next_sign, cl = sign_types)
## [1] stop
## Levels: pedestrian speed stop
How did knn() correctly classify the stop sign? It learned that stop signs are mostly red.
2. Exploring the traffic sign dataset
Each previously observed street sign was divided into a 4x4 grid, and the red, green, and blue levels of each of the 16 center pixels were recorded.
The result is a dataset that records the sign_type as well as 16 x 3 = 48 color properties of each sign.
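The class counts below were presumably produced with a one-way frequency table of the sign types (the original chunk is not shown):

# count the observations of each sign type (assumed code for the output below)
table(signs$sign_type)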
## 
## pedestrian      speed       stop 
##         65         70         71
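The per-type average of the r10 (red) pixel shown next matches what aggregate() would report (again, an assumed reconstruction of the hidden chunk):

# average red level of pixel 10 by sign type (assumed code for the output below)
aggregate(r10 ~ sign_type, data = signs, mean)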
##    sign_type       r10
## 1 pedestrian 108.78462
## 2      speed  83.08571
## 3       stop 142.50704
Look at the stop sign type: its average r10 (red) value is clearly higher than that of the other sign types. Differences like this are exactly what kNN's distance calculation uses to identify similar signs.
3. Classifying a collection of road signs
Now that the autonomous vehicle has successfully stopped on its own, your team feels confident allowing the car to continue the test course.
The test course includes 59 additional road signs divided into three types: pedestrian, speed, and stop.
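The code behind the confusion matrix below is not shown in the post. A plausible reconstruction, assuming the test signs are the rows flagged "test" in the CSV's sample column, is:

# assumed reconstruction: the 59 test signs are the rows flagged "test"
signs_test   <- signs2[signs$sample == "test", ]
signs_actual <- signs_test$sign_type

# classify them with k = 1 (the default); note that signs2 still contains the
# test rows, which would explain the perfect accuracy reported below
signs_pred <- knn(train = signs2[-1], test = signs_test[-1], cl = sign_types)

# rows are actual types, columns are predicted types
table(signs_actual, signs_pred)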
##             signs_pred
## signs_actual pedestrian speed stop
##   pedestrian         19     0    0
##   speed               0    24    0
##   stop                0     0   16
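The overall accuracy below is the proportion of correct predictions, presumably computed as:

# assumed code for the accuracy shown below
mean(signs_actual == signs_pred)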
## [1] 1
4. Testing other ‘k’ values
By default, the knn() function in the class package uses only the single nearest neighbor.
Setting the k parameter allows the algorithm to consider additional nearby neighbors, enlarging the collection of neighbors that vote on the predicted class.
Compare k values of 1, 7, and 15 to examine the impact on traffic sign classification accuracy.
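For the k = 1 baseline, the accuracy below presumably comes from a chunk like this (k_1 is an assumed name; signs_test and signs_actual are from the split sketched earlier):

# baseline: k = 1, which is knn()'s default
k_1 <- knn(train = signs2[-1], test = signs_test[-1], cl = sign_types)
mean(signs_actual == k_1)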
## [1] 1
# Modify the above to set k = 7
k_7 <- knn(train = signs2[-1], test = signs_test[-1], cl = sign_types, k = 7)
mean(signs_actual == k_7)
## [1] 0.9661017
# Set k = 15 and compare to the above
k_15 <- knn(train = signs2[-1], test = signs_test[-1], cl = sign_types, k = 15)
mean(signs_actual == k_15)
## [1] 0.9152542
5. Seeing how the neighbors voted
When multiple nearest neighbors hold a vote, it can sometimes be useful to examine whether the voters were unanimous or split.
For example, knowing more about the voters' confidence in the classification could allow an autonomous vehicle to proceed with caution whenever there is any chance at all that a stop sign is ahead.
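The two outputs below appear without their code; they match what knn() returns when called with prob = TRUE (a reconstruction, not necessarily the author's original chunk):

# assumed reconstruction: ask knn() to report the winning vote share
sign_pred <- knn(train = signs2[-1], test = signs_test[-1], cl = sign_types,
                 k = 7, prob = TRUE)

# first six predicted classes
head(sign_pred)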
## [1] speed stop  speed speed stop  speed
## Levels: pedestrian speed stop
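followed by the proportion of the k = 7 neighbors that voted for each winning class:

# vote share of the winning class for each prediction
sign_prob <- attr(sign_pred, "prob")
head(sign_prob)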
## [1] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 0.7142857
6. Data preparation for kNN
kNN computes distances on numeric data, so it benefits from features that share a common scale. A simple min-max normalization, which rescales each feature to the range [0, 1], is good enough for the kNN algorithm.
# min-max normalization: rescale a numeric vector to [0, 1]
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}

# normalized version of r1
summary(normalize(signs2$r1))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1935  0.3528  0.4046  0.6129  1.0000
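For comparison, the unnormalized r1 values below still span the raw 0-255 color scale (presumably printed with summary() on the original column):

# unnormalized version of r1, for comparison
summary(signs2$r1)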
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     3.0    51.0    90.5   103.3   155.0   251.0
Before applying the kNN algorithm, you should normalize the data with a technique like the min-max function above. Why? It ensures that all features contribute an equal share to the distance calculation, and rescaling reduces the influence of extreme values on kNN's distance function.
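As a usage sketch (signs_norm is a hypothetical name, not from the original post), the normalizer can be applied to all 48 color columns at once before training:

# apply min-max normalization to every color column (sign_type excluded)
signs_norm <- as.data.frame(lapply(signs2[-1], normalize))
summary(signs_norm$r1)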
All content comes from DataCamp.