Violent Crime Rates by US State dataset summary

https://www.rdocumentation.org/packages/datasets/versions/3.6.2/topics/USArrests

library(cluster)
library(ggplot2)
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(fossil)
Loading required package: sp
Loading required package: maps

Attaching package: ‘maps’

The following object is masked from ‘package:cluster’:

    votes.repub

Loading required package: shapefiles
Loading required package: foreign

Attaching package: ‘shapefiles’

The following objects are masked from ‘package:foreign’:

    read.dbf, write.dbf
# Preview the first rows of the USArrests data, then print per-column
# summary statistics (min, quartiles, mean, max) for its four variables.
head(USArrests)
summary(USArrests)
     Murder          Assault         UrbanPop          Rape      
 Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
 1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
 Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
 Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
 3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
 Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00  
# Center and scale every column of USArrests with scale()'s defaults and keep
# the result as `df`, which every later clustering / PCA call in this file uses.
# The summary confirms that each column has mean 0 afterwards.
df <- scale(USArrests)
summary(df)
     Murder           Assault           UrbanPop             Rape        
 Min.   :-1.6044   Min.   :-1.5090   Min.   :-2.31714   Min.   :-1.4874  
 1st Qu.:-0.8525   1st Qu.:-0.7411   1st Qu.:-0.76271   1st Qu.:-0.6574  
 Median :-0.1235   Median :-0.1411   Median : 0.03178   Median :-0.1209  
 Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  
 3rd Qu.: 0.7949   3rd Qu.: 0.9388   3rd Qu.: 0.84354   3rd Qu.: 0.5277  
 Max.   : 2.2069   Max.   : 1.9948   Max.   : 1.75892   Max.   : 2.6444  
# Pairwise Euclidean distances between the standardized rows (one row per state).
# NOTE(review): printing the whole distance object exceeds getOption("max.print"),
# so the printed output is truncated; consider printing only a subset of states.
dist(df, method = "euclidean")
                 Alabama    Alaska   Arizona  Arkansas California  Colorado Connecticut  Delaware   Florida   Georgia
Alaska         2.7037541                                                                                             
Arizona        2.2935197 2.7006429                                                                                   
Arkansas       1.2898102 2.8260386 2.7177583                                                                         
California     3.2631104 3.0125415 1.3104842 3.7636409                                                               
Colorado       2.6510673 2.3265187 1.3650307 2.8310512  1.2876185                                                    
Connecticut    3.2152975 4.7399125 3.2628575 2.6076395  4.0663898 3.3279920                                          
Delaware       2.0192927 3.6213633 1.9093696 1.8003239  3.0737852 2.5547456   1.7568475                              
Florida        2.2981353 2.9967642 1.7493928 3.3721968  2.0250039 2.4458600   4.4700701 3.0614170                    
Georgia        1.1314351 2.8194388 2.7871963 2.2117614  3.3780585 2.8649105   3.9738227 2.9838715 2.1812958          
Hawaii         3.3885300 4.5301340 3.2621208 2.9723097  3.6589083 2.8233524   1.3843291 2.4748807 4.3596338 3.8105218
Idaho          2.9146623 4.0580555 3.5210071 1.7687255  4.4879436 3.4767685   1.6354214 2.0382540 4.6999827 3.8005715
Illinois       1.8734993 3.2670626 1.0825512 2.4626424  1.9117469 1.7898322   2.7400560 1.5584719 1.7711863 2.3135778
Indiana        2.0761411 3.3655952 2.6407486 1.4450503  3.4061273 2.3655622   1.6147898 1.6973340 3.6150778 2.6924143
Iowa           3.4878952 4.7251910 4.1157513 2.4252661  4.9708591 3.9406898   1.5470089 2.6068606 5.2682765 4.2517889
Kansas         2.2941096 3.6808173 2.7762838 1.5718411  3.6071725 2.6272281   1.2280424 1.5510864 3.8424558 3.0071474
Kentucky       1.8475879 3.5440903 3.3567681 1.0598104  4.2463809 3.2274013   2.3346386 2.2514939 3.9474983 2.4408198
Louisiana      0.7722224 2.9631431 2.2178519 2.0254276  3.0176625 2.6546743   3.5329409 2.3266996 1.7529677 0.8592544
Maine          3.4851115 4.8322605 4.2961903 2.3621893  5.2699843 4.2713441   1.8792141 2.6560808 5.3946798 4.3334217
Maryland       1.2896460 2.2777590 1.2117356 2.0582244  2.2312581 1.9667562   3.4968269 1.9624834 1.4355204 1.8388691
Massachusetts  2.9874810 4.3729925 2.5162281 2.6881270  3.2156499 2.6522793   0.9468199 1.4382527 3.7753087 3.6706708
                  Hawaii     Idaho  Illinois   Indiana      Iowa    Kansas  Kentucky Louisiana     Maine  Maryland
Alaska                                                                                                            
Arizona                                                                                                           
Arkansas                                                                                                          
California                                                                                                        
Colorado                                                                                                          
Connecticut                                                                                                       
Delaware                                                                                                          
Florida                                                                                                           
Georgia                                                                                                           
Hawaii                                                                                                            
Idaho          2.3658101                                                                                          
Illinois       2.7329756 3.2728945                                                                                
Indiana        1.5460727 1.4923351 2.2027081                                                                      
Iowa           2.1564575 0.8584962 3.7380070 1.7786548                                                            
Kansas         1.4648766 1.2103118 2.3228505 0.4287712 1.4699265                                                  
Kentucky       2.5203345 1.6565236 2.8478883 1.1790552 1.9426473 1.3020180                                        
Louisiana      3.5687157 3.5283772 1.6535178 2.4957547 4.0359614 2.7284126 2.4221964                              
Maine          2.7160558 0.8486112 3.9342034 2.1029158 0.6457158 1.7913753 1.9925855 4.0901924                    
Maryland       3.6148670 3.4014584 1.3429997 2.5430878 4.0642448 2.7400943 2.8229479 1.2739137 4.1259083          
Massachusetts  1.3276676 2.2201020 2.0080982 1.6615695 2.3510287 1.4343401 2.6284451 3.1524549 2.6920282 2.9743193
               Massachusetts  Michigan Minnesota Mississippi  Missouri   Montana  Nebraska    Nevada New Hampshire
Alaska                                                                                                            
Arizona                                                                                                           
Arkansas                                                                                                          
California                                                                                                        
Colorado                                                                                                          
Connecticut                                                                                                       
Delaware                                                                                                          
Florida                                                                                                           
Georgia                                                                                                           
Hawaii                                                                                                            
Idaho                                                                                                             
Illinois                                                                                                          
Indiana                                                                                                           
Iowa                                                                                                              
Kansas                                                                                                            
Kentucky                                                                                                          
Louisiana                                                                                                         
Maine                                                                                                             
Maryland                                                                                                          
Massachusetts                                                                                                     
               New Jersey New Mexico  New York North Carolina North Dakota      Ohio  Oklahoma    Oregon Pennsylvania
Alaska                                                                                                               
Arizona                                                                                                              
Arkansas                                                                                                             
California                                                                                                           
Colorado                                                                                                             
Connecticut                                                                                                          
Delaware                                                                                                             
Florida                                                                                                              
Georgia                                                                                                              
Hawaii                                                                                                               
Idaho                                                                                                                
Illinois                                                                                                             
Indiana                                                                                                              
Iowa                                                                                                                 
Kansas                                                                                                               
Kentucky                                                                                                             
Louisiana                                                                                                            
Maine                                                                                                                
Maryland                                                                                                             
Massachusetts                                                                                                        
               Rhode Island South Carolina South Dakota Tennessee     Texas      Utah   Vermont  Virginia Washington
Alaska                                                                                                              
Arizona                                                                                                             
Arkansas                                                                                                            
California                                                                                                          
Colorado                                                                                                            
Connecticut                                                                                                         
Delaware                                                                                                            
Florida                                                                                                             
Georgia                                                                                                             
Hawaii                                                                                                              
Idaho                                                                                                               
Illinois                                                                                                            
Indiana                                                                                                             
Iowa                                                                                                                
Kansas                                                                                                              
Kentucky                                                                                                            
Louisiana                                                                                                           
Maine                                                                                                               
Maryland                                                                                                            
Massachusetts                                                                                                       
               West Virginia Wisconsin
Alaska                                
Arizona                               
Arkansas                              
California                            
Colorado                              
Connecticut                           
Delaware                              
Florida                               
Georgia                               
Hawaii                                
Idaho                                 
Illinois                              
Indiana                               
Iowa                                  
Kansas                                
Kentucky                              
Louisiana                             
Maine                                 
Maryland                              
Massachusetts                         
 [ reached getOption("max.print") -- omitted 29 rows ]

Hierarchical Clustering

# Agglomerative hierarchical clustering of the standardized data using the
# "ward.D2" linkage on Euclidean distances, drawn as a base-graphics dendrogram.
wardhclust <- hclust(dist(df, method = "euclidean"), method = "ward.D2")
plot(wardhclust)

# Same tree drawn with factoextra, cut into k = 4 clusters and with
# rectangles enabled (rect = TRUE).
fviz_dend(wardhclust, k = 4, rect = TRUE)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.

# Agglomerative hierarchical clustering of the standardized data using the
# "complete" linkage on Euclidean distances, drawn as a base-graphics dendrogram.
completehclust <- hclust(dist(df, method = "euclidean"), method = "complete")
plot(completehclust)

# Same tree drawn with factoextra, cut into k = 4 clusters and with
# rectangles enabled (rect = TRUE).
fviz_dend(completehclust, k = 4, rect = TRUE)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.

K-Means Clustering

# Plot from fviz_nbclust() using the within-cluster sum of squares ("wss")
# criterion with kmeans; dashed vertical reference lines (linetype = 2) are
# added at k = 4 and k = 2.
fviz_nbclust(df, kmeans, method = "wss") + geom_vline(xintercept = 4, linetype = 2) + geom_vline(xintercept = 2, linetype = 2)

# Same helper using the "silhouette" criterion.
fviz_nbclust(df, kmeans, method = "silhouette")

# Fit k-means with 4 clusters on the standardized data and print the result.
# NOTE(review): no set.seed() and no nstart are given; kmeans() chooses its
# starting centers at random, so cluster labels (and possibly the partition)
# can change between runs. The later kmeans(df, 4) calls in this file are
# independent refits, not this printed object.
kmeans(df, 4)
K-means clustering with 4 clusters of sizes 8, 13, 16, 13

Cluster means:
      Murder    Assault   UrbanPop        Rape
1  1.4118898  0.8743346 -0.8145211  0.01927104
2 -0.9615407 -1.1066010 -0.9301069 -0.96676331
3 -0.4894375 -0.3826001  0.5758298 -0.26165379
4  0.6950701  1.0394414  0.7226370  1.27693964

Clustering vector:
       Alabama         Alaska        Arizona       Arkansas     California       Colorado    Connecticut       Delaware 
             1              4              4              1              4              4              3              3 
       Florida        Georgia         Hawaii          Idaho       Illinois        Indiana           Iowa         Kansas 
             4              1              3              2              4              3              2              3 
      Kentucky      Louisiana          Maine       Maryland  Massachusetts       Michigan      Minnesota    Mississippi 
             2              1              2              4              3              4              2              1 
      Missouri        Montana       Nebraska         Nevada  New Hampshire     New Jersey     New Mexico       New York 
             4              2              2              4              2              3              4              4 
North Carolina   North Dakota           Ohio       Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
             1              2              3              3              3              3              3              1 
  South Dakota      Tennessee          Texas           Utah        Vermont       Virginia     Washington  West Virginia 
             2              1              4              3              2              3              3              2 
     Wisconsin        Wyoming 
             2              3 

Within cluster sum of squares by cluster:
[1]  8.316061 11.952463 16.212213 19.922437
 (between_SS / total_SS =  71.2 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"    "size"         "iter"        
[9] "ifault"      
# Plot a 4-cluster k-means partition of the standardized data with Euclidean
# ellipses, star segments to the cluster centers, and repelled labels.
# kmeans() starts from randomly chosen centers, so the fit is seeded and run
# from several starts (nstart) to make the plotted partition reproducible and
# to keep the best of the attempted solutions rather than a single random one.
set.seed(123)
km_fit <- kmeans(df, centers = 4, nstart = 25)
fviz_cluster(
  km_fit,
  data = df,
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE
)

Principal Components Analysis

# Matrix of pairwise plots for the four raw (unscaled) USArrests variables;
# the progress lines that follow are GGally rendering each of the 4 x 4 panels.
ggpairs(USArrests)

 plot: [1,1] [=====>---------------------------------------------------------------------------------------]  6% est: 0s 
 plot: [1,2] [===========>---------------------------------------------------------------------------------] 12% est: 0s 
 plot: [1,3] [================>----------------------------------------------------------------------------] 19% est: 0s 
 plot: [1,4] [======================>----------------------------------------------------------------------] 25% est: 0s 
 plot: [2,1] [============================>----------------------------------------------------------------] 31% est: 0s 
 plot: [2,2] [==================================>----------------------------------------------------------] 38% est: 0s 
 plot: [2,3] [========================================>----------------------------------------------------] 44% est: 0s 
 plot: [2,4] [=============================================>-----------------------------------------------] 50% est: 0s 
 plot: [3,1] [===================================================>-----------------------------------------] 56% est: 0s 
 plot: [3,2] [=========================================================>-----------------------------------] 62% est: 0s 
 plot: [3,3] [===============================================================>-----------------------------] 69% est: 0s 
 plot: [3,4] [=====================================================================>-----------------------] 75% est: 0s 
 plot: [4,1] [===========================================================================>-----------------] 81% est: 0s 
 plot: [4,2] [================================================================================>------------] 88% est: 0s 
 plot: [4,3] [======================================================================================>------] 94% est: 0s 
 plot: [4,4] [=============================================================================================]100% est: 0s 
                                                                                                                         

# Principal components analysis of the standardized data; prints the component
# standard deviations and the rotation (loadings) matrix.
# df was already centered by scale() (column means are 0), so center = TRUE
# has no further shifting to do here.
prcomp(df, center=TRUE)
Standard deviations (1, .., p=4):
[1] 1.5748783 0.9948694 0.5971291 0.4164494

Rotation (n x k) = (4 x 4):
                PC1        PC2        PC3         PC4
Murder   -0.5358995  0.4181809 -0.3412327  0.64922780
Assault  -0.5831836  0.1879856 -0.2681484 -0.74340748
UrbanPop -0.2781909 -0.8728062 -0.3780158  0.13387773
Rape     -0.5434321 -0.1673186  0.8177779  0.08902432
# Standard deviation, proportion of variance and cumulative proportion for
# each principal component.
# NOTE(review): this refits the same prcomp() call used elsewhere in this file;
# the fit could be stored once and reused.
summary(prcomp(df, center=TRUE))
Importance of components:
                          PC1    PC2     PC3     PC4
Standard deviation     1.5749 0.9949 0.59713 0.41645
Proportion of Variance 0.6201 0.2474 0.08914 0.04336
Cumulative Proportion  0.6201 0.8675 0.95664 1.00000
# Scree plot of the PCA on the standardized data, with value labels switched
# on (addlabels = TRUE). The PCA is refit inline for this call.
fviz_eig(prcomp(df, center=TRUE), addlabels = TRUE)

Rand Index and Adjusted Rand Index

# Rand index between the Ward tree cut into 4 clusters and a 4-cluster
# k-means partition of the same standardized data.
# NOTE(review): kmeans(df, 4) is refit here from a new random start with no
# set.seed(), so the value is not guaranteed to describe the clustering printed
# earlier and may change between runs.
# NOTE(review): only rand.index() is called; the adjusted Rand index named in
# the section heading is not computed (fossil appears to provide
# adj.rand.index() for that; confirm and add if intended).
rand.index(cutree(wardhclust, k = 4), kmeans(df, 4)$cluster)
[1] 0.8440816
LS0tCnRpdGxlOiAiQ2x1c3RlciBBbmFseXNpcyIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGZpZ19jYXB0aW9uOiB5ZXMKICAgIHRoZW1lOiB1bml0ZWQKICAgIHRvYzogeWVzCi0tLQoKIyMgVmlvbGVudCBDcmltZSBSYXRlcyBieSBVUyBTdGF0ZSBkYXRhc2V0IHN1bW1hcnkKCltodHRwczovL3d3dy5yZG9jdW1lbnRhdGlvbi5vcmcvcGFja2FnZXMvZGF0YXNldHMvdmVyc2lvbnMvMy42LjIvdG9waWNzL1VTQXJyZXN0c10oaHR0cHM6Ly93d3cucmRvY3VtZW50YXRpb24ub3JnL3BhY2thZ2VzL2RhdGFzZXRzL3ZlcnNpb25zLzMuNi4yL3RvcGljcy9VU0FycmVzdHMpCgo8IS0tLS0KYnJldyBpbnN0YWxsIGNtYWtlCmJyZXcgaW5zdGFsbCBubG9wdAotLT4KYGBge3IsIG1lc3NhZ2U9RkFMU0V9CmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGZhY3RvZXh0cmEpCmxpYnJhcnkoR0dhbGx5KQpsaWJyYXJ5KGZvc3NpbCkKYGBgCgpgYGB7cn0KaGVhZChVU0FycmVzdHMpCmBgYAoKYGBge3J9CnN1bW1hcnkoVVNBcnJlc3RzKQpgYGAKCmBgYHtyfQpkZiA8LSBzY2FsZShVU0FycmVzdHMpCnN1bW1hcnkoZGYpCmBgYAoKYGBge3J9CmRpc3QoZGYsIG1ldGhvZCA9ICJldWNsaWRlYW4iKQpgYGAKCiMjIEhpZXJhcmNoaWNhbCBDbHVzdGVyaW5nCgpgYGB7cn0Kd2FyZGhjbHVzdCA8LSBoY2x1c3QoZGlzdChkZiwgbWV0aG9kID0gImV1Y2xpZGVhbiIpLCBtZXRob2QgPSAid2FyZC5EMiIpCnBsb3Qod2FyZGhjbHVzdCkKYGBgCgpgYGB7cn0KZnZpel9kZW5kKHdhcmRoY2x1c3QsIGsgPSA0LCByZWN0ID0gVFJVRSkKYGBgCgpgYGB7cn0KY29tcGxldGVoY2x1c3QgPC0gaGNsdXN0KGRpc3QoZGYsIG1ldGhvZCA9ICJldWNsaWRlYW4iKSwgbWV0aG9kID0gImNvbXBsZXRlIikKcGxvdChjb21wbGV0ZWhjbHVzdCkKYGBgCgpgYGB7cn0KZnZpel9kZW5kKGNvbXBsZXRlaGNsdXN0LCBrID0gNCwgcmVjdCA9IFRSVUUpCmBgYAoKIyMgSy1NZWFucyBDbHVzdGVyaW5nCgpgYGB7cn0KZnZpel9uYmNsdXN0KGRmLCBrbWVhbnMsIG1ldGhvZCA9ICJ3c3MiKSArIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IDQsIGxpbmV0eXBlID0gMikgKyBnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSAyLCBsaW5ldHlwZSA9IDIpCmBgYAoKYGBge3J9CmZ2aXpfbmJjbHVzdChkZiwga21lYW5zLCBtZXRob2QgPSAic2lsaG91ZXR0ZSIpCmBgYAoKYGBge3J9CmttZWFucyhkZiwgNCkKYGBgCgpgYGB7cn0KZnZpel9jbHVzdGVyKGttZWFucyhkZiwgNCksIGRhdGEgPSBkZiwgZWxsaXBzZS50eXBlID0gImV1Y2xpZCIsICBzdGFyLnBsb3QgPSBUUlVFLCByZXBlbCA9IFRSVUUpCmBgYAoKIyMgUHJpbmNpcGFsIENvbXBvbmVudHMgQW5hbHlzaXMKCgpgYGB7cn0KZ2dwYWlycyhVU0FycmVzdHMpCmBgYAoKYGBge3J9CnByY29tcChkZiwgY2VudGVyPVRSVUUpCmBgYAoKYGBge3J9CnN1bW1hcnkocHJjb21wKGRm
LCBjZW50ZXI9VFJVRSkpCmBgYAoKYGBge3J9CmZ2aXpfZWlnKHByY29tcChkZiwgY2VudGVyPVRSVUUpLCBhZGRsYWJlbHMgPSBUUlVFKQpgYGAKCiMjIFJhbmQgSW5kZXggYW5kIEFkanVzdGVkIFJhbmQgSW5kZXgKCmBgYHtyfQpyYW5kLmluZGV4KGN1dHJlZSh3YXJkaGNsdXN0LCBrID0gNCksIGttZWFucyhkZiwgNCkkY2x1c3RlcikKYGBg