author: Rodica Ioana Lung date: 26.02.2020
========================================================
R este un limbaj de programare interpretat - majoritatea functiilor sunt scrise in R - se pot combina cu proceduri in C, C++ sau FORTRAN - se pot apela comenzi de sistem din R
R este folosit pentru manipulare de date, statistica si grafice. Foloseste: - operatori (+ - <- * %*% .) pentru calcule - o colectie uriasa de functii - facilitati de reprezentare grafica - pachete de functii scrise de utilizatori: 800+
Avantaje
ofera interfata SQL *** Dezavantaje
pregatirea datelor e mai grea decat in SPSS sau SAS
cran.r-project.org/src/contrib/PACKAGES.html
http://www.rseek.org/ in loc de google
IMPORTANT - cu cat mai mult timp petreceti folosind R, cu atat mai confortabili veti fi cu el :)
daca vreti sa incercati singuri:
install.packages('swirl')
library('swirl')
swirl()
output
working directory
setwd("E:/Dropbox/FSEGA/cursuri/2016-2017/semestrul 2/R/curs1")
incremental:true
data(cars)
summary(cars)
speed dist
Min. : 4.0 Min. : 2.00
1st Qu.:12.0 1st Qu.: 26.00
Median :15.0 Median : 36.00
Mean :15.4 Mean : 42.98
3rd Qu.:19.0 3rd Qu.: 56.00
Max. :25.0 Max. :120.00
incremental:true
plot(cars)
incremental:true
data(mtcars)
summary(mtcars)
mpg cyl disp hp
Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
Median :19.20 Median :6.000 Median :196.3 Median :123.0
Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
drat wt qsec vs
Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
Median :3.695 Median :3.325 Median :17.71 Median :0.0000
Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
am gear carb
Min. :0.0000 Min. :3.000 Min. :1.000
1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
Median :0.0000 Median :4.000 Median :2.000
Mean :0.4062 Mean :3.688 Mean :2.812
3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :1.0000 Max. :5.000 Max. :8.000
incremental:true
mtcars$wt
[1] 2.620 2.875 2.320 3.215 3.440 3.460 3.570 3.190 3.150 3.440 3.440
[12] 4.070 3.730 3.780 5.250 5.424 5.345 2.200 1.615 1.835 2.465 3.520
[23] 3.435 3.840 3.845 1.935 2.140 1.513 3.170 2.770 3.570 2.780
mean(mtcars$wt)
[1] 3.21725
?mean afiseaza pagina de Help pentru functia mean
incremental:true
Vector
a=c(1,2,3)
a
[1] 1 2 3
a=c(a,100:120)
a
[1] 1 2 3 100 101 102 103 104 105 106 107 108 109 110 111 112 113
[18] 114 115 116 117 118 119 120
=== incremental:true
a[4]
[1] 100
a[2:6]
[1] 2 3 100 101 102
a[5]=3456
a[5]
[1] 3456
a[1:10]
[1] 1 2 3 100 3456 102 103 104 105 106
a[c(4,5,9)]
[1] 100 3456 105
=== incremental:true
Atribuirile se fac cu <- (in RStudio, scurtatura de tastatura ALT + - insereaza operatorul <-)
nume <- c("Ana", "Maria",
"Angela","Andrei",
"Mihai", "Ioana",
"Lucian")
nume
[1] "Ana" "Maria" "Angela" "Andrei" "Mihai" "Ioana" "Lucian"
numbers <- c(1:10)
numbers
[1] 1 2 3 4 5 6 7 8 9 10
numbers <- c(numbers, 11:20)
numbers
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
==== incremental:true
nume <- c("Ana", "Maria",
"Angela","Andrei",
"Mihai", "Ioana",
"Lucian", 'NUMELEVOSTRU')
mystery = nchar(nume)
mystery
[1] 3 5 6 6 5 5 6 12
mystery == 6
[1] FALSE FALSE TRUE TRUE FALSE FALSE TRUE FALSE
nume[mystery == 6]
[1] "Angela" "Andrei" "Lucian"
=== incremental:true
data(mtcars)
names(mtcars)
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
[11] "carb"
?mtcars
=== incremental:true
mtcars
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
==== incremental:true
str(mtcars)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : num 0 0 1 1 0 1 0 1 1 1 ...
$ am : num 1 1 1 0 0 0 0 0 0 0 ...
$ gear: num 4 4 4 3 3 3 3 4 4 4 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
dim(mtcars)
[1] 32 11
=== incremental:true
rownames(mtcars)
[1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
[4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
[7] "Duster 360" "Merc 240D" "Merc 230"
[10] "Merc 280" "Merc 280C" "Merc 450SE"
[13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
[16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
[19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
[22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
[25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
[28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
[31] "Maserati Bora" "Volvo 142E"
colnames(mtcars)
[1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
[11] "carb"
=== incremental:true
rownames(mtcars) <- c(1:32)
mtcars
mpg cyl disp hp drat wt qsec vs am gear carb
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
11 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
12 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
13 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
14 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
15 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
18 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
21 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
22 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
23 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
24 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
25 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
26 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
27 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
28 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
29 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
30 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
31 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
32 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
=== incremental:true
data(mtcars)
head(mtcars, 10)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
===
head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
=== incremental:true
tail(mtcars, 3)
mpg cyl disp hp drat wt qsec vs am gear carb
Ferrari Dino 19.7 6 145 175 3.62 2.77 15.5 0 1 5 6
Maserati Bora 15.0 8 301 335 3.54 3.57 14.6 0 1 5 8
Volvo 142E 21.4 4 121 109 4.11 2.78 18.6 1 1 4 2
tail(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.6 1 1 4 2
===
mtcars$mpg
[1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
[15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
[29] 15.8 19.7 15.0 21.4
mean(mtcars$mpg)
[1] 20.09062
am aflat despre:
c, nchar, data, str, dim, names, rownames, colnames, head, tail, mean
=== incremental:true
state<-read.csv("E:/Dropbox/FSEGA/cursuri/2016-2017/semestrul 2/R/date/stateData.csv")
Fie trecem doar numele fisierului (in ghilimele) daca e in directorul de lucru, fie trecem calea intreaga catre fisier.
str(state)
'data.frame': 50 obs. of 12 variables:
$ X : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
$ state.abb : Factor w/ 50 levels "AK","AL","AR",..: 2 1 4 3 5 6 7 8 9 10 ...
$ state.area : int 51609 589757 113909 53104 158693 104247 5009 2057 58560 58876 ...
$ state.region : int 2 4 4 2 4 4 1 2 2 2 ...
$ population : int 3615 365 2212 2110 21198 2541 3100 579 8277 4931 ...
$ income : int 3624 6315 4530 3378 5114 4884 5348 4809 4815 4091 ...
$ illiteracy : num 2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
$ life.exp : num 69 69.3 70.5 70.7 71.7 ...
$ murder : num 15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
$ highSchoolGrad: num 41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
$ frost : int 20 152 15 65 20 166 139 103 11 60 ...
$ area : int 50708 566432 113417 51945 156361 103766 4862 1982 54090 58073 ...
=== incremental:true
head(state)
X state.abb state.area state.region population income
1 Alabama AL 51609 2 3615 3624
2 Alaska AK 589757 4 365 6315
3 Arizona AZ 113909 4 2212 4530
4 Arkansas AR 53104 2 2110 3378
5 California CA 158693 4 21198 5114
6 Colorado CO 104247 4 2541 4884
illiteracy life.exp murder highSchoolGrad frost area
1 2.1 69.05 15.1 41.3 20 50708
2 1.5 69.31 11.3 66.7 152 566432
3 1.8 70.55 7.8 58.1 15 113417
4 1.9 70.66 10.1 39.9 65 51945
5 1.1 71.71 10.3 62.6 20 156361
6 0.7 72.06 6.8 63.9 166 103766
incremental:true
subset(state,state.region==1)
X state.abb state.area state.region population income
7 Connecticut CT 5009 1 3100 5348
19 Maine ME 33215 1 1058 3694
21 Massachusetts MA 8257 1 5814 4755
29 New Hampshire NH 9304 1 812 4281
30 New Jersey NJ 7836 1 7333 5237
32 New York NY 49576 1 18076 4903
38 Pennsylvania PA 45333 1 11860 4449
39 Rhode Island RI 1214 1 931 4558
45 Vermont VT 9609 1 472 3907
illiteracy life.exp murder highSchoolGrad frost area
7 1.1 72.48 3.1 56.0 139 4862
19 0.7 70.39 2.7 54.7 161 30920
21 1.1 71.83 3.3 58.5 103 7826
29 0.7 71.23 3.3 57.6 174 9027
30 1.1 70.93 5.2 52.5 115 7521
32 1.4 70.55 10.9 52.7 82 47831
38 1.0 70.43 6.1 50.2 126 44966
39 1.3 71.90 2.4 46.4 127 1049
45 0.6 71.64 5.5 57.1 168 9267
=== incremental:true
nordestsubset <- subset(state,state.region==1)
head(nordestsubset)
X state.abb state.area state.region population income
7 Connecticut CT 5009 1 3100 5348
19 Maine ME 33215 1 1058 3694
21 Massachusetts MA 8257 1 5814 4755
29 New Hampshire NH 9304 1 812 4281
30 New Jersey NJ 7836 1 7333 5237
32 New York NY 49576 1 18076 4903
illiteracy life.exp murder highSchoolGrad frost area
7 1.1 72.48 3.1 56.0 139 4862
19 0.7 70.39 2.7 54.7 161 30920
21 1.1 71.83 3.3 58.5 103 7826
29 0.7 71.23 3.3 57.6 174 9027
30 1.1 70.93 5.2 52.5 115 7521
32 1.4 70.55 10.9 52.7 82 47831
dim(nordestsubset)
[1] 9 12
incremental:true
state[state$state.region==3,]
X state.abb state.area state.region population income
13 Illinois IL 56400 3 11197 5107
14 Indiana IN 36291 3 5313 4458
15 Iowa IA 56290 3 2861 4628
16 Kansas KS 82264 3 2280 4669
22 Michigan MI 58216 3 9111 4751
23 Minnesota MN 84068 3 3921 4675
25 Missouri MO 69686 3 4767 4254
27 Nebraska NE 77227 3 1544 4508
34 North Dakota ND 70665 3 637 5087
35 Ohio OH 41222 3 10735 4561
41 South Dakota SD 77047 3 681 4167
49 Wisconsin WI 56154 3 4589 4468
illiteracy life.exp murder highSchoolGrad frost area
13 0.9 70.14 10.3 52.6 127 55748
14 0.7 70.88 7.1 52.9 122 36097
15 0.5 72.56 2.3 59.0 140 55941
16 0.6 72.58 4.5 59.9 114 81787
22 0.9 70.63 11.1 52.8 125 56817
23 0.6 72.96 2.3 57.6 160 79289
25 0.8 70.69 9.3 48.8 108 68995
27 0.6 72.60 2.9 59.3 139 76483
34 0.8 72.78 1.4 50.3 186 69273
35 0.8 70.82 7.4 53.2 124 40975
41 0.5 72.08 1.7 53.3 172 75955
49 0.7 72.48 3.0 54.5 149 54464
=== incremental:true
state[state$state.region==3,c(1,2)]
X state.abb
13 Illinois IL
14 Indiana IN
15 Iowa IA
16 Kansas KS
22 Michigan MI
23 Minnesota MN
25 Missouri MO
27 Nebraska NE
34 North Dakota ND
35 Ohio OH
41 South Dakota SD
49 Wisconsin WI
=== incremental:true
nordest=state[state$state.region==1,]
head(nordest)
X state.abb state.area state.region population income
7 Connecticut CT 5009 1 3100 5348
19 Maine ME 33215 1 1058 3694
21 Massachusetts MA 8257 1 5814 4755
29 New Hampshire NH 9304 1 812 4281
30 New Jersey NJ 7836 1 7333 5237
32 New York NY 49576 1 18076 4903
illiteracy life.exp murder highSchoolGrad frost area
7 1.1 72.48 3.1 56.0 139 4862
19 0.7 70.39 2.7 54.7 161 30920
21 1.1 71.83 3.3 58.5 103 7826
29 0.7 71.23 3.3 57.6 174 9027
30 1.1 70.93 5.2 52.5 115 7521
32 1.4 70.55 10.9 52.7 82 47831
dim(nordest)
[1] 9 12
cu tabelul stateData.csv:
Tot cu tabelul stateData.csv