Cu tabelul pseudo_facebook.tsv:
str(pf)
'data.frame': 99003 obs. of 15 variables:
$ userid : int 2094382 1192601 2083884 1203168 1733186 1524765 1136133 1680361 1365174 1712567 ...
$ age : int 14 14 14 14 14 14 13 13 13 13 ...
$ dob_day : int 19 2 16 25 4 1 14 4 1 2 ...
$ dob_year : int 1999 1999 1999 1999 1999 1999 2000 2000 2000 2000 ...
$ dob_month : Factor w/ 12 levels "ian","feb","mar",..: 11 11 11 12 12 12 1 1 1 2 ...
$ gender : Factor w/ 2 levels "female","male": 2 1 2 1 2 2 2 1 2 2 ...
$ tenure : int 266 6 13 93 82 15 12 0 81 171 ...
$ friend_count : int 0 0 0 0 0 0 0 0 0 0 ...
$ friendships_initiated: int 0 0 0 0 0 0 0 0 0 0 ...
$ likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ likes_received : int 0 0 0 0 0 0 0 0 0 0 ...
$ mobile_likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ mobile_likes_received: int 0 0 0 0 0 0 0 0 0 0 ...
$ www_likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ www_likes_received : int 0 0 0 0 0 0 0 0 0 0 ...
# Load the tab-separated data set from the working directory.
# stringsAsFactors is spelled out because its default changed from TRUE to
# FALSE in R 4.0.0; without it, text columns such as `gender` would be read as
# character on newer R versions instead of as factors.
pf <- read.csv("pseudo_facebook.tsv", sep = "\t", stringsAsFactors = TRUE)
# Print the column types and the first few values of every column.
str(pf)
'data.frame': 99003 obs. of 15 variables:
$ userid : int 2094382 1192601 2083884 1203168 1733186 1524765 1136133 1680361 1365174 1712567 ...
$ age : int 14 14 14 14 14 14 13 13 13 13 ...
$ dob_day : int 19 2 16 25 4 1 14 4 1 2 ...
$ dob_year : int 1999 1999 1999 1999 1999 1999 2000 2000 2000 2000 ...
$ dob_month : int 11 11 11 12 12 12 1 1 1 2 ...
$ gender : Factor w/ 2 levels "female","male": 2 1 2 1 2 2 2 1 2 2 ...
$ tenure : int 266 6 13 93 82 15 12 0 81 171 ...
$ friend_count : int 0 0 0 0 0 0 0 0 0 0 ...
$ friendships_initiated: int 0 0 0 0 0 0 0 0 0 0 ...
$ likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ likes_received : int 0 0 0 0 0 0 0 0 0 0 ...
$ mobile_likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ mobile_likes_received: int 0 0 0 0 0 0 0 0 0 0 ...
$ www_likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ www_likes_received : int 0 0 0 0 0 0 0 0 0 0 ...
# Rebind `pf` to `pseudo_facebook`. That object is not created anywhere in the
# visible transcript (presumably it was imported beforehand, e.g. through the
# IDE's import dialog -- TODO confirm).
pf <- pseudo_facebook
# Check whether the birth-month column is stored as a factor.
is.factor(pf[["dob_month"]])
[1] TRUE
str(pf)
'data.frame': 99003 obs. of 15 variables:
$ userid : int 2094382 1192601 2083884 1203168 1733186 1524765 1136133 1680361 1365174 1712567 ...
$ age : int 14 14 14 14 14 14 13 13 13 13 ...
$ dob_day : int 19 2 16 25 4 1 14 4 1 2 ...
$ dob_year : int 1999 1999 1999 1999 1999 1999 2000 2000 2000 2000 ...
$ dob_month : Factor w/ 12 levels "ian","feb","mar",..: 11 11 11 12 12 12 1 1 1 2 ...
$ gender : Factor w/ 2 levels "female","male": 2 1 2 1 2 2 2 1 2 2 ...
$ tenure : int 266 6 13 93 82 15 12 0 81 171 ...
$ friend_count : int 0 0 0 0 0 0 0 0 0 0 ...
$ friendships_initiated: int 0 0 0 0 0 0 0 0 0 0 ...
$ likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ likes_received : int 0 0 0 0 0 0 0 0 0 0 ...
$ mobile_likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ mobile_likes_received: int 0 0 0 0 0 0 0 0 0 0 ...
$ www_likes : int 0 0 0 0 0 0 0 0 0 0 ...
$ www_likes_received : int 0 0 0 0 0 0 0 0 0 0 ...
library(ggplot2)
library(gridExtra)
package 'gridExtra' was built under R version 3.5.3
# Histograms of the total, web and mobile like counts, one plot per variable.
# qplot() is deprecated as of ggplot2 3.4.0, so each plot is built with
# ggplot() + geom_histogram(), the layer qplot() picks for a single numeric x.
# Mapping the columns through `pf` (instead of passing `pf$likes` vectors) also
# makes the x-axis titles read "likes" rather than "pf$likes".
g1 <- ggplot(pf, aes(x = likes)) +
  geom_histogram()
g2 <- ggplot(pf, aes(x = www_likes)) +
  geom_histogram()
g3 <- ggplot(pf, aes(x = mobile_likes)) +
  geom_histogram()

# Draw each histogram on its own.
g1
g2
g3

# Draw the three histograms side by side in a single row.
grid.arrange(g1, g2, g3, ncol = 3)
# Same three histograms, now split into one panel per birth month
# (four panels per row). Built with ggplot() + geom_histogram() because
# qplot() is deprecated as of ggplot2 3.4.0.
g1 <- ggplot(pf, aes(x = likes)) +
  geom_histogram() +
  facet_wrap(~dob_month, ncol = 4)
g2 <- ggplot(pf, aes(x = www_likes)) +
  geom_histogram() +
  facet_wrap(~dob_month, ncol = 4)
g3 <- ggplot(pf, aes(x = mobile_likes)) +
  geom_histogram() +
  facet_wrap(~dob_month, ncol = 4)

# Draw each faceted plot on its own.
g1
g2
g3

# Request a tall, narrow plot area before stacking the three plots.
# NOTE(review): the repr.plot.* options are presumably read by the Jupyter R
# kernel -- confirm. They are global and are not reset afterwards, so every
# later plot in the session is drawn with this size as well.
options(repr.plot.width = 5, repr.plot.height = 20)

# Stack the three faceted plots vertically in a single column.
grid.arrange(g1, g2, g3, ncol = 1)
# Distribution of web likes as box plots and frequency polygons.
# Built with ggplot() and explicit geoms because qplot() is deprecated as of
# ggplot2 3.4.0; the aesthetics and geoms are the ones the qplot() calls used.

# Single box plot of www_likes over all users.
g4 <- ggplot(pf, aes(y = www_likes)) +
  geom_boxplot()
g4

# One box per birth month.
ggplot(pf, aes(x = dob_month, y = www_likes)) +
  geom_boxplot()

# Same plot zoomed to 0-20 likes. coord_cartesian() only changes the visible
# range; the box statistics are still computed from all observations.
ggplot(pf, aes(x = dob_month, y = www_likes)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 20))

# Frequency polygon of www_likes for all users.
ggplot(pf, aes(x = www_likes)) +
  geom_freqpoly()

# One frequency polygon per birth month, distinguished by line colour.
ggplot(pf, aes(x = www_likes, colour = dob_month)) +
  geom_freqpoly()