# Session setup: working directory, input data, plotting libraries, plot size.
# NOTE(review): the working directory is a machine-specific absolute path, so
# the script only runs unchanged where this folder exists.
setwd("e:/Dropbox (Personal)/FSEGA/cursuri/2018-2019/R/cursuri")

# Tab-separated input; read.delim() is read.csv() with sep = "\t" as default.
pf <- read.delim("pseudo_facebook.tsv")

library(ggplot2)
library(gridExtra)

# Plot width/height options; presumably read by the notebook front end that
# renders the figures -- TODO confirm.
options(repr.plot.height = 2.5, repr.plot.width = 4.5)
# Exemplu: am vrea sa stim daca cineva a folosit sau nu mobilul pentru a accesa Facebook.
# Summarise the raw mobile_likes counter, then the TRUE/FALSE flag obtained
# by testing whether it is positive.
summary(pf$mobile_likes)
summary(pf$mobile_likes > 0)

# Keep the flag as a stand-alone logical vector. The former placeholder
# assignment of NA was overwritten immediately, so it has been dropped.
mobile_check_in <- pf$mobile_likes > 0
summary(mobile_check_in)
# Sau direct in pf:
# Store the flag in the data frame itself: 1 when mobile_likes > 0, else 0.
pf[["mobile_check_in"]] <- ifelse(pf[["mobile_likes"]] > 0, 1, 0)
str(pf)

# Open the help page for ifelse().
help("ifelse")
# Si apoi o convertesc in factor:
# Turn the flag column into a factor whose levels are labelled "no" / "yes"
# (labels are applied to the sorted distinct values found in the column).
pf[["mobile_check_in"]] <- factor(
  pf[["mobile_check_in"]],
  labels = c("no", "yes")
)

# Counts per level, then the level names themselves.
summary(pf[["mobile_check_in"]])
levels(pf[["mobile_check_in"]])
# Quick scatter plots with qplot(): the first two calls draw the same plot
# (x/y given by name, then by position); the third swaps the two axes.
qplot(data = pf, x = age, y = friend_count)
qplot(age, friend_count, data = pf)
qplot(friend_count, age, data = pf)
# Sau cu ggplot:
# The age vs. friend_count scatter plot built with ggplot(), drawn three
# times: fully opaque points, then alpha 1/10, then alpha 1/30.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point()

ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10)

ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 30)
# alpha = 1/20: transparenta punctelor; un singur punct are opacitatea 1/20, deci 20 de puncte suprapuse apar complet colorate.
# Same scatter plot at alpha 1/20 with the age axis limited to 13-90.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 20) +
  xlim(13, 90)

# As above, with a jittered layer added after the plain points.
# NOTE(review): geom_point() and geom_jitter() are both drawn, so every
# observation appears twice -- confirm that this overlay is intended.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 20) +
  xlim(13, 90) +
  geom_jitter(alpha = 1 / 20)
# geom_jitter() adauga un zgomot aleator pentru a reduce aspectul de 'linii'.
# Same thing with another variable: friendships_initiated against age.
ggplot(pf, aes(x = age, y = friendships_initiated)) +
  geom_point() +
  xlim(13, 90)

# Single point colour; I() passes "coral" through as a literal value.
ggplot(pf, aes(x = age, y = friendships_initiated)) +
  geom_point(mapping = aes(color = I("coral"))) +
  xlim(13, 90)

# Colour mapped to gender, plus axis labels, title, subtitle and caption
# (all set through one labs() call).
ggplot(pf, aes(x = age, y = friendships_initiated)) +
  geom_point(mapping = aes(color = gender)) +
  xlim(13, 90) +
  labs(
    x = "explicatie axa X",
    y = "explicatie axa Y",
    title = "titlu grafic",
    subtitle = "subtitlu",
    caption = "si inca o explicatie"
  )
# Load dplyr, installing it first only when it is not already available.
# An unconditional install.packages() call would re-download the package on
# every run of the script.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages(
    "dplyr",
    repos = "http://cran.us.r-project.org",
    dependencies = TRUE  # spelled out: `T` is an ordinary, reassignable name
  )
}
library(dplyr)
# Grupam datele:
# Small example table, read from CSV and printed.
c4 <- read.csv("curs4.csv")
c4

# Group the example table by Var1, then print it and inspect its structure.
grupat <- c4 %>% group_by(Var1)
grupat
str(grupat)

# Group the Facebook data by age and inspect the resulting structure.
grupe_varsta <- pf %>% group_by(age)
str(grupe_varsta)
# Si apoi calculam statisticile dorite pentru fiecare grup:
# Per-group statistics for the example table: mean, row count, sum and
# median of Var2 within each group.
c4sumar <- grupat %>%
  summarise(
    media = mean(Var2),
    numar = n(),
    suma = sum(Var2),
    mediana = median(Var2)
  )
c4sumar
str(c4sumar)

# Per-age mean and median of friend_count, plus the number of rows per age.
pf.fc_age <- grupe_varsta %>%
  summarise(
    friend_count_media = mean(friend_count),
    friend_count_mediana = median(friend_count),
    n = n()
  )
str(pf.fc_age)
head(pf.fc_age)
# Daca e nevoie, il ordonam:
# Sort the example summary by group size: ascending first, then descending.
c4sumar <- c4sumar %>% arrange(numar)
c4sumar
c4sumar <- c4sumar %>% arrange(desc(numar))
c4sumar

# Sort the per-age summary by age and show its last rows.
pf.fc_age <- pf.fc_age %>% arrange(age)
tail(pf.fc_age)
# Alternativ, putem realiza totul dintr-o singura instructiune:
# Group, summarise and sort in a single pipeline.
# NOTE(review): the mean column is named friend_count_medie here but
# friend_count_media in pf.fc_age -- confirm the difference is intended.
pf.fc_age_nou <- pf %>%
  group_by(age) %>%
  summarise(
    friend_count_medie = mean(friend_count),
    friend_count_mediana = median(friend_count),
    n = n()
  ) %>%
  arrange(age)

head(pf.fc_age_nou)
# Acum putem reprezenta grafic mediana si media friend_count in functie de age:
# Per-age median of friend_count drawn as points.
ggplot(pf.fc_age, aes(x = age, y = friend_count_mediana)) +
  geom_point()

# Per-age mean of friend_count drawn as a line, default colour then coral.
ggplot(pf.fc_age, aes(x = age, y = friend_count_media)) +
  geom_line()

ggplot(pf.fc_age, aes(x = age, y = friend_count_media)) +
  geom_line(color = "coral")
# Adaugam media pe grafic:
# Raw age vs. friend_count scatter plot with the per-age mean drawn on top.
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10, color = "orange") +
  xlim(13, 90) +
  geom_line(stat = "summary", fun = mean)
# Sau mediana:
# Raw age vs. friend_count scatter plot with the per-age median drawn on top.
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10, color = "orange") +
  xlim(13, 90) +
  geom_line(stat = "summary", fun = median)
# Sau amandoua:
# Scatter plot with both per-age summaries: median (default colour) and
# mean (red).
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10, color = "orange") +
  xlim(13, 90) +
  geom_line(stat = "summary", fun = median) +
  geom_line(stat = "summary", fun = mean, color = "red")
# Si, peste ele, si cuantilele:
# Median and mean lines plus the per-age 10% quantile (dashed blue).
# fun.args forwards `probs` to quantile().
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10, color = "orange") +
  xlim(13, 90) +
  geom_line(stat = "summary", fun = median) +
  geom_line(stat = "summary", fun = mean, color = "red") +
  geom_line(
    stat = "summary", fun = quantile, fun.args = list(probs = 0.1),
    linetype = 2, color = "blue"
  )
# Overall distribution of friend_count: the standard summary, the first
# quartile on its own, then a vector of selected quantiles.
summary(pf[["friend_count"]])
quantile(pf[["friend_count"]], probs = 0.25)
quantile(pf[["friend_count"]], probs = c(0.1, 0.25, 0.5, 0.75, 0.90))
# Median and mean lines plus the per-age 10% and 90% quantiles (dashed blue).
# fun.args forwards `probs` to quantile().
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10, color = "orange") +
  xlim(13, 90) +
  geom_line(stat = "summary", fun = median) +
  geom_line(stat = "summary", fun = mean, color = "red") +
  geom_line(
    stat = "summary", fun = quantile, fun.args = list(probs = 0.1),
    linetype = 2, color = "blue"
  ) +
  geom_line(
    stat = "summary", fun = quantile, fun.args = list(probs = 0.9),
    linetype = 2, color = "blue"
  )
# Same layers as the quantile-band plot, but the visible region is set with
# coord_cartesian() (x 13-90, y 0-1000) instead of xlim().
# `fun` replaces `fun.y`, which ggplot2 deprecated in version 3.3.0.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_point(alpha = 1 / 10, color = "orange") +
  geom_line(stat = "summary", fun = median) +
  geom_line(stat = "summary", fun = mean, color = "red") +
  geom_line(
    stat = "summary", fun = quantile, fun.args = list(probs = 0.1),
    linetype = 2, color = "blue"
  ) +
  geom_line(
    stat = "summary", fun = quantile, fun.args = list(probs = 0.9),
    linetype = 2, color = "blue"
  ) +
  coord_cartesian(ylim = c(0, 1000), xlim = c(13, 90))
# Cu tabelul pseudo_facebook:
# Show the structure of pf (column names, types and first values).
str(object = pf)