There Were 50 or More Warnings

There were 50 or more warnings (use warnings() to see the first 50)
> walk2(.x = "realDonaldTrump", .y = df_grouped$data, .f =

wordcloud_custom)
[1] "realDonaldTrump"
> df_grouped <- tweets_tidy %>% group_by(autor, token) %>% count(token)
%>%
+ group_by(autor) %>% mutate(frecuencia = n / n()) %>%
+ arrange(autor, desc(frecuencia)) %>% nest() %>%
+ filter(autor == "sebastianpinera")
> walk2(.x = "sebastianpinera", .y = df_grouped$data, .f =
wordcloud_custom)
[1] "sebastianpinera"
> df_grouped <- tweets_tidy %>% group_by(autor, token) %>% count(token)
%>%
+ filter(autor == "fgcabezadevaca")
>
> walk2(.x = "fgcabezadevaca", .y = df_grouped$data, .f =
wordcloud_custom)
[1] "fgcabezadevaca"
> nubesebastian<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
> walk2(.x = "sebastianpinera", .y = df_grouped$data, .f =
wordcloud_custom)
> nubecabezadevaca<-df_grouped <- tweets_tidy %>% group_by(autor, token)
%>% count(token) %>%
> walk2(.x = "fgcabezadevaca", .y = df_grouped$data, .f =
wordcloud_custom)
> Rsebastian<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
> imagenSebastian<-walk2(.x = "sebastianpinera", .y = df_grouped$data, .f
= wordcloud_custom)
>
> Rcabezadevaca<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
>
> imagenCabeza<-walk2(.x = "fgcabezadevaca", .y = df_grouped$data, .f =
wordcloud_custom)
> wordcloud_custom <- function(grupo, df){
+ print(grupo)
+ wordcloud(words = df$token, freq = df$frecuencia,
+ max.words = 400, random.order = FALSE, rot.per = 0.35,
+ colors = brewer.pal(8, "Dark2"))
+ }
>
> Rsebastian<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
> imagenSebastian<-walk2(.x = "sebastianpinera", .y = df_grouped$data, .f
= wordcloud_custom)
>
> Rcabezadevaca<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
>
> imagenCabeza<-walk2(.x = "fgcabezadevaca", .y = df_grouped$data, .f =
wordcloud_custom)
>
>
>
> RLopez<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
+ filter(autor == "lopezobrador_")
>
> imagenLopez<-walk2(.x = "lopezobrador_", .y = df_grouped$data, .f =
wordcloud_custom)
[1] "lopezobrador_"
>
>
>
> RBarack<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
+ filter(autor == "BarackObama")
>
> imagenBarack<-walk2(.x = "BarackObama", .y = df_grouped$data, .f =
wordcloud_custom)
[1] "BarackObama"
>
>
>
> RTrump<-df_grouped <- tweets_tidy %>% group_by(autor, token) %>%
count(token) %>%
+ filter(autor == "realDonaldTrump")
>
> imagenTrump<-walk2(.x = "realDonaldTrump", .y = df_grouped$data, .f =
wordcloud_custom)
[1] "realDonaldTrump"
> # Sentimientos:
> tweets_sent <- inner_join(x = tweets_tidy, y = sentimientos, by =
c("token" = "Palabra"))
> tweets_sent
# A tibble: 7,006 x 6
autor fecha tweet_id token
Puntuacion Word
<chr> <dttm> <chr> <chr>
<int> <chr>
1 sebastianpinera 2019-10-31 21:25:17 1190016918826172424 realizar
2 accomplish
2 sebastianpinera 2019-10-31 21:25:17 1190016918826172424 firme
2 steadfast
3 sebastianpinera 2019-10-31 21:25:17 1190016918826172424 compromiso
2 commitment
4 sebastianpinera 2019-10-31 21:25:17 1190016918826172424 lucha
-1 fight
-2 struggle
6 sebastianpinera 2019-10-31 21:25:17 1190016918826172424 solución
1 solution
7 sebastianpinera 2019-10-31 00:03:19 1189694302659723266 aprobación
2 approval
2 endorsement
9 sebastianpinera 2019-10-30 18:04:44 1189604062142091265 caos
-2 chaos
10 sebastianpinera 2019-10-30 18:04:44 1189604062142091265 paz
2 peace
# ... with 6,996 more rows
> tweets_sent %>% group_by(autor, tweet_id) %>%
+ summarise(sentimiento_promedio = sum(Puntuacion)) %>%
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
autor tweet_id sentimiento_promedio
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1030129887623958538 3
3 BarackObama 1039512025406349312 1
4 BarackObama 1046803503988006912 4
5 BarackObama 1056303165610618880 -1
6 BarackObama 1059517324053622785 -3
+ group_by(autor) %>%
+ summarise(positivos = 100 * sum(sentimiento_promedio > 0) / n(),
+ neutros = 100 * sum(sentimiento_promedio == 0) / n(),
+ negativos = 100 * sum(sentimiento_promedio < 0) / n())
# A tibble: 5 x 4
autor positivos neutros negativos
<chr> <dbl> <dbl> <dbl>
1 BarackObama 51.2 0 48.8
2 fgcabezadevaca 85.9 2.99 11.1
3 lopezobrador_ 59.3 3.60 37.1
4 realDonaldTrump 57.1 0 42.9
5 sebastianpinera 73.7 4.11 22.2
> tweets_sent_2 <- tweets_sent
> tweets_sent_2 <-
+ tweets_sent_2 %>%
+ mutate(Puntuacion = ifelse(Puntuacion > 0, "Positiva", "Negativa"))
> tweets_sent_2 <-
+ tweets_sent_2 %>%
+ filter(token != "no")
> # Word Cloud:
> tweets_sent_2 %>% group_by(autor) %>%
+ filter(autor == "fgcabezadevaca") %>%
+ count(token, Puntuacion, sort = TRUE) %>%
+ acast(token ~ Puntuacion, value.var = "n", fill = 0) %>%
+ comparison.cloud(colors = c("indianred1", "lightseagreen"),
+ max.words = 100)
+ filter(autor == "lopezobrador_") %>%
+ max.words = 100)
There were 39 warnings (use warnings() to see them)
+ filter(autor == "sebastianpinera") %>%
+ max.words = 100)
+ filter(autor == "BarackObama") %>%
+ max.words = 100)
+ filter(autor == "realDonaldTrump") %>%
+ max.words = 100)
c("token" = "Word"))
> tweets_sent
# A tibble: 4,348 x 6
Palabra Puntuacion
<chr> <int>
1 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 difficult
difícil -1
2 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 great
estupendo 3
3 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 pain
dolor -2
4 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 cancel
cancelar -1
5 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 urgent
urgente -1
6 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 solutions
soluciones 1
7 sebastianpinera 2019-10-30 21:04:15 1189649235739860992 demands
demandas -1
8 sebastianpinera 2019-10-28 11:34:14 1188781012966359040 favor
favor 2
9 sebastianpinera 2019-10-04 19:13:04 1180199174861705216 big
grande 1
10 sebastianpinera 2019-09-28 16:12:22 1177979372013342720 criminal
criminal -3
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1002573982212788224 -5
3 BarackObama 1005117568913412098 -2
4 BarackObama 1006554191676964864 13
5 BarackObama 1017766186296193024 9
6 BarackObama 1019617669958586368 7
# A tibble: 5 x 4
1 BarackObama 76.5 4.31 19.2
2 fgcabezadevaca 93.6 0 6.38
3 lopezobrador_ 64.2 0 35.8
4 realDonaldTrump 59.5 4.58 35.9
> tweets_sent_2 <-
+ tweets_sent_2 %>%
> tweets_sent_2 <-
+ tweets_sent_2 %>%
+ max.words = 100)
+ max.words = 100)
>
> tweets_sentEN <- inner_join(x = tweets_tidy, y = sentimientos, by =
> tweets_sent
# A tibble: 7,006 x 6
Puntuacion Word
<int> <chr>
2 accomplish
2 steadfast
2 commitment
-1 fight
-2 struggle
1 solution
2 approval
2 endorsement
-2 chaos
2 peace
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1030129887623958538 3
3 BarackObama 1039512025406349312 1
4 BarackObama 1046803503988006912 4
5 BarackObama 1056303165610618880 -1
6 BarackObama 1059517324053622785 -3
# A tibble: 5 x 4
> tweets_sent_2 <-
+ tweets_sent_2 %>%
> tweets_sent_2 <-
+ tweets_sent_2 %>%
>
> # Word Cloud:
+ max.words = 100)
>
+ max.words = 100)
>
+ max.words = 100)
> tweets_sent_2EN %>% group_by(autor) %>%
+ max.words = 100)
Error in eval(lhs, parent, parent) :
objeto 'tweets_sent_2EN' no encontrado
>
>
>
>
+ max.words = 100)
Error in eval(lhs, parent, parent) :
objeto 'tweets_sent_2EN' no encontrado
> # Sentimientos:
>
> tweets_sent
# A tibble: 7,006 x 6
Puntuacion Word
<int> <chr>
2 accomplish
2 steadfast
2 commitment
-1 fight
-2 struggle
1 solution
2 approval
2 endorsement
-2 chaos
2 peace
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1030129887623958538 3
3 BarackObama 1039512025406349312 1
4 BarackObama 1046803503988006912 4
5 BarackObama 1056303165610618880 -1
6 BarackObama 1059517324053622785 -3
> #//
>
> tweets_sentEN %>% group_by(autor, tweet_id) %>%
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1002573982212788224 -5
3 BarackObama 1005117568913412098 -2
4 BarackObama 1006554191676964864 13
5 BarackObama 1017766186296193024 9
6 BarackObama 1019617669958586368 7
> #//
# A tibble: 5 x 4
> #####
>
# A tibble: 5 x 4
1 BarackObama 76.5 4.31 19.2
> tweets_sent_2EN <- tweets_sent
> #####
> tweets_sent_2 <-
+ tweets_sent_2 %>%
> tweets_sent_2 <-
+ tweets_sent_2 %>%
> #####
>
>
> tweets_sent_2EN <-
+ tweets_sent_2EN %>%
>
> #####
> # Word Cloud:
+ max.words = 100)
>
+ max.words = 100)
>
+ max.words = 100)
>
>
>
+ max.words = 100)
>
>
>
>
+ max.words = 100)
> sentimiento_promedio
Error: objeto 'sentimiento_promedio' no encontrado
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1002573982212788224 -5
3 BarackObama 1005117568913412098 -2
4 BarackObama 1006554191676964864 13
5 BarackObama 1017766186296193024 9
6 BarackObama 1019617669958586368 7
# A tibble: 5 x 4
1 BarackObama 76.5 4.31 19.2
> tweets_sent_2EN <- tweets_sent
# A tibble: 5 x 4
1 BarackObama 76.5 4.31 19.2
> tweets_sent_2EN <- tweets_sentEN
> # Sentimientos:
>
> tweets_sent
# A tibble: 7,006 x 6
Puntuacion Word
<int> <chr>
2 accomplish
2 steadfast
2 commitment
-1 fight
-2 struggle
1 solution
2 approval
2 endorsement
-2 chaos
2 peace
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1030129887623958538 3
3 BarackObama 1039512025406349312 1
4 BarackObama 1046803503988006912 4
5 BarackObama 1056303165610618880 -1
6 BarackObama 1059517324053622785 -3
> #//
>
+ head()
# A tibble: 6 x 3
# Groups: autor [1]
<chr> <chr> <int>
1 BarackObama 1001131488103030784 4
2 BarackObama 1002573982212788224 -5
3 BarackObama 1005117568913412098 -2
4 BarackObama 1006554191676964864 13
5 BarackObama 1017766186296193024 9
6 BarackObama 1019617669958586368 7
> #//
# A tibble: 5 x 4
> #####
>
# A tibble: 5 x 4
1 BarackObama 76.5 4.31 19.2
> tweets_sent_2EN <- tweets_sentEN
> #####
> tweets_sent_2 <-
+ tweets_sent_2 %>%
> tweets_sent_2 <-
+ tweets_sent_2 %>%
> #####
>
>
>
> #####
> # Word Cloud:
+ max.words = 100)
>
+ max.words = 100)
>
+ max.words = 100)
>
>
>
+ max.words = 100)
>
>
>
>
+ max.words = 100)
> tweets <- bind_rows(datos_new1, datos_new2,
datos_new3,datos_new4,datos_new5)
> tweets <- tweets %>% select(user_id, status_id, screen_name,
created_at, text, source, retweet_location, country)
> tweets <- tweets %>% rename(autor = screen_name, fecha = created_at,
texto = text, tweet_id = status_id, fuente = source)
> tweets
# A tibble: 6,317 x 8
user_id tweet_id autor fecha texto
fuente retweet_location country
<chr> <chr> <chr> <dttm> <chr>
<chr> <chr> <chr>
1 13623532 1190016918~ sebast~ 2019-10-31 21:25:17 Agradezco a Pdte de
España ~ Twitter ~ NA NA
2 13623532 1189694302~ sebast~ 2019-10-31 00:03:19 Agradezco al Congreso
aproba~ Twitter ~ NA NA
3 13623532 1189649235~ sebast~ 2019-10-30 21:04:15 As President,my duty
is to p~ Twitter ~ NA NA
4 13623532 1189604062~ sebast~ 2019-10-30 18:04:44 Estas prioridades
significan~ Twitter ~ NA NA
5 13623532 1189604060~ sebast~ 2019-10-30 18:04:44 Prioridades del
Gobierno,rec~ Twitter ~ NA NA
6 13623532 1189578778~ sebast~ 2019-10-30 16:24:16 Como Pdte tengo el
deber de ~ Twitter ~ NA NA
7 13623532 1188882016~ sebast~ 2019-10-28 18:15:35 Este
#CambiodeGabinete signi~ Twitter ~ NA NA
8 13623532 1188851511~ sebast~ 2019-10-28 16:14:22 En @MindepChile asume
como M~ Twitter ~ NA NA
9 13623532 1188851090~ sebast~ 2019-10-28 16:12:42 En @MinisterioBBNN
asume com~ Twitter ~ NA NA
10 13623532 1188850546~ sebast~ 2019-10-28 16:10:32 En @MintrabChile
asume como ~ Twitter ~ NA NA
> tweets %>%
+ count(fuente, hour = hour(with_tz(fecha, "EST"))) %>%
+ mutate(percent = n / sum(n)) %>%
+ ggplot(aes(hour, percent, color = fuente)) +
+ geom_line() +
+ scale_y_continuous(labels = percent_format()) +
+ labs(x = "Hora del dia (EST)",
+ y = "% of tweets",
+ color = "")
> # Fuente: https://rud.is/books/21-recipes/extracting-tweet-entities.html
> # Top 10 de los hashtags presentes en los datos extraídos
> tweets <- bind_rows(datos_new1, datos_new2,
datos_new3,datos_new4,datos_new5)
> tweets <- tweets %>%
+ select(retweet_status_id, hashtags, retweet_count) %>%
+ unnest() %>%
+ mutate(hashtags = tolower(hashtags)) %>%
+ count(hashtags, sort=TRUE) %>%
+ filter(hashtags != "retweet_status_id") %>%
+ top_n(10)
Selecting by n
Warning message:
`cols` is now required.
Please use `cols = c(hashtags)`
> tweets
# A tibble: 10 x 2
hashtags n
<chr> <int>
1 tam 175
2 tamaulipas 155
3 doyourjob 150
4 actonclimate 132
5 cdvictoria 82
6 tiemposmejores 72
7 sotu 65
8 reynosa 56
9 getcovered 55
10 orgullotamaulipeco 54
> # Graficacion de followers en comun entre los usuarios:
> # get a list of twitter handles you want to compare
> rstaters <- c("BarackObama",
+ "lopezobrador_",
+ "fgcabezadevaca","sebastianpinera","realDonaldTrump")
> # scrape the user_id of all followers for each handle in the list and bind into 1 dataframe
> followers <- rstaters %>%
+ map_df(~ get_followers(.x, n = 20000, retryonratelimit = TRUE) %>%
+ mutate(account = .x))
Downloading [=============>---------------------------] 33%75000
followers!
Downloading [===================>---------------------] 50%50000
followers!
Downloading [=========================================] 100%
25000 followers!
Waiting about 14.8 minutes for rate limit reset...
Downloading [=============>---------------------------] 33%75000
followers!
Downloading [===================>---------------------] 50%50000
followers!
>
> head(followers)
# A tibble: 6 x 2
user_id account
<chr> <chr>
1 998342443295739904 BarackObama
2 1190793940267495426 BarackObama
3 771505494384967680 BarackObama
4 1187831666364108800 BarackObama
5 1118647163406274560 BarackObama
6 1191353321120030722 BarackObama
>
> # get a de-duplicated list of all followers
> aRdent_followers <- unique(followers$user_id)
>
> # for each follower, get a binary indicator of whether they follow each tweeter or not and bind to one dataframe
> binaries <- rstaters %>%
+ map_dfc(~ ifelse(aRdent_followers %in% filter(followers, account
== .x)$user_id, 1, 0) %>%
+ as.data.frame) # UpSetR doesn't like tibbles
>
> # set column names
> names(binaries) <- rstaters
>
> # have a look at the data
> glimpse(binaries)
Observations: 96,530
Variables: 5
$ BarackObama <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ lopezobrador_ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ fgcabezadevaca <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ sebastianpinera <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ realDonaldTrump <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...
>
> # plot the sets with UpSetR
> upset(binaries, nsets = 7, main.bar.color = "SteelBlue", sets.bar.color
= "DarkCyan",
+ sets.x.label = "Follower Count", text.scale = c(rep(1.4, 5), 1),
order.by = "freq")

There Were 50 or More Warnings

Uploaded by

Copyright:

Available Formats

There Were 50 or More Warnings

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

There Were 50 or More Warnings

Uploaded by

Copyright:

Available Formats

There were 50 or more warnings (use warnings() to see the first 50)

> walk2(.x = "realDonaldTrump", .y = df_grouped$data, .f =

You might also like

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.