Last updated: 2018-01-02
Code version: 800f055
source(paste0(here::here(),"/R/depends.R"))
Loading required package: pacman
source(paste0(here::here(),"/R/mount.R"))
source(paste0(here::here(),"/R/extract-word-freq_functions.R"))
sessionInfo()
R version 3.4.2 (2017-09-28)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] secret_1.0.0 entropy_1.2.1 lubridate_1.7.1
[4] here_0.1 tidytext_0.1.5 dbplyr_1.1.0
[7] forcats_0.2.0 stringr_1.2.0 dplyr_0.7.4
[10] purrr_0.2.4 readr_1.1.1 tidyr_0.7.2
[13] tibble_1.3.4 ggplot2_2.2.1.9000 tidyverse_1.2.1
[16] RSQLite_2.0 DBI_0.7 pacman_0.4.6
loaded via a namespace (and not attached):
[1] Rcpp_0.12.14 lattice_0.20-35 assertthat_0.2.0
[4] rprojroot_1.2 digest_0.6.12 psych_1.7.8
[7] R6_2.2.2 cellranger_1.1.0 plyr_1.8.4
[10] backports_1.1.1 evaluate_0.10.1 httr_1.3.1
[13] rlang_0.1.4 curl_3.0 lazyeval_0.2.1
[16] readxl_1.0.0 rstudioapi_0.7 blob_1.1.0
[19] Matrix_1.2-12 rmarkdown_1.8 foreign_0.8-69
[22] bit_1.1-12 munsell_0.4.3 broom_0.4.2
[25] compiler_3.4.2 janeaustenr_0.1.5 modelr_0.1.1
[28] pkgconfig_2.0.1 mnormt_1.5-5 htmltools_0.3.6
[31] openssl_0.9.9 crayon_1.3.4 SnowballC_0.5.1
[34] grid_3.4.2 nlme_3.1-131 jsonlite_1.5
[37] gtable_0.2.0 git2r_0.19.0 magrittr_1.5
[40] scales_0.5.0.9000 tokenizers_0.1.4 cli_1.0.0
[43] stringi_1.1.6 reshape2_1.4.2 bindrcpp_0.2
[46] xml2_1.1.1 tools_3.4.2 bit64_0.9-7
[49] glue_1.2.0 hms_0.4.0 parallel_3.4.2
[52] yaml_2.1.15 colorspace_1.3-2 rvest_0.3.2
[55] memoise_1.1.0 knitr_1.17 bindr_0.1
[58] haven_1.1.0
The wkcase parameter determines the working case (ie whether we are working with / accessing data locally or on the cluster). It is taken from the command line if arguments are supplied; otherwise the default parameter file is sourced.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 0) {
  # parameters supplied on the command line (see the sketch below)
} else {
  source(paste0(here::here(), "/params/extract_word_freq.R"))
}
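As a hypothetical illustration of the command-line branch (the actual argument handling is not shown in this report, and the positional mapping below is an assumption):
if (length(args) != 0) {
  wkcase <- args[1]  # hypothetical: first positional argument selects the working case
}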
Mount the volume if required and set up the output directory (out_dir).
switch(wkcase,
  "smb_local" = {
    # working locally, database on the mounted SMB share
    mount_ooominds1_volume()
    db <- dbConnect(RSQLite::SQLite(),
                    dbname = "~/../../Volumes/ooominds1/Shared/corpus.byu.edu/a1517_now/now_db")
    out_dir <- "~/../../Volumes/ooominds1/Shared/corpus.byu.edu/extraction_out/data/"
  },
  "local_local" = {
    # working locally, database held locally; output still written to the share
    mount_ooominds1_volume()
    db <- dbConnect(RSQLite::SQLite(),
                    dbname = paste0(here::here(), "/data/now_db/now_db"))
    out_dir <- "~/../../Volumes/ooominds1/Shared/corpus.byu.edu/extraction_out/data/"
  },
  "smb_sharc" = {
    # running on the cluster, database and output on the shared filesystem
    db <- dbConnect(RSQLite::SQLite(),
                    dbname = "shared/ooominds1/Shared/corpus.byu.edu/a1517_now/now_db")
    out_dir <- "shared/ooominds1/Shared/corpus.byu.edu/extraction_out/data/"
  }
)
dbListTables(db)
[1] "corpus" "del_lexicon" "lexicon" "samples" "sources"
[6] "targets"
dbListFields(db, "sources")
[1] "textID" "ID" "date" "country" "source" "url"
[7] "textTitle"
source_list <- dbGetQuery(db, 'SELECT DISTINCT source FROM sources') %>% arrange(source)
head(source_list, 50)
# A tibble: 50 x 1
source
<chr>
1 'Bourne this way
2 (e) Science News
3 (e) Science News (press release) (registration)
4 @U2
5 /FILM
6 10 Connects
7 100 Mile Free Press
8 100 Mile House Free Press
9 100.7 WZLX Classic Rock
10 1011now
# ... with 40 more rows
I’ve also created a CSV containing the names of the target publications we are interested in extracting article data from.
target_sources <- read_csv(paste0(here::here(), "/data/target_sources.csv"))
target_sources
# A tibble: 31 x 2
source country
<chr> <chr>
1 Daily Mail UK
2 The Guardian UK
3 The Independent UK
4 Financial Times UK
5 Independent UK
6 Metro UK
7 Evening Standard UK
8 The National UK
9 Daily Star UK
10 The Courier UK
# ... with 21 more rows
First, I check that all target sources match a source in the source_list df.
stopifnot(all(target_sources$source %in% source_list$source))
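If this assertion ever fails, base R’s setdiff() gives a quick view of which target names have no exact match in the database:
# sources named in the CSV but absent from the sources table (should be empty)
setdiff(target_sources$source, source_list$source)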
I then write the target_sources df to a table in the database.
dbWriteTable(db, "targets", target_sources, overwrite = T)
This allows fast extraction of article-level metadata from the sources table using an inner join query. Using this query I extract information on all available articles from the target sources into the target_subset tbl.
target_subset <- dbGetQuery(db,
  "SELECT sources.* FROM targets
   INNER JOIN sources ON sources.source = targets.source;") %>%
  filter(country %in% c("GB", "US"))
I’ve written target_subset to file to do some initial exploration of article-level data availability from the different time periods discussed (see report: r01-article_availability).
target_subset %>% write_csv(paste0(here::here(),"/data/target_subset.csv"))
Next I sampled from the available articles (ie by textID). I set the seed to maintain reproducibility of sampling and extraction. The function calculates the minimum article count per time period (min_n); min_n articles are then sampled from each time period.
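sample_textIDs is defined in R/extract-word-freq_functions.R. As a rough sketch of its core logic (assuming each article has already been assigned a time_p from its date, and glossing over the reporting of selected weeks):
# hypothetical re-implementation of the core sampling step
sample_textIDs_sketch <- function(articles) {
  min_n <- articles %>% count(time_p) %>% pull(n) %>% min()  # smallest pool sets the sample size
  articles %>%
    group_by(time_p) %>%
    sample_n(min_n) %>%             # equal-sized random sample from each time period
    select(textID, time_p, source)
}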
set.seed(171124)
samples <- sample_textIDs(target_subset)
Randomly selected weeks starting:
2010-03-07 2011-04-24 2012-01-15 2012-09-23 2012-12-30 2013-01-20 2014-09-07 2014-12-14 2016-04-24
***
Available samples per time period:
# A tibble: 4 x 2
# Groups: time_p [4]
time_p n
<chr> <int>
1 brexit 11668
2 contin 316857
3 us_elec 6447
4 weeks 7980
min n: 6447
samples
# A tibble: 25,788 x 3
# Groups: time_p [4]
textID time_p source
<int> <chr> <chr>
1 8995204 brexit Daily Mail
2 8995215 brexit Daily Mail
3 8995230 brexit Daily Mail
4 8995241 brexit Daily Mail
5 8995265 brexit Daily Mail
6 8996091 brexit Daily Mail
7 8996099 brexit Daily Mail
8 8996117 brexit Daily Mail
9 8996135 brexit Daily Mail
10 8997934 brexit Daily Mail
# ... with 25,778 more rows
I write this table to the database too, again to enable join-based extraction, this time of all the corpus words associated with the sampled articles.
dbWriteTable(db, "samples", samples, overwrite = T)
corpus_spls <- dbGetQuery(db,
  "SELECT samples.textID, samples.time_p, samples.source,
          corpus.wordID, lexicon.word, lexicon.lemma, lexicon.PoS
   FROM samples
   INNER JOIN corpus ON corpus.textID = samples.textID
   INNER JOIN lexicon ON lexicon.wordID = corpus.wordID;")
At this stage, the sampled corpus is cleaned of stop words and trimmed to words with frequencies greater than or equal to minfreq (default 5); the function then calculates nn, the word count per source per time_p.
In column total_n I also calculate the total word count per source per time_p, which is then used to calculate cpm, counts per million words. This is for your reference only at the minute: my understanding is that the shrinkage applied later does not require scaling of the data (shrinkage itself scales the data to between 0 and 1), so I’m currently just using the raw counts.
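get_word_counts also comes from R/extract-word-freq_functions.R. A minimal sketch of what it does, assuming tidytext’s stop_words lexicon for the cleaning step (the joins inside the real function produce the “Joining, by = "word"” messages below):
get_word_counts_sketch <- function(corpus_spls, minfreq = 5) {
  corpus_spls %>%
    anti_join(tidytext::stop_words, by = "word") %>%  # remove stop words
    count(word, time_p, source) %>%
    rename(nn = n) %>%
    filter(nn >= minfreq) %>%                         # trim low-frequency words
    group_by(time_p, source) %>%
    mutate(total_n = sum(nn),                         # total words per source x time_p
           cpm = as.integer(nn / total_n * 1e6))      # counts per million
}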
corpus_counts <- get_word_counts(corpus_spls)
Joining, by = "word"
Joining, by = "word"
corpus_counts
# A tibble: 1,139,985 x 6
word time_p source nn total_n cpm
<chr> <chr> <chr> <int> <int> <int>
1 people us_elec Daily Mail 3534 896505 3941
2 time us_elec Daily Mail 3316 896505 3698
3 share us_elec Daily Mail 2911 896505 3247
4 people weeks Daily Mail 2625 694983 3777
5 people brexit Daily Mail 2516 613985 4097
6 time weeks Daily Mail 2491 694983 3584
7 told us_elec Daily Mail 2358 896505 2630
8 people contin Daily Mail 2313 600013 3854
9 home us_elec Daily Mail 2267 896505 2528
10 children us_elec Daily Mail 2165 896505 2414
# ... with 1,139,975 more rows
Finally, I reshape the df into a wide format, with each source in a column and each row a word contained in the overall sampled corpus (ie the complete word list across all time_p and sources).
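widen_word_counts (also from R/extract-word-freq_functions.R) is essentially a spread of the long counts. A minimal sketch, assuming missing word x source combinations are filled with 0 (the real function also takes a value argument naming the column to spread):
widen_word_counts_sketch <- function(corpus_counts) {
  corpus_counts %>%
    select(word, time_p, source, nn) %>%
    tidyr::spread(source, nn, fill = 0) %>%  # one column per source; 0 where a word is absent
    group_by(time_p)
}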
wide_counts <- widen_word_counts(corpus_counts, value = "nn")
wide_counts %>% order_wide %>% head(150)
# A tibble: 150 x 33
# Groups: time_p [4]
word time_p `Baltimore Sun` `Chicago Tribune` `Daily Mail`
<fctr> <chr> <dbl> <dbl> <dbl>
1 people us_elec 36 37 3534
2 people weeks 75 103 2625
3 people brexit 64 388 2516
4 people contin 50 96 2313
5 time us_elec 31 32 3316
6 time weeks 51 72 2491
7 time contin 22 65 2155
8 time brexit 40 276 2115
9 police brexit 141 464 1996
10 world weeks 5 48 1436
# ... with 140 more rows, and 28 more variables: `Daily Star` <dbl>,
# `Dallas Morning News` <dbl>, `Evening Standard` <dbl>, `Financial
# Times` <dbl>, `Los Angeles Times` <dbl>, Metro <dbl>, `Milwaukee
# Journal Sentinel` <dbl>, `Minneapolis Star Tribune` <dbl>, `National
# Geographic` <dbl>, `New York Daily News` <dbl>, `New York
# Magazine` <dbl>, `New York Post` <dbl>, Politico <dbl>, `The
# Atlantic` <dbl>, `The Courier` <dbl>, `The Guardian` <dbl>, `The
# Independent` <dbl>, `The National` <dbl>, `The New Yorker` <dbl>, `The
# Seattle Times` <dbl>, `The Week Magazine` <dbl>, TIME <dbl>, `Wall
# Street Journal` <dbl>, `Washington Times` <dbl>, Independent <dbl>,
# `The Sun` <dbl>, `The Times` <dbl>, `Washington Post` <dbl>
In the end I have applied the shrinkage to each time_p (time period) x source combination separately. In our original discussion we had agreed to apply the smoothing at the source level. But because the smoothing is applied to counts, not raw data, and the counts we are interested in are counts per time_p per source, applying smoothing at the source level would mean that, for each source, four separate counts (one for each time_p) were smoothed for each word. That just felt a bit dodgy statistically, but if you feel that approach is justified it is easy to change; just let me know.
I also used the R package entropy, written by the authors of, and accompanying, the paper you supplied:
Jean Hausser and Korbinian Strimmer (2014). entropy: Estimation of Entropy, Mutual Information and Related Quantities. R package version 1.2.1. https://CRAN.R-project.org/package=entropy
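shrink_wide applies the James-Stein-type shrinkage from the entropy package. A minimal sketch, assuming entropy::freqs.shrink() is applied to each source column within each time_p (freqs.shrink is the package function that prints the “Estimating optimal shrinkage intensity” messages seen below):
shrink_wide_sketch <- function(wide_counts) {
  src_cols <- setdiff(names(wide_counts), c("word", "time_p"))
  wide_counts %>%
    group_by(time_p) %>%
    mutate_at(src_cols, funs(entropy::freqs.shrink(.)))  # counts -> shrunk frequencies summing to 1
}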
wide_shrunk_freqs <- shrink_wide(wide_counts)
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0682
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.026
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.005
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0852
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0455
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0276
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.116
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0226
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0434
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1595
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0503
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2241
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0508
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1596
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0774
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0413
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.034
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.3733
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0115
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.009
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0426
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.121
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0543
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.135
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0155
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0721
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0423
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1081
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0886
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0055
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.264
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0509
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0839
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0632
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0425
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0631
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1272
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1357
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1003
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0931
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1479
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1439
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1442
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0229
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2635
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0096
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0146
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1866
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1557
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1262
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1775
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0434
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0467
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.144
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0767
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.3776
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2741
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0863
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1461
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1593
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0036
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.4239
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0571
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0386
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0459
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1221
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0572
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1126
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1524
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0314
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0652
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.174
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1614
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2062
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.009
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.6883
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0102
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0822
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1671
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0187
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0318
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0697
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0862
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.5736
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0756
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0732
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0048
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2382
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0623
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0832
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0562
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.053
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0671
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1084
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1173
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0559
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0746
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1741
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1242
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1832
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0239
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2598
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0101
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0136
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2001
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1618
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0915
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1533
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0357
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.04
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1055
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.1781
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.2212
Estimating optimal shrinkage intensity lambda.freq (frequencies): 0.0983
Note that the counts are now converted to frequencies, totalling 1 for each source x time_p; this introduces re-scaling as well. To get to counts per million we would just need to multiply by 1,000,000. If you prefer this (ie the data in counts per million), let me know and I’ll add this step to the analysis. I’ve also left th
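For reference, if counts per million were preferred, the conversion would be a one-liner over the source columns:
# hypothetical extra step: rescale shrunk frequencies to counts per million
wide_shrunk_cpm <- wide_shrunk_freqs %>%
  ungroup() %>%
  mutate_at(vars(-word, -time_p), funs(. * 1e6))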
wide_shrunk_freqs %>% order_wide %>% head(150)
# A tibble: 150 x 33
word time_p `Baltimore Sun` `Chicago Tribune` `Daily Mail`
<fctr> <chr> <dbl> <dbl> <dbl>
1 people brexit 4.126409e-03 0.004723244 0.0040774327
2 people weeks 4.283548e-03 0.003395721 0.0037591381
3 people contin 3.930650e-03 0.003805380 0.0038337098
4 time contin 1.730356e-03 0.002576971 0.0035718362
5 people us_elec 3.640010e-03 0.002831998 0.0039277946
6 time brexit 2.579373e-03 0.003359941 0.0034275831
7 time weeks 2.913160e-03 0.002374025 0.0035672465
8 trump brexit 9.808012e-07 0.003128666 0.0007001592
9 police brexit 9.089815e-03 0.005648342 0.0032347349
10 time us_elec 3.134745e-03 0.002449605 0.0036855060
# ... with 140 more rows, and 28 more variables: `Daily Star` <dbl>,
# `Dallas Morning News` <dbl>, `Evening Standard` <dbl>, `Financial
# Times` <dbl>, `Los Angeles Times` <dbl>, Metro <dbl>, `Milwaukee
# Journal Sentinel` <dbl>, `Minneapolis Star Tribune` <dbl>, `National
# Geographic` <dbl>, `New York Daily News` <dbl>, `New York
# Magazine` <dbl>, `New York Post` <dbl>, Politico <dbl>, `The
# Atlantic` <dbl>, `The Courier` <dbl>, `The Guardian` <dbl>, `The
# Independent` <dbl>, `The National` <dbl>, `The New Yorker` <dbl>, `The
# Seattle Times` <dbl>, `The Week Magazine` <dbl>, TIME <dbl>, `Wall
# Street Journal` <dbl>, `Washington Times` <dbl>, Independent <dbl>,
# `The Sun` <dbl>, `The Times` <dbl>, `Washington Post` <dbl>
Create the out_dir directory (if it doesn’t exist already).
dir.create(out_dir, recursive = T)
Warning in dir.create(out_dir, recursive = T): '/Users/Anna/../../Volumes/
ooominds1/Shared/corpus.byu.edu/extraction_out/data' already exists
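The warning is harmless; to avoid it, the call could be guarded:
if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)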
Split the wide df into the four time_p and write each to CSV individually.
split(wide_shrunk_freqs, factor(wide_shrunk_freqs$time_p)) %>%
  map(~ write_csv(.x, paste0(out_dir, unique(.x$time_p), "_freqs.csv")))
Write the full wide_shrunk_freqs and the long corpus_counts dfs to .csv.
write_csv(wide_shrunk_freqs, paste0(out_dir, "all_freqs.csv"))
write_csv(corpus_counts, paste0(out_dir, "all_counts_long.csv"))
Reminder of where the files are:
out_dir
[1] "~/../../Volumes/ooominds1/Shared/corpus.byu.edu/extraction_out/data/"
Finally, I purl the .Rmd into an .R script to run on the cluster.
knitr::purl(paste0(here::here(), "/analysis/extract_word_freq.Rmd"),
            paste0(here::here(), "/code/m04_extract_word_freq.R"),
            documentation = 0)
[1] "/Users/Anna/Documents/workflows/RSE_clients/news-scrape/code/m04_extract_word_freq.R"
# strip "##" comment markers from the purled script
readLines(paste0(here::here(), "/code/m04_extract_word_freq.R")) %>%
  gsub("##", "", .) %>%
  writeLines(paste0(here::here(), "/code/m04_extract_word_freq.R"))
This R Markdown site was created with workflowr