############################# # Making category line plots ############################# library(tidyverse) # library(topicmodels) #install in ubuntu libgsl-dev and gsl-bin source("./R/expectation_functions.R") # Arguments path <- "./Data/ProcessedQueries/References/" path.plots <- "./Rocio/Plots/" path_processed_dictionaries <- "./Data/Dictionary/Papers-Term/" path_dictionary_info <- "./Data/Dictionary/" papers <- read.csv(file = paste0(path,"cleaned_papers_all_years_simple.csv"),stringsAsFactors = FALSE) data_decade <- papers %>% filter(pubyear > 2008 & pubyear < 2019) ######## Category ############# dictionary <- "Taxonomy" corrected_df <- expectation_functions(dictionary, data_all = data_decade, num_cat=NULL, ini_cat=1, paper_cat_out=FALSE, filter_lines=FALSE, suffix=NULL, path_processed_dictionaries) values_prop <- sort(unique(corrected_df$prop_papers)) values_breaks <- seq(from=0,to=max(values_prop)+0.1,by=0.1) values_year <- seq(from=min(corrected_df$year), to=max(corrected_df$year),by=1) ############################## # Plotting ############################## # We're going to plot our lines and then adjust the color, alpha, and linetype to better read the data plot_df <- corrected_df head(plot_df) # Run a quick linear model to measure which trend lines are positive or negative # we'll reference this when we choose our colors here <- by(plot_df, plot_df$category, function(x) lm(x$prop_papers ~ x$year)$coefficients[2] ) plot_df$category <- factor(plot_df$category, levels= names(sort(here))) # Create a grouping variable based on this value grouping <- data.frame(category = c(names(here)[here<=0.003 & here>=(-0.003)],names(here)[here<(-0.003)],names(here)[here>0.003])) grouping$group <- seq_along(grouping$category) plot_df <- merge(plot_df,grouping, by='category') # Now to make our aesthetic features which will be added with scale_*_manual() # Colors # Make a color ramp where the amount of 'grays' will determine the highlighted categories colfunc <- colorRampPalette(c("red",'gray','gray','gray','gray','gray','gray',"blue")) colorz <- colfunc(nrow(here)) names(colorz) <- names(sort(here)) # line types # just need to spread linetypes out enough so that the color and alpha can help distinguish as well # manual linetypez <- c(1,2,4,3,5,5,3,4,2,1) # or random # linetypez <- rep(1:6,times=ceiling(length(levels(plot_df$Topic))/6)) # linetypez <- linetypez[seq_along(levels(plot_df$Topic))] names(linetypez) <- names(sort(here)) # alpha # Changing alpha will help to make the important categories pop. # Create a gradient of alphas from 1 -> .2 -> so no trend lines are grayed out. nz <- length(here) # automatically # alphaz <- c((1*nz/2):(.2*nz/2)/nz*2,(.2*nz/2):(1*nz/2)/nz*2,ifelse(nz%%2==0,NULL,1)) # or manually alphaz <- c(1,.7,.4,.4,.4,.4,.4,.4,.4,1) names(alphaz) <- names(sort(here)) # You have to include color, linetype, and alpha in the mapping even if youre going to override it anyway. ggplot( data = plot_df, mapping = aes(x = year, y = prop_papers, color = category, group = group, linetype = category, alpha = category) ) + geom_line(size=1.5) + scale_color_manual(name='Data Device',values = colorz) + scale_linetype_manual(name='Data Device',values = linetypez) + scale_alpha_manual(name='Data Device',values = alphaz)+ theme_bw()+xlab("") + ylab("Proportion of articles in a year") + theme(axis.text.x = element_text(angle = 15, hjust = 1,size=16),axis.text.y = element_text(size=16), legend.position = "bottom", legend.justification = "right",legend.text=element_text(size=15), axis.title.y = element_text(margin = margin(r=10),size=17), axis.title.x = element_text(margin = margin(t=10)), legend.key.size = unit(2,"line"), legend.title=element_text(size=16)) # ggsave(paste0(path.plots,"taxonomy_ts_all.png"),height=10,width=12)