## ---- echo = 4:5, message = FALSE, warning = FALSE------------------------- # This installs packages from BioConductor # source("https://bioconductor.org/biocLite.R") # biocLite("powerTCR") library(powerTCR) data("repertoires") ## -------------------------------------------------------------------------- str(repertoires) ## ---- warning = FALSE, cache=TRUE------------------------------------------ # This will loop through our list of sample repertoires, # and store a fit in each fits <- list() for(i in seq_along(repertoires)){ # Choose a sequence of possible u for your model fit # Ideally, you want to search a lot of thresholds, but for quick # computation, we are only going to test 4 thresholds <- unique(round(quantile(repertoires[[i]], c(.75,.8,.85,.9)))) fits[[i]] <- fdiscgammagpd(repertoires[[i]], useq = thresholds, shift = min(repertoires[[i]])) } names(fits) <- names(repertoires) ## -------------------------------------------------------------------------- # You could also look at the first sample by typing fits[[1]] fits$samp1 ## -------------------------------------------------------------------------- # Grab mles of fits: get_mle(fits) # Grab negative log likelihoods of fits get_nllh(fits) ## ---- cache=T-------------------------------------------------------------- desponds_fits <- list() for(i in seq_along(repertoires)){ desponds_fits[[i]] <- fdesponds(repertoires[[i]]) } names(desponds_fits) <- names(repertoires) desponds_fits$samp1 ## -------------------------------------------------------------------------- # The number of clones in each sample n1 <- length(repertoires[[1]]) n2 <- length(repertoires[[2]]) # Grids of quantiles to check # (you want the same number of points as were observed in the sample) q1 <- seq(n1/(n1+1), 1/(n1+1), length.out = n1) q2 <- seq(n2/(n2+1), 1/(n2+1), length.out = n2) # Compute the value of fitted distributions at grid of quantiles theor1 <- qdiscgammagpd(q1, fits[[1]]) theor2 <- qdiscgammagpd(q2, fits[[2]]) ## ---- fig.wide=TRUE, echo=2:7---------------------------------------------- par(mfrow = c(1,2)) plot(log(repertoires[[1]]), log(seq_len(n1)), pch = 16, cex = 2, xlab = "log clone size", ylab = "log rank", main = "samp1") points(log(theor1), log(seq_len(n1)), pch = 'x', col = "darkcyan") plot(log(repertoires[[2]]), log(seq_len(n2)), pch = 16, cex = 2, xlab = "log clone size", ylab = "log rank", main = "samp2") points(log(theor2), log(seq_len(n2)), pch = 'x', col = "chocolate") ## -------------------------------------------------------------------------- # Simulate 3 sampled repertoires set.seed(123) s1 <- rdiscgammagpd(1000, shape = 3, rate = .15, u = 25, sigma = 15, xi = .5, shift = 1) s2 <- rdiscgammagpd(1000, shape = 3.1, rate = .14, u = 26, sigma = 15, xi = .6, shift = 1) s3 <- rdiscgammagpd(1000, shape = 10, rate = .3, u = 45, sigma = 20, xi = .7, shift = 1) ## -------------------------------------------------------------------------- bad <- rdiscgammagpd(1000, shape = 1, rate = 2, u = 25, sigma = 10, xi = .5, shift = 1, phi = .2) plot(log(sort(bad, decreasing = TRUE)), log(seq_len(1000)), pch = 16, xlab = "log clone size", ylab = "log rank", main = "bad simulation") ## ---- cache=TRUE----------------------------------------------------------- # Fit model to the data at the true thresholds sim_fits <- list("fit1" = fdiscgammagpd(s1, useq = 25), "fit2" = fdiscgammagpd(s2, useq = 26), "fit3" = fdiscgammagpd(s3, useq = 45)) # Compute the pairwise JS distance between 3 fitted models distances <- matrix(rep(0, length(sim_fits)^2), nrow = length(sim_fits)) colnames(distances) <- rownames(distances) <- c("s1","s2","s3") grid <- min(c(s1,s2,s3)):10000 for(i in seq_len((length(sim_fits)-1))){ for(j in (i+1):length(sim_fits)){ distances[i,j] <- JS_dist(sim_fits[[i]], sim_fits[[j]], grid, modelType = "Spliced") } } # Distances are symmetric distances <- distances + t(distances) ## -------------------------------------------------------------------------- distances ## ---- fig.small=TRUE------------------------------------------------------- clusterPlot(distances, method = "ward.D")