# Introduction

1. Correct before fast.
2. Correct after fast -- keep a reproducible script and test cases, so that optimizations can be verified.
3. Kernighan: "Everyone knows that debugging is twice as hard as writing a program in the first place. So if you are as clever as you can be when you write it, how will you ever debug it?"

# Profiling

Toy functions:

```r
slow <- function(n = 3) {
    for (i in seq_len(n * 1000000)) {}
    TRUE
}

fast <- fast1 <- fast2 <- function(n = 1) {
    for (i in seq_len(n * 1000000)) {}
    TRUE
}
```

Simple example -- self and total time:

```r
f1 <- function() {
    fast1()
    slow()
    fast2()
}
Rprof(); f1(); Rprof(NULL); summaryRprof()
```

Re-use -- times are cumulative across all calls to the same function:

```r
f2 <- function() {
    fast1(); fast1(); fast1()
    slow()
    fast1(); fast1(); fast1()
}
Rprof(); f2(); Rprof(NULL); summaryRprof()
```

What to do when the same function appears at several call sites? The flat summary lumps every call to `slow()` together, so it cannot tell the cheap guard from the expensive body:

```r
f3 <- function() {
    ## heavy calculation ahead, so see if it is necessary
    if (!slow())            # check, usually fails
        return()
    if (fast())             # check, usually succeeds
        return()
    slow(); slow(); slow()
}
Rprof(); f3(); Rprof(NULL); summaryRprof()
```

Solution? Line profiling -- not very mature:

```r
source("line-profiling.R", keep.source = TRUE)
Rprof(line.profiling = TRUE); f4(); Rprof(NULL)
summaryRprof(lines = "show")
```

# Memory

Memory profiling requires R compiled with `--enable-memory-profiling`.

Copy-on-write (COW):

```r
x <- 1:10
tracemem(x)
y <- x
x[1] <- 3L    # copy-on-write: x is duplicated so that y is unchanged
```

Function calls:

```r
g <- function(v) sqrt(v[1:5])
tracemem(x <- 1:10)
g(x)    # read-only access -- no copy

h <- function(v) { v[1] <- 2L; v }
tracemem(x <- 1:10)
h(x)    # writes to its argument -- copy-on-write
```

`NAMED` status -- sometimes it is safe to write without a copy:

```r
x <- 1:10
tracemem(x)
x[1] <- 3L                # no copy!

x <- 1:10
.Internal(inspect(x))     # marked safe not to copy
x[1] <- 3L
.Internal(inspect(x))

y <- x
.Internal(inspect(x))     # same point in memory as y
.Internal(inspect(y))     # marked for copy-on-write
x[1] <- 2L
.Internal(inspect(x))     # new memory, no longer copy-on-write
.Internal(inspect(y))     # old memory, still copy-on-write
```

# Common pitfalls

See, e.g., http://bioconductor.org/help/course-materials/2013/CSAMA2013/thursday/morning/morgan-big-data.pdf

Efficient code:

1. Vectorize (see the first sketch below)
2. Pre-allocate and fill (same sketch)
3. Exploit existing software
4. Use appropriate algorithms
5. Avoid expensive conveniences

Big data resource management:

1. Restriction
2. Sampling
3. Iteration (sketch below)
4. Parallelization (sketch below)
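To make the first two "efficient code" principles concrete, here is a minimal timing sketch; the function names `grow`, `fill`, and `vect` are illustrative, not part of the original notes:

```r
n <- 1e5

grow <- function(n) {        # grow the result -- copies on every iteration
    x <- integer()
    for (i in seq_len(n))
        x <- c(x, i * 2L)
    x
}

fill <- function(n) {        # pre-allocate and fill -- no repeated copying
    x <- integer(n)
    for (i in seq_len(n))
        x[i] <- i * 2L
    x
}

vect <- function(n)          # vectorize -- no interpreted loop at all
    seq_len(n) * 2L

identical(grow(n), fill(n))  # TRUE
identical(fill(n), vect(n))  # TRUE
system.time(grow(n))         # slowest
system.time(fill(n))         # much faster
system.time(vect(n))         # fastest
```

Growing with `c()` copies the accumulated result on every iteration, so its total cost is quadratic in `n`; pre-allocation removes the copying, and the vectorized form also removes the loop.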
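"Iteration" means processing data in chunks, keeping only a bounded amount in memory at once. A minimal sketch, assuming a hypothetical plain-text file `big.txt` with one numeric value per line:

```r
## Compute a mean by streaming the file in 10,000-line chunks,
## keeping only running totals in memory.
con <- file("big.txt", "r")    # 'big.txt' is a hypothetical input file
total <- 0; count <- 0
repeat {
    chunk <- readLines(con, n = 10000)
    if (length(chunk) == 0)
        break
    x <- as.numeric(chunk)
    total <- total + sum(x)
    count <- count + length(x)
}
close(con)
total / count                  # mean, without ever loading the whole file
```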
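"Parallelization" pays off for embarrassingly parallel work, where tasks are independent. A minimal sketch using `parallel::mclapply()` (fork-based, so `mc.cores > 1` is not available on Windows); the toy worker `f` is illustrative:

```r
library(parallel)

f <- function(i) { Sys.sleep(0.1); i^2 }    # stand-in for real per-task work

system.time(res1 <- lapply(1:8, f))                   # serial: ~0.8 s
system.time(res2 <- mclapply(1:8, f, mc.cores = 2))   # parallel: ~0.4 s
identical(res1, res2)                                 # same results
```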