作为一个开源软件,R的一个很是大的利益就是我们可以随意查察所有算法的源代码,在对这些源代码举办阐明的进程中不只可以加深对算法的认识,并且可以大步提高对R语言的把握水平。所以接下来我重点写点关于各统计要领的R语言源代码的表明。本日先对如何查察源代码做点先容。

最直接的要领虽然是直接键入函数,大部门函数源代码就可以直接显现出来

> fivenum
function (x, na.rm = TRUE) 
{
    xna <- is.na(x)
    if (na.rm) 
        x <- x[!xna]
    else if (any(xna)) 
        return(rep.int(NA, 5))
    x <- sort(x)
    n <- length(x)
    if (n == 0) 
        rep.int(NA, 5)
    else {
        n4 <- floor((n + 3)/2)/2
        d <- c(1, n4, (n + 1)/2, n + 1 - n4, n)
        0.5 * (x[floor(d)] + x[ceiling(d)])
    }
}
<bytecode: 0x01f32270>
<environment: namespace:stats>

尚有些函数直接键入出不来历代码,主要是因为R是面向工具设计的措施语言,差异的工具,计较方法也差异,所以要通过methods()来进一步界说详细的查察工具
> mean
function (x, ...) 
UseMethod("mean")
<bytecode: 0x019a54e4>
<environment: namespace:base>
不可,用methods查察

> methods(mean)
[1] mean.data.frame mean.Date       mean.default    mean.difftime  
[5] mean.POSIXct    mean.POSIXlt  

任意选择一个举办查察
> mean.data.frame
function (x, ...) 
{
    msg <- "mean(<data.frame>) is deprecated.\n Use colMeans() or sapply(*, mean) instead."
    warning(paste(msg, collapse = ""), call. = FALSE, domain = NA)
    sapply(X = x, FUN = mean, ...)
}
<bytecode: 0x0260c92c>
<environment: namespace:base>

查察包里的函数进程一样,譬喻查察神经网络的源代码进程如下

> library(nnet)
> nnet
function (x, ...) 
UseMethod("nnet")
<bytecode: 0x0180d6f4>
<environment: namespace:nnet>
> methods(nnet)
[1] nnet.default nnet.formula
> nnet.default
function (x, y, weights, size, Wts, mask = rep(TRUE, length(wts)), 
    linout = FALSE, entropy = FALSE, softmax = FALSE, censored = FALSE, 
    skip = FALSE, rang = 0.7, decay = 0, maxit = 100, Hess = FALSE, 
    trace = TRUE, MaxNWts = 1000, abstol = 1e-04, reltol = 1e-08, 
    ...) 
{
    net <- NULL
    x <- as.matrix(x)
    y <- as.matrix(y)
    if (any(is.na(x))) 
        stop("missing values in 'x'")
    if (any(is.na(y))) 
        stop("missing values in 'y'")
    if (dim(x)[1L] != dim(y)[1L]) 
        stop("nrows of 'x' and 'y' must match")
    if (linout && entropy) 
        stop("entropy fit only for logistic units")
    if (softmax) {
        linout <- TRUE
        entropy <- FALSE
    }
    if (censored) {
        linout <- TRUE
        entropy <- FALSE
        softmax <- TRUE
    }
    net$n <- c(dim(x)[2L], size, dim(y)[2L])
    net$nunits <- as.integer(1L + sum(net$n))
    net$nconn <- rep(0, net$nunits + 1L)
    net$conn <- numeric(0L)
    net <- norm.net(net)
    if (skip) 
        net <- add.net(net, seq(1L, net$n[1L]), seq(1L + net$n[1L] + 
            net$n[2L], net$nunits - 1L))
    if ((nwts <- length(net$conn)) == 0) 
        stop("no weights to fit")
    if (nwts > MaxNWts) 
        stop(gettextf("too many (%d) weights", nwts), domain = NA)
    nsunits <- net$nunits
    if (linout) 
        nsunits <- net$nunits - net$n[3L]
    net$nsunits <- nsunits
    net$decay <- decay
    net$entropy <- entropy
    if (softmax && NCOL(y) < 2L) 
        stop("'softmax = TRUE' requires at least two response categories")
    net$softmax <- softmax
    net$censored <- censored
    if (missing(Wts)) 
        if (rang > 0) 
            wts <- runif(nwts, -rang, rang)
        else wts <- rep(0, nwts)
    else wts <- Wts
    if (length(wts) != nwts) 
        stop("weights vector of incorrect length")
    if (length(mask) != length(wts)) 
        stop("incorrect length of 'mask'")
    if (trace) {
        cat("# weights: ", length(wts))
        nw <- sum(mask != 0)
        if (nw < length(wts)) 
            cat(" (", nw, " variable)\n", sep = "")
        else cat("\n")
        flush.console()
    }
    if (length(decay) == 1L) 
        decay <- rep(decay, length(wts))
    .C(VR_set_net, as.integer(net$n), as.integer(net$nconn), 
        as.integer(net$conn), as.double(decay), as.integer(nsunits), 
        as.integer(entropy), as.integer(softmax), as.integer(censored))
    ntr <- dim(x)[1L]
    nout <- dim(y)[2L]
    if (missing(weights)) 
        weights <- rep(1, ntr)
    if (length(weights) != ntr || any(weights < 0)) 
        stop("invalid weights vector")
    Z <- as.double(cbind(x, y))
    storage.mode(weights) <- "double"
    tmp <- .C(VR_dovm, as.integer(ntr), Z, weights, as.integer(length(wts)), 
        wts = as.double(wts), val = double(1), as.integer(maxit), 
        as.logical(trace), as.integer(mask), as.double(abstol), 
        as.double(reltol), ifail = integer(1L))
    net$value <- tmp$val
    net$wts <- tmp$wts
    net$convergence <- tmp$ifail
    tmp <- matrix(.C(VR_nntest, as.integer(ntr), Z, tclass = double(ntr * 
        nout), as.double(net$wts))$tclass, ntr, nout)
    dimnames(tmp) <- list(rownames(x), colnames(y))
    net$fitted.values <- tmp
    tmp <- y - tmp
    dimnames(tmp) <- list(rownames(x), colnames(y))
    net$residuals <- tmp
    .C(VR_unset_net)
    if (entropy) 
        net$lev <- c("0", "1")
    if (softmax) 
        net$lev <- colnames(y)
    net$call <- match.call()
    if (Hess) 
        net$Hessian <- nnetHess(net, x, y, weights)
    class(net) <- "nnet"
    net
}
<bytecode: 0x01e37238>
<environment: namespace:nnet>

其他教程

2017-12-04


作为一个开源软件,R的一个很是大的利益就是我们可以随意查察所有算法的源代码,在对这些源代码举办阐明的进程中不只可以加深对算法的认识,并且可以大步提高对R语言的把握水平