R语言原生数据结构总结

目录：向量、数组、矩阵、因子、列表、数据框

一、向量

生成方法

c()

c(1,7:9)
c(1:5, 10.5, "next")

## uses with a single argument to drop attributes
x <- 1:4
names(x) <- letters[1:4]#把字母表的前四个作为x的名字（利用names()函数）
x
c(x)          # has names
as.vector(x)  # no names
dim(x) <- c(2,2)
x
c(x)
as.vector(x)#转换成向量格式

seq()

> seq(0, 1, length.out = 11)
 [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
> seq(stats::rnorm(20)) # effectively 'along'
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
> seq(1, 9, by = 2)     # matches 'end'
[1] 1 3 5 7 9
> seq(1, 9, by = pi)    # stays below 'end'
[1] 1.000000 4.141593 7.283185
> seq(1, 6, by = 3)
[1] 1 4
> seq(1.575, 5.125, by = 0.05)
 [1] 1.575 1.625 1.675 1.725 1.775 1.825 1.875 1.925 1.975 2.025
[11] 2.075 2.125 2.175 2.225 2.275 2.325 2.375 2.425 2.475 2.525
[21] 2.575 2.625 2.675 2.725 2.775 2.825 2.875 2.925 2.975 3.025
[31] 3.075 3.125 3.175 3.225 3.275 3.325 3.375 3.425 3.475 3.525
[41] 3.575 3.625 3.675 3.725 3.775 3.825 3.875 3.925 3.975 4.025
[51] 4.075 4.125 4.175 4.225 4.275 4.325 4.375 4.425 4.475 4.525
[61] 4.575 4.625 4.675 4.725 4.775 4.825 4.875 4.925 4.975 5.025
[71] 5.075 5.125
> seq(17) # same as 1:17, or even better seq_len(17)
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17

start:end

> 1:10 #等价于seq(1, 10)
 [1]  1  2  3  4  5  6  7  8  9 10
> seq(1,10)
 [1]  1  2  3  4  5  6  7  8  9 10

二、数组

可调用接口

array(data = NA, dim = length(data), dimnames = NULL)#生成一个数组
as.array(x, ...)#强制类型转换为数组
is.array(x)#判断是否是数组类型
dim(x)#查看数组维度（注：dim是个通用函数）

生成方法

    array(1:3, c(2,4)) # recycle 1:3 "2 2/3 times"
    #     [,1] [,2] [,3] [,4]
    #[1,]    1    3    2    1
    #[2,]    2    1    3    2

array(c(1,4,5,6,7),dim=c(4,3,2))

注：可以看到，若指定的维度所需元素个数大于已有元素个数，则已有元素会被循环使用，并按列优先的顺序依次填充。

三、矩阵

可调用接口

matrix(data = NA, nrow = 1, ncol = 1, byrow = FALSE,dimnames = NULL)
    
    as.matrix(x, ...)
    ## S3 method for class 'data.frame'
    as.matrix(x, rownames.force = NA, ...)
    
    is.matrix(x)

生成方法

mdat <- matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3, byrow = TRUE,
                    dimnames = list(c("row1", "row2"),
                                    c("C.1", "C.2", "C.3")))

这里的dimnames是给矩阵的行列命名。

当然也可以事先不命名，等生成矩阵之后再命名：

> mdat <- matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3, byrow = TRUE)
> mdat
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]   11   12   13
> dimnames(mdat)=list(c('a','b'),c('r','g','p'))
> mdat
   r  g  p
a  1  2  3
b 11 12 13

那单独给列或者行命名呢？可以这样：

> mdat <- matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3, byrow = TRUE)
> mdat
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]   11   12   13
> colnames(mdat) <- c('a','b','c')
> mdat
      a  b  c
[1,]  1  2  3
[2,] 11 12 13
> rownames(mdat)=c('zz','hh')
> mdat
    a  b  c
zz  1  2  3
hh 11 12 13

四、因子

在R语言中，factor（因子）用于表示分类变量（类别型数据）：它的取值只能来自有限的若干个水平（levels）。数值型向量同样可以转换为因子。

可调用接口

factor(x = character(), levels, labels = levels,
           exclude = NA, ordered = is.ordered(x), nmax = NA)
    
    ordered(x, ...)
    
    is.factor(x)
    is.ordered(x)
    
    as.factor(x)
    as.ordered(x)
    
    addNA(x, ifany = FALSE)

生成方法

> x=c(1:5)
> factor(x)
[1] 1 2 3 4 5
Levels: 1 2 3 4 5

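当然，在 R 4.0.0 之前的版本中，选取数据框中某一列字符型变量时，返回的结果就是factor（因为当时 data.frame() 的 stringsAsFactors 参数默认为 TRUE）。自 R 4.0.0 起该参数默认为 FALSE，字符列不再自动转换为因子，若需要因子请显式指定 stringsAsFactors = TRUE 或使用 factor() 转换。下面的输出来自旧版本的R：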

> a=data.frame(name=c('Mike','Jane','King','Tom'),height=c(160,155,166,159),weight=c(65,48,55,53))
> a
  name height weight
1 Mike    160     65
2 Jane    155     48
3 King    166     55
4  Tom    159     53
> a$name
[1] Mike Jane King Tom 
Levels: Jane King Mike Tom
> a$height
[1] 160 155 166 159
> is.factor(a$name)
[1] TRUE

五、列表

列表是个大杂烩，啥都可以往里面装

最简单粗暴的方法，就是用list()函数去作用一下：

> s=1:5
> s
[1] 1 2 3 4 5
> is.list(s)
[1] FALSE
> lis=list(s)
> is.list(lis)
[1] TRUE

六、数据框

不多说，直接上栗子

> L3 <- LETTERS[1:3]#选定'A','B','C'
> fac <- sample(L3, 10, replace = TRUE)#从选定的'A','B','C'中随机取样，取10次
> d <- data.frame(x = 1, y = 1:10, fac = fac)
> d
   x  y fac
1  1  1   A
2  1  2   B
3  1  3   A
4  1  4   A
5  1  5   A
6  1  6   C
7  1  7   C
8  1  8   C
9  1  9   B
10 1 10   B

上面的栗子指定了列名字；若不指定，R会根据传入的表达式自动生成列名，如下：

> data.frame(1, 1:10, 11:20)
   X1 X1.10 X11.20
1   1     1     11
2   1     2     12
3   1     3     13
4   1     4     14
5   1     5     15
6   1     6     16
7   1     7     17
8   1     8     18
9   1     9     19
10  1    10     20
> data.frame(2, 1:10, 11:20)
   X2 X1.10 X11.20
1   2     1     11
2   2     2     12
3   2     3     13
4   2     4     14
5   2     5     15
6   2     6     16
7   2     7     17
8   2     8     18
9   2     9     19
10  2    10     20

选取某一列的方法：df['colname']（返回只含该列的数据框）或者df$colname、df[['colname']]（返回该列对应的向量）

选取多个列的方法：

> d
   x  y fac
1  1  1   A
2  1  2   B
3  1  3   A
4  1  4   A
5  1  5   A
6  1  6   C
7  1  7   C
8  1  8   C
9  1  9   B
10 1 10   B
> d[,1:2]
   x  y
1  1  1
2  1  2
3  1  3
4  1  4
5  1  5
6  1  6
7  1  7
8  1  8
9  1  9
10 1 10

若要添加一列，直接将向量/数组赋值即可，继续上面的栗子：

> d['newcol']=array(31:40)
> d
   x  y fac newcol
1  1  1   A     31
2  1  2   B     32
3  1  3   A     33
4  1  4   A     34
5  1  5   A     35
6  1  6   C     36
7  1  7   C     37
8  1  8   C     38
9  1  9   B     39
10 1 10   B     40

还有合并两个数据框可以用cbind（注意两个数据框的行数必须一样）。另外注意，下例中的 seq(61:70) 等价于 seq_along(61:70)，所以 f1 的取值是 1 到 10，而不是 61 到 70；若想得到 61 到 70，应直接写 61:70：

> d1=data.frame(f1=seq(61:70),f2=c(101:110))
> d1
   f1  f2
1   1 101
2   2 102
3   3 103
4   4 104
5   5 105
6   6 106
7   7 107
8   8 108
9   9 109
10 10 110
> cbind(d,d1)
   x  y fac newcol f1  f2
1  1  1   A     31  1 101
2  1  2   B     32  2 102
3  1  3   A     33  3 103
4  1  4   A     34  4 104
5  1  5   A     35  5 105
6  1  6   C     36  6 106
7  1  7   C     37  7 107
8  1  8   C     38  8 108
9  1  9   B     39  9 109
10 1 10   B     40 10 110