1 R Basics

1.1 Generate Random Sample

Set the random seed so that repeated runs produce the same result.

set.seed(42)

A random vector of size 20 that follows a normal distribution

rnorm(20)

 [1]  1.37095845 -0.56469817  0.36312841  0.63286260  0.40426832 -0.10612452
 [7]  1.51152200 -0.09465904  2.01842371 -0.06271410  1.30486965  2.28664539
[13] -1.38886070 -0.27878877 -0.13332134  0.63595040 -0.28425292 -2.65645542
[19] -2.44046693  1.32011335

rnorm(20, mean = 10, sd = 2)

 [1]  9.386723  6.437383  9.656165 12.429349 13.790387  9.139062  9.485461
 [8]  6.473674 10.920195  8.720010 10.910900 11.409675 12.070207  8.782147
[15] 11.009910  6.565983  8.431082  8.298185  5.171585 10.072245

A random vector of size 20 that follows a uniform distribution

runif(20)

 [1] 0.5816040025 0.1579052082 0.3590283059 0.6456318784 0.7758233626
 [6] 0.5636468416 0.2337033986 0.0899805163 0.0856120649 0.3052183695
[11] 0.6674265147 0.0002388966 0.2085699569 0.9330341273 0.9256447486
[16] 0.7340943010 0.3330719834 0.5150633298 0.7439746463 0.6191592400

runif(20, min = 0, max = 1)

 [1] 0.626245345 0.217157698 0.216567311 0.388945029 0.942455692 0.962608014
 [7] 0.739855279 0.733245906 0.535761290 0.002272966 0.608937453 0.836801559
[13] 0.751522563 0.452731573 0.535789994 0.537376695 0.001380844 0.355665954
[19] 0.612133090 0.828942131

runif(20, 0, 1)

 [1] 0.3567220 0.4106351 0.5734759 0.5896783 0.7196573 0.3949730 0.9192039
 [8] 0.9625703 0.2335235 0.7244976 0.9036345 0.6034741 0.6315073 0.9373858
[15] 0.8504828 0.5798209 0.8214039 0.1137186 0.7645078 0.6236135

prod(40:36)

[1] 78960960

prod(5:1)/prod(40:36)

[1] 1.519738e-06

1/choose(40,5)

[1] 1.519738e-06

pvec <- seq(0,1,0.05)
pvec

 [1] 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70
[16] 0.75 0.80 0.85 0.90 0.95 1.00

x <- rnorm(60000)
quantile(x, pvec)

          0%           5%          10%          15%          20%          25% 
-4.043276349 -1.656275635 -1.286941232 -1.045470141 -0.852443729 -0.684532015 
         30%          35%          40%          45%          50%          55% 
-0.529470949 -0.387580521 -0.257069669 -0.124515346  0.002733696  0.126948433 
         60%          65%          70%          75%          80%          85% 
 0.251286891  0.381425011  0.524315026  0.675289348  0.843406262  1.033822369 
         90%          95%         100% 
 1.282235340  1.649455790  4.328091274

quantile(x)

          0%          25%          50%          75%         100% 
-4.043276349 -0.684532015  0.002733696  0.675289348  4.328091274

x.1 <- rnorm(20)
x.2 <- rnorm(20) + runif(20)
x.3 <- x.1 * 20 + x.2 * 4 - rnorm(20)

df <- data.frame(x1 = x.1, x2 = x.2, x3 = x.3)

1.2 Matrix

A <- cor(df)
A

          x1        x2        x3
x1 1.0000000 0.1598481 0.9841209
x2 0.1598481 1.0000000 0.3275044
x3 0.9841209 0.3275044 1.0000000

svd(A)

$d
[1] 2.0934081396 0.9056768657 0.0009149947

$u
           [,1]       [,2]       [,3]
[1,] -0.6617282  0.3025436 -0.6859906
[2,] -0.3022505 -0.9449705 -0.1252015
[3,] -0.6861198  0.1244917  0.7167576

$v
           [,1]       [,2]       [,3]
[1,] -0.6617282  0.3025436 -0.6859906
[2,] -0.3022505 -0.9449705 -0.1252015
[3,] -0.6861198  0.1244917  0.7167576

iA <- solve(A)

cA <- chol(A)

t(cA)  %*% cA

          x1        x2        x3
x1 1.0000000 0.1598481 0.9841209
x2 0.1598481 1.0000000 0.3275044
x3 0.9841209 0.3275044 1.0000000

solve(cA) %*% t(solve(cA))

           x1        x2         x3
x1  514.61173  93.64606 -537.10967
x2   93.64606  18.16131  -98.10696
x3 -537.10967 -98.10696  561.71133

solve(A)

           x1        x2         x3
x1  514.61173  93.64606 -537.10967
x2   93.64606  18.16131  -98.10696
x3 -537.10967 -98.10696  561.71133

1.3 string

astr <- as.character(12345)
bstr <- "NULL"
cstr <- "NOT LOWER, not upper"
c(astr, bstr, cstr)

[1] "12345"                "NULL"                 "NOT LOWER, not upper"

1.3.1 length of a string

nchar(astr)

[1] 5

c(toupper(cstr), tolower(cstr))

[1] "NOT LOWER, NOT UPPER" "not lower, not upper"

1.3.2 substring

Both substr and substring can extract a substring. substring has a default value for its end position (last = 1000000L), whereas substr requires stop to be given explicitly. Also, substring can extract multiple substrings in one call, while substr can’t.

substr("HELLO", 2,3)

[1] "EL"

substr("HELLO", c(1,2,3),c(3,4,5))

[1] "HEL"

substring("HELLO", c(1,2,3),c(3,4,5))

[1] "HEL" "ELL" "LLO"

a = "12345678"
substr(a,2,2)

[1] "2"

substr(a,2,4)

[1] "234"

# substr(a,2)
# Error in substr(a, 2) : argument "stop" is missing, with no default

substring(a,2)

[1] "2345678"

substr(a,3,4) <- "Toyota"
a

[1] "12To5678"

substring(a,3) <- "Toyata"
a

[1] "12Toyata"

1.3.3 combine strings

a = "12345"

b = "6789"

paste(a,b)

[1] "12345 6789"

paste(a,b,sep="")

[1] "123456789"

paste(a,b,sep="-")

[1] "12345-6789"

paste(a,b,sep = "toyota")

[1] "12345toyota6789"

a <- c('Taipei', "Tokyo", 'Singapore', 'Hong Kong', "Johor Bahru", "Kuala Lumpur")
paste(a)

[1] "Taipei"       "Tokyo"        "Singapore"    "Hong Kong"    "Johor Bahru" 
[6] "Kuala Lumpur"

paste(a, "?", sep = "")

[1] "Taipei?"       "Tokyo?"        "Singapore?"    "Hong Kong?"   
[5] "Johor Bahru?"  "Kuala Lumpur?"

paste(a, "is where", sep = " ")

[1] "Taipei is where"       "Tokyo is where"        "Singapore is where"   
[4] "Hong Kong is where"    "Johor Bahru is where"  "Kuala Lumpur is where"

paste(a, "is where", sep = " ", collapse = ", and ")

[1] "Taipei is where, and Tokyo is where, and Singapore is where, and Hong Kong is where, and Johor Bahru is where, and Kuala Lumpur is where"

paste(c("a","b"), c("1","2","3"))

[1] "a 1" "b 2" "a 3"

paste(c("a","b","c","d"), c("1","2","3"))

[1] "a 1" "b 2" "c 3" "d 1"

1.3.4 split strings

strsplit(",,a,bb,", split = ",")

[[1]]
[1] ""   ""   "a"  "bb"

strsplit(",,a,,", split = ",")

[[1]]
[1] ""  ""  "a" ""

strsplit(",,a,", split = ",")

[[1]]
[1] ""  ""  "a"

1.3.5 replace

replace by vector

s <- "ccccaaaassssaaa"
chartr("s", "A", s)

[1] "ccccaaaaAAAAaaa"

chartr("sc", "AI", s)

[1] "IIIIaaaaAAAAaaa"

sub("a", "b", s)

[1] "ccccbaaassssaaa"

gsub("a", "b", s)

[1] "ccccbbbbssssbbb"

Note that chartr works character by character, translating each listed character to its counterpart; gsub replaces every occurrence of the whole target substring, while sub replaces only the first occurrence.

### Regular Expression

Try typing ?grep in your console, then see the help page “Pattern Matching and Replacement”.

# grep()

1.4 todo

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

c("Hello")

[1] "Hello"

1.5 tricks

1.5.1 deal with potential exception

Pass the variable name as a string to get0() to check whether it is defined; get0() returns NULL if the variable is not found.

x0 <- "demo"
get0("x0")

[1] "demo"

get0("xa_nondefined")

NULL

1.5.2 RMarkdown features

When you write {r} after the three backticks (```) that open a code chunk, you can add chunk options:

If you leave {r} with its default options, you get very noisy output:

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(tidyverse)

Warning: package 'ggplot2' was built under R version 4.5.2

Warning: package 'readr' was built under R version 4.5.2

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.1     ✔ readr     2.1.6
✔ ggplot2   4.0.1     ✔ stringr   1.5.2
✔ lubridate 1.9.4     ✔ tibble    3.3.0
✔ purrr     1.1.0     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

You can write {r, warning=FALSE, message=FALSE} to suppress both warnings and messages.

library(dplyr)
library(tidyverse)

echo = FALSE hides the code chunk (the results are still shown)

include=FALSE hides both the code chunk and its results (the code is still run)

eval = FALSE shows the code but does not execute it

results = 'hide' hides the printed results (the code is still shown and executed)

error = TRUE continues knitting even if the chunk raises an error

1.5.3 package management

Install the package if it is not already installed, then load it.

if (!requireNamespace("tidyverse")) install.packages('tidyverse')
library(tidyverse)

This can also be done with the pacman package.

library(pacman)
pacman::p_load(mblm)

c("Hello")

[1] "Hello"

1.5.4 generate PDF from rmarkdown

tinytex::install_tinytex()

RStudio
Quarto

quarto render task.Rmd --to pdf

(VS Code can also be used.)