run ?files
file.create(..., showWarnings = TRUE)
file.exists(...)
file.remove(...)
file.rename(from, to)
file.append(file1, file2)
file.copy(from, to, overwrite = recursive, recursive = FALSE,
copy.mode = TRUE, copy.date = FALSE)
file.symlink(from, to)
file.link(from, to)
...
The function names are pretty self-explanatory, but read the details: the behavior may vary depending on the OS, and unwanted operations on files can have terrible consequences…
Look at the “See Also” section… there is much more available!
# get or set the path to working directory
getwd()
setwd()
# Construct the path to files from components in a platform-independent way.
file.path("R", letters[1:3], letters[5:7])
## [1] "R/a/e" "R/b/f" "R/c/g"
# Produce a character vector of the names of files or directories in the named directory
(myFiles <- list.files(path = getwd(), full.names = TRUE, pattern = "\\.html"))
## [1] "/media/cunnac/DONNEES/CUNNAC/Lab-Related/Communications/Teaching/R_trainning_module/slides/RmdFilesWithoutAnswersForHandouts/R_trainning_slides_11__R-OS_20150908_handout.html"
list.dirs(path = getwd())
## [1] "/media/cunnac/DONNEES/CUNNAC/Lab-Related/Communications/Teaching/R_trainning_module/slides/RmdFilesWithoutAnswersForHandouts"
(myDir <- file.path(getwd(), "deleteMeAfterUse"))
## [1] "/media/cunnac/DONNEES/CUNNAC/Lab-Related/Communications/Teaching/R_trainning_module/slides/RmdFilesWithoutAnswersForHandouts/deleteMeAfterUse"
if( dir.exists(myDir ) || dir.create(path = myDir)) unlink(myDir, recursive = TRUE)
# unlink() can delete non-empty directories !!!
# dirname() and basename() extract the two parts (directory path and file name) of a full file path:
identical(myFiles, file.path(dirname(myFiles), basename(myFiles)))
## [1] TRUE
tempfile() # the full path to a temporary file
## [1] "/tmp/RtmpRPOwjo/file1cbb66fcc98e"
tempfile(tmpdir = getwd(), fileext = ".IRD")
## [1] "/media/cunnac/DONNEES/CUNNAC/Lab-Related/Communications/Teaching/R_trainning_module/slides/RmdFilesWithoutAnswersForHandouts/file1cbb6e52d0e.IRD"
basename(tempfile(pattern = "IntermediateFile", tmpdir = getwd(), fileext = ".001"))
## [1] "IntermediateFile1cbb46c4a91d.001"
It is good programming practice to use on.exit()
when dealing with temporary files, so that they are cleaned up whatever happens; otherwise you may end up with hundreds of them in your working directory during testing.
When the day is over or you have to do something else, you can save all or a few of the objects in your session to restart in (mostly) the same conditions as before you closed R.
To save individual objects in your workspace to disk:
save(v, file = "myVobject.Rdata")
# OR
save(list = sample(ls(), size = 2), file = "myVobject.Rdata")
To save your entire workspace to disk:
save.image(file = "myWorkspace_20150908.Rdata")
Try to run these, close everything, restart and reload the corresponding objects with:
load(file = "myVobject.Rdata", verbose = TRUE)
load(file = "myWorkspace_20150908.Rdata", verbose = TRUE)
load()
will not re-attach the packages you used… you have to load them again with library().
readLines() # dump lines of the file in a vector. No processing/parsing
scan() # as a low level way to parse content
# as a way to input data interactively
# Run this and type several words or groups of words
# Press Enter twice to quit
mykeyboardInput <- scan(what = "")
mykeyboardInput
The workhorse function is:
read.table(file, header = FALSE, sep = "", quote = "\"'",
dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
row.names, col.names, as.is = !stringsAsFactors,
na.strings = "NA", colClasses = NA, nrows = -1,
skip = 0, check.names = TRUE, fill = !blank.lines.skip,
strip.white = FALSE, blank.lines.skip = TRUE,
comment.char = "#",
allowEscapes = FALSE, flush = FALSE,
stringsAsFactors = default.stringsAsFactors(),
fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)
Try to copy the “df_something.txt” files from the training material to your working directory (or change it to where the files are).
With Rstudio open these files and look at their content. What is different among them?
Now let's try to import the data from “df_tab.txt” with read.table():
str(read.table(file = "df_tab.txt", sep = ","))
# Weird, right
str(read.table(file = "df_tab.txt", header = FALSE , sep = "\t", stringsAsFactors = TRUE))
# Slightly better but where are the colnames, why are they all factors?
str(read.table(file = "df_tab.txt", header = TRUE , sep = "\t", stringsAsFactors = TRUE))
# Is that what we want ?
str(read.table(file = "df_tab.txt", header = TRUE , sep = "\t", stringsAsFactors = FALSE))
# The text column is no longer a factor.
str(read.table(file = "df_tab.txt", header = TRUE , sep = "\t", stringsAsFactors = TRUE, dec = ","))
# What happens now?
read.table()
has convenience functions that let you rapidly type the code for importing specifically formatted tables:
Function | sep | dec |
---|---|---|
read.csv | , | . |
read.csv2 | ; | , |
read.delim | \t | . |
read.delim2 | \t | , |
Now try to import the content of the “df_something.txt” files in your session using the proper convenience function of read.table()
(read.csv
, read.delim
, etc…).
Try to make sure they are correctly imported using str()
or looking in the Environment panel of Rstudio.
df_coma <- read.csv("df_coma.txt")
df_semiColonDecComa <- read.csv2("df_semiColon.txt")
df_semiColonDecDot <- read.csv2("df_semiColon.txt", dec = ".")
# Do you see the effect on the mode (the type) of the resulting columns?
df_tab1 <- read.delim("df_tab.txt")
df_tab2 <- read.delim2("df_tab.txt")
# Do you see the effect on the mode (the type) of the resulting columns?
As a side note, it is pretty easy to import data directly from the web:
irisFromWeb <- read.csv(file = "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv")
str(irisFromWeb)
## 'data.frame': 150 obs. of 6 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
Before you get into complicated analysis, it is recommended to check upfront that what you have is what you want with str()
or summary()
or more specific tests that you will design:
writeLines()
cat()
The workhorse function is:
write.table(x, file = "", append = FALSE, quote = TRUE, sep = " ",
eol = "\n", na = "NA", dec = ".", row.names = TRUE,
col.names = TRUE, qmethod = c("escape", "double"),
fileEncoding = "")
Convenience functions:
write.csv()
write.csv2()
Save iris
to a text file with the function of your choice and read the corresponding file back into R with the appropriate function.
filePath <- "./myIris.csv"
write.csv(iris, file = filePath, row.names = FALSE)
irisFromLocalF <- read.csv(filePath)
file.remove(filePath)
## [1] TRUE
str(irisFromLocalF)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
gdata::read.xls()
utils::read.fwf()
gdata::write.fwf()
For even more exotic input/output formats, see the foreign package.
Do not re-invent the wheel: specialized packages have dedicated functions to handle specific formats such as biological sequences:
ape::read.dna()
Biostrings::readDNAStringSet()
There is an extensive infrastructure to interact with relational databases (DBI, RODBC, dplyr, … packages), but this is beyond the scope of this training.
Example from one of my scripts:
#####################################
out_name <- "pacbio_454_corrected"
querySeqFile <- "/mnt/DONNEES/CUNNAC/Lab-Related/Exp_Projects/BacterialGenomics/BAI3_Cosmids/BAI3_Cosmids/talC.fas"
blastOutputFile <- "TALEsInSelfCorrectedMAI1PacBioBlast.txt"
com <- paste("blastn -db", out_name,
"-num_threads 1 -max_target_seqs 500 -perc_identity 80 -evalue 0.001 -query",
querySeqFile,
"-out", blastOutputFile,
"-outfmt \"6 qseqid qlen sseqid bitscore evalue pident length mismatch gapopen qstart qend sstart send\"",
sep = " ")
message(com) # or could use cat()
system(com, intern = TRUE)
#####################################
Create a new R script with the code below, call it “MonScript.R”:
2+2
## [1] 4
jpeg("mon_graphique.jpg")
plot(rnorm(10))
dev.off()
## png
## 2
3+3
## [1] 6
Run on the DOS terminal:
Something like the command below, but with the proper path to your own script file (“MonScript.R”):
R CMD BATCH D:\Formation\R\script.R
What happens?