Detect the charset encoding of a character or raw vector.

ced_enc_detect(x, enc_hint = NULL, lang_hint = NULL)

Arguments

x

Raw or character vector.

enc_hint

Character vector with encoding hint.

lang_hint

Character vector with language code hint.

Value

Character vector with suggested encodings.

Examples

# detect character vector with ASCII strings
ascii <- "I can eat glass and it doesn't hurt me."
ced_enc_detect(ascii)
#> [1] "US-ASCII"
ced_enc_detect(charToRaw(ascii))
#> [1] "US-ASCII"
# detect character vector with UTF-8 strings
utf8 <- "\u4e0b\u5348\u597d"
print(utf8)
#> [1] "下午好"
ced_enc_detect(utf8)
#> [1] "UTF-8"
ced_enc_detect(charToRaw(utf8))
#> [1] "UTF-8"
# path to examples
ex_path <- system.file("test.txt", package = "ced")
ex_txt <- read.dcf(ex_path, all = TRUE)
# russian text
print(ex_txt[["France"]])
#> NULL
ced_enc_detect(ex_txt[["Russian"]])
#> [1] "UTF-8"
ced_enc_detect(iconv(ex_txt[["Russian"]], "utf8", "ibm866"))
#> [1] "IBM866"
ced_enc_detect(iconv(ex_txt[["Russian"]], "utf8", "windows-1251"))
#> [1] "windows-1251"
ced_enc_detect(iconv(ex_txt[["Russian"]], "utf8", "koi8-r"))
#> [1] "KOI8-R"
# chinese text
print(ex_txt[["Chinese"]])
#> [1] "我能吞下玻璃而不伤身体。"
ced_enc_detect(ex_txt[["Chinese"]])
#> [1] "UTF-8"
ced_enc_detect(iconv(ex_txt[["Chinese"]], "utf8", "gb18030"))
#> [1] "GB2312"
# korean text
print(ex_txt[["Korean"]])
#> [1] "나는 유리를 먹을 수 있어요. 그래도 아프지 않아요"
ced_enc_detect(ex_txt[["Korean"]])
#> [1] "UTF-8"
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "uhc"))
#> [1] "EUC-KR"
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "iso-2022-kr"))
#> [1] "ISO-2022-KR"
# japanese text
print(ex_txt[["Japanese"]])
#> [1] "私はガラスを食べられます。それは私を傷つけません。"
ced_enc_detect(ex_txt[["Japanese"]])
#> [1] "UTF-8"
ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "shift_jis"))
#> [1] "Shift_JIS"
ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "iso-2022-jp"))
#> [1] "ISO-2022-JP"
# \donttest{
# detect encoding of the web pages content
if (require("curl")) {
  detect_enc_url <- function(u) ced_enc_detect(curl_fetch_memory(u)$content)
  detect_enc_url("https://www.corriere.it")
  detect_enc_url("https://www.vk.com")
  detect_enc_url("https://www.qq.com")
  detect_enc_url("https://kakaku.com")
  detect_enc_url("https://etoland.co.kr")
}
#> Loading required package: curl
#> [1] "EUC-KR"
# }