----------------------------------------
-- Converts ISO-8859-1 encoded string to "binary" UTF-8 string (D10 only)
----------------------------------------
on iso88591_to_utf8_string (str)
  -- Walks str character by character and appends the UTF-8 encoding of each
  -- character code to the result string:
  --   codes below 128 are copied as a single byte,
  --   all other codes become a two-byte sequence (lead byte 192 + upper bits,
  --   continuation byte 128 + lower six bits).
  -- Returns the accumulated string.
  utf8 = ""
  total = str.length
  repeat with pos = 1 to total
    code = chartonum(str.char[pos])
    if (code >= 128) then
      -- integer division by 64 yields the bits above the lower six
      leadByte = numtochar(bitOr(192, code/64))
      tailByte = numtochar(bitOr(128, bitAnd(code,63)))
      put leadByte & tailByte after utf8
    else
      put numtochar(code) after utf8
    end if
  end repeat
  return utf8
end
----------------------------------------
-- Converts ISO-8859-1 encoded file to UTF-8 file (D10 only)
-- requires fileio xtra
----------------------------------------
on iso88591_to_utf8_file (inputFile, outputFile)
  -- Reads inputFile (ISO-8859-1 encoded), converts its contents with
  -- iso88591_to_utf8_string and writes the result to outputFile.
  -- Returns false when the input file could not be read, otherwise the
  -- result of file_put_contents (true on success, false on failure).
  inputStr = file_get_contents(inputFile)
  -- file_get_contents returns false (not a string) when the file cannot be opened
  if not stringP(inputStr) then return false
  outputStr = iso88591_to_utf8_string(inputStr)
  return file_put_contents(outputFile, outputStr)
end
----------------------------------------
-- Converts Windows-1252 (CP-1252) encoded string to "binary" UTF-8 string (D10 only)
-- see http://en.wikipedia.org/wiki/Windows-1252 for details
-- concerning the differences between ISO-8859-1 and CP-1252
-- Notice: This is the default encoding for Director 10 and older on the PC.
----------------------------------------
on cp1252_to_utf8_string (str)
  -- Walks str character by character and appends the converted form of each
  -- character code to the result string:
  --   codes below 128 are copied as a single byte,
  --   codes 128..159 are replaced by the matching entry of the lookup list
  --   (entry 1 belongs to code 128; empty entries drop the character),
  --   codes above 159 become a two-byte sequence.
  -- NOTE(review): the lookup entries are taken verbatim from the script text,
  -- so their byte content depends on how this script is stored - confirm.
  -- Returns the accumulated string.
  table = ["€","","‚","ƒ","„","…","†","‡","ˆ","‰","Š","‹","Œ","","Ž","","","‘","’","“","”","•","–","—","˜","™","š","›","œ","","ž","Ÿ"]
  utf8 = ""
  total = str.length
  repeat with pos = 1 to total
    code = chartonum(str.char[pos])
    if (code > 159) then
      leadByte = numtochar(bitOr(192, code/64))
      tailByte = numtochar(bitOr(128, bitAnd(code,63)))
      put leadByte & tailByte after utf8
    else if (code > 127) then
      put table[code-127] after utf8
    else
      put numtochar(code) after utf8
    end if
  end repeat
  return utf8
end
----------------------------------------
-- Converts Windows-1252 (CP-1252) encoded file to UTF-8 file (D10 only)
-- requires fileio xtra
----------------------------------------
on cp1252_to_utf8_file (inputFile, outputFile)
  -- Reads inputFile (Windows-1252 / CP-1252 encoded), converts its contents
  -- with cp1252_to_utf8_string and writes the result to outputFile.
  -- This handler was previously named iso88591_to_utf8_file, which collided
  -- with the ISO-8859-1 file converter of the same name in this script.
  -- Returns false when the input file could not be read, otherwise the
  -- result of file_put_contents (true on success, false on failure).
  inputStr = file_get_contents(inputFile)
  -- file_get_contents returns false (not a string) when the file cannot be opened
  if not stringP(inputStr) then return false
  outputStr = cp1252_to_utf8_string(inputStr)
  return file_put_contents(outputFile, outputStr)
end
----------------------------------------
-- Converts Windows-1252 (CP-1252) encoded string to "binary" UTF-8 string (D10 only).
-- This version is usually faster because it takes into account that
-- there are usually more ASCII characters than non-ASCII characters. Therefore,
-- instead of converting char by char as in the original version,
-- the code identifies contiguous ASCII chunks and copies them at once.
----------------------------------------
on cp1252_to_utf8_string_faster (str)
  -- Same conversion as the char-by-char variant, but runs of characters with
  -- codes below 128 are copied to the result as one chunk.
  --   pos  : first character not yet written to the result
  --   scan : cursor searching for the next character with a code above 127
  -- Codes 128..159 are replaced by the matching lookup entry (entry 1 belongs
  -- to code 128), codes above 159 become a two-byte sequence.
  -- Returns the accumulated string.
  table = ["€","","‚","ƒ","„","…","†","‡","ˆ","‰","Š","‹","Œ","","Ž","","","‘","’","“","”","•","–","—","˜","™","š","›","œ","","ž","Ÿ"]
  utf8 = ""
  total = str.length
  pos = 1
  repeat while pos <= total
    scan = pos
    code = chartonum(str.char[scan])
    repeat while code <= 127
      scan = scan + 1
      if scan > total then
        -- the rest of the string is plain ascii: copy it and finish
        put str.char[pos..total] after utf8
        return utf8
      end if
      code = chartonum(str.char[scan])
    end repeat
    -- scan now points at a non-ascii character whose code is in "code"
    if scan > pos then
      -- flush the ascii chunk that precedes it
      put str.char[pos..scan-1] after utf8
    end if
    if code > 159 then
      leadByte = numtochar(bitOr(192, code/64))
      tailByte = numtochar(bitOr(128, bitAnd(code,63)))
      put leadByte & tailByte after utf8
    else
      put table[code-127] after utf8
    end if
    -- continue right behind the converted character
    pos = scan + 1
  end repeat
  return utf8
end
----------------------------------------
-- reads whole (non-binary) file into string
----------------------------------------
on file_get_contents (tFile)
  -- Opens tFile with a new fileIO xtra instance (mode 1) and returns
  -- everything readFile() delivers.
  -- Returns false when the status after openFile is non-zero.
  reader = xtra("fileIO").new()
  reader.openFile(tFile,1)
  if reader.status() <> 0 then return false
  contents = reader.readFile()
  reader.closeFile()
  -- drop the reference to the xtra instance
  reader = 0
  return contents
end
----------------------------------------
-- saves (also binary) string as file
----------------------------------------
on file_put_contents (tFile, tString)
  -- Writes tString to the file tFile, replacing an already existing file.
  -- Returns true on success and false when probing, creating, opening or
  -- writing the file reports an error status.
  fp = xtra("fileIO").new()
  -- probe for an existing file by opening it with mode 1
  fp.openFile(tFile, 1)
  err = fp.status()
  if (err = 0) then
    -- the file exists: remove it so that createFile starts from scratch
    fp.delete()
  else if (err <> -37) and (err <> -43) then
    -- -37 is the code this handler has always accepted as "no such file yet";
    -- -43 is the "File not found" code listed in the FileIO documentation.
    -- Any other status is a real error.
    fp = 0
    return false
  end if
  fp.createFile(tFile)
  err = fp.status()
  if (err) then return false
  fp.openFile(tFile, 2)
  err = fp.status()
  if (err) then return false
  fp.writeString(tString)
  -- remember the write status before closing, so a failed write is reported
  err = fp.status()
  fp.closeFile()
  fp = 0
  if (err) then return false
  return true
end