JSON is a very common data interchange format. I don't love it, but sooner or later you have to deal with it.
So, this morning I wrote a JSON parser for MiniScript. This (or something very like it) will be in /sys/lib in Mini Micro 0.9, and will ship with command-line MiniScript once that gets support for import. But I thought I'd share here anyway, in case anyone has need of it before then.
Note that there are three useful entry points:
- parse(jsonString): returns a MiniScript data structure representing the data.
- unescape(s): undoes JSON-style escape sequences, converting \t into char(9) for example.
- hexToInt(hexString): converts a hex string like "002f" into a number like 47.
🤔 Given a big ol' input file, this will also serve as a great benchmark of general MiniScript processing.
So here's the code. It's a bit long, but a third or so of that is the unit tests. (And note that the syntax colorer on this forum gets a little confused by the backslashes... sorry about that.)
Enjoy,
Joe
// Parser: prototype for JSON parser objects (instantiate with `new Parser`).
//   source      the JSON text being parsed
//   _sourceLen  cached length of source
//   _p          index of the next character to consume in source
Parser = {"source": "", "_sourceLen": 0, "_p": 0}
// Attach the given JSON text to this parser and cache its length so the
// scanning loops can bounds-check against a stored value.
Parser.init = function(source)
    self._sourceLen = source.len
    self.source = source
end function
// Parse JSON and return the resulting MiniScript value.
//   source: optional JSON text; when omitted (null), the text previously
//           supplied via init is parsed instead.
// The read position is always rewound to the start before parsing.
Parser.parse = function(source=null)
    if source != null then
        self.init source
    end if
    self._p = 0
    return self._parseElement
end function
// Characters skipped between tokens: space, tab, line feed, carriage return.
whitespace = " " + char(9) + char(10) + char(13)

// Advance the read position past any run of whitespace characters,
// stopping at the first non-whitespace character or at end of input.
Parser._skipWhitespace = function()
    while self._p < self._sourceLen
        if whitespace.indexOf(self.source[self._p]) == null then return
        self._p = self._p + 1
    end while
end function
// Parse one JSON element at the current position.  At present this is
// nothing more than a pass-through to _parseValue.
Parser._parseElement = function()
    element = self._parseValue
    return element
end function
// Parse the JSON value that starts at the current position (after skipping
// leading whitespace) and return it as a MiniScript value: a string, number,
// list, map, true, false, or null.
//
// Returns null without consuming anything when the input is exhausted or
// when the next character does not begin a recognized value.
Parser._parseValue = function()
    self._skipWhitespace
    // Empty or all-whitespace input: there is no character to inspect, so
    // bail out rather than index past the end of the source.
    if self._p >= self._sourceLen then return null
    c = self.source[self._p]
    if c == """" then return self._parseString
    if "0123456789-".indexOf(c) != null then return self._parseNumber
    if c == "[" then return self._parseList
    if c == "{" then return self._parseMap
    if c == "t" and self.source[self._p:self._p+4] == "true" then
        self._p = self._p + 4
        return true
    end if
    if c == "f" and self.source[self._p:self._p+5] == "false" then
        // "false" is five characters long; advancing by only four would
        // leave the trailing "e" in the stream for the caller to trip over.
        self._p = self._p + 5
        return false
    end if
    if c == "n" and self.source[self._p:self._p+4] == "null" then
        self._p = self._p + 4
        return null
    end if
    // Unrecognized token: nothing is consumed.
    return null
end function
// Parse a JSON array.  On entry the current position is at the opening "[";
// on exit it is just past the closing "]".  Returns a MiniScript list of the
// parsed elements.
//
// If something that is not a JSON value appears where an element is
// expected, an error is printed and null is returned (the same convention
// _parseMap uses for its errors).  Input that ends before the closing "]"
// yields the elements gathered so far.
Parser._parseList = function()
    self._p = self._p + 1        // skip "["
    self._skipWhitespace
    result = []
    while self._p < self._sourceLen
        c = self.source[self._p]
        if c == "]" then break
        elementStart = self._p
        element = self._parseElement
        if self._p == elementStart then
            // Every valid JSON value consumes at least one character.  If
            // nothing was consumed, the text here is not a value, and
            // continuing would repeat this iteration forever.
            print "JSON error: unexpected character in array"
            print "Error at position " + self._p + ": " + self.source[self._p : self._p+20]
            return null
        end if
        result.push element
        self._skipWhitespace
        // Truncated input: stop instead of indexing past the end.
        if self._p >= self._sourceLen then break
        // after an element, we should have either a comma or a ']'
        c = self.source[self._p]
        if c == "," then
            self._p = self._p + 1
            self._skipWhitespace
        end if
    end while
    self._p = self._p + 1        // skip "]"
    return result
end function
// Parse a JSON object.  On entry the current position is at the opening
// "{"; on normal exit it is just past the closing "}".  Returns a MiniScript
// map from member names to parsed values, or null (after printing a
// message) if a member name is not a string literal or a colon is missing.
Parser._parseMap = function()
    self._p = self._p + 1        // step over the opening "{"
    self._skipWhitespace
    members = {}
    while self._p < self._sourceLen
        ch = self.source[self._p]
        if ch == "}" then break
        // Member names must be string literals.
        if ch != """" then
            print "JSON error: object member key must be a string literal" // ToDo: better error handling!
            print "Error at position " + self._p + ": " + self.source[self._p : self._p+20]
            return null
        end if
        memberKey = self._parseString
        self._skipWhitespace
        // A colon must separate the name from its value.
        if self.source[self._p] != ":" then
            print "JSON error: colon expected"
            print "Error at position " + self._p + ": " + self.source[self._p : self._p+20]
            return null
        end if
        self._p = self._p + 1
        self._skipWhitespace
        // The value itself may be any JSON element.
        members[memberKey] = self._parseElement
        self._skipWhitespace
        // After a key/value pair we expect either a comma (another member
        // follows) or the closing '}', which the top of the loop handles.
        if self.source[self._p] == "," then
            self._p = self._p + 1
            self._skipWhitespace
        end if
    end while
    self._p = self._p + 1        // step over the closing "}"
    return members
end function
// Read a string literal whose opening quote is at the current position,
// leaving the position just past the closing quote.  Returns the string's
// contents; escape sequences are decoded (via unescape) only when at least
// one backslash was seen while scanning.
Parser._parseString = function()
    self._p = self._p + 1        // step past the opening quote
    bodyStart = self._p
    sawBackslash = false
    while self._p < self._sourceLen
        ch = self.source[self._p]
        if ch == "\" then
            // Skip the backslash together with the character it escapes,
            // so an escaped quote does not end the literal.
            sawBackslash = true
            self._p = self._p + 2
        else
            self._p = self._p + 1
            if ch == """" then break
        end if
    end while
    // _p is now one past the closing quote, so the body ends at _p-1.
    text = self.source[bodyStart : self._p-1]
    if sawBackslash then return unescape(text)
    return text
end function
// Single-letter escapes and the control character each one stands for.
_escapeMap = {"b": char(8), "t": char(9), "n": char(10), "f": char(12), "r": char(13)}

// Decode JSON-style backslash escapes in s and return the resulting string.
//   \b \t \n \f \r   become the corresponding control characters
//   \uXXXX           becomes the character with that hexadecimal code
//   \<anything else> becomes that character itself (so \" \\ and \/ work)
unescape = function(s)
    pieces = []
    pos = 0
    total = s.len
    while pos < total
        ch = s[pos]
        if ch != "\" then
            // Ordinary character: copy it through unchanged.
            pieces.push ch
            pos = pos + 1
            continue
        end if
        code = s[pos+1]
        if code == "u" then
            // Unicode escape: backslash, "u", and four hex digits.
            pieces.push char(hexToInt(s[pos+2:pos+6]))
            pos = pos + 6
            continue
        end if
        if _escapeMap.hasIndex(code) then
            pieces.push _escapeMap[code]
        else
            pieces.push code
        end if
        pos = pos + 2
    end while
    return pieces.join("")
end function
// Lookup table from a single hexadecimal digit character to its numeric
// value (0-15).  Both "A"-"F" and "a"-"f" are accepted for 10-15.
_hexDigitMap = {}
for i in range(0, 15)
    _hexDigitMap["0123456789ABCDEF"[i]] = i   // digits and uppercase letters
    _hexDigitMap["0123456789abcdef"[i]] = i   // digits and lowercase letters
end for
// Convert a string of hexadecimal digits (either letter case) into its
// numeric value; for example "002f" gives 47.  An empty string gives 0.
hexToInt = function(s)
    total = 0
    pos = 0
    while pos < s.len
        // Shift the running total one hex place and add the next digit.
        total = total * 16 + _hexDigitMap[s[pos]]
        pos = pos + 1
    end while
    return total
end function
// Get a numeric literal from the source.  Advance to the next
// character after the number.  Returns the literal's value as a
// MiniScript number (converted with val).
Parser._parseNumber = function()
    startPos = self._p
    while self._p < self._sourceLen
        c = self.source[self._p]
        // Note that we are rather permissive here, consuming things like
        // 1-2e5+4E7, which is not valid JSON.  But we're not trying to be
        // a JSON validator; we're just trying to grok valid JSON as quickly
        // as we can.
        // The decimal point must be part of this set: without it, a value
        // such as 3.14 would stop at "3" and leave ".14" in the stream.
        if "0123456789+-.eE".indexOf(c) == null then break
        self._p = self._p + 1
    end while
    result = val(self.source[startPos : self._p])
    return result
end function
// One-call entry point: build a throwaway Parser, run it over jsonString,
// and return the resulting MiniScript value.  Saves callers from having to
// create a Parser themselves.
parse = function(jsonString)
    parser = new Parser
    return parser.parse(jsonString)
end function
// Unit tests.  These run only when this file is executed directly (so that
// locals and globals are the same scope), not when it is imported.
if locals == globals then
    print "Unit testing: json"

    errorCount = 0
    // Report a failure and bump errorCount when actual differs from expected.
    // (The note parameter is accepted but not currently used.)
    assertEqual = function(actual, expected, note)
        if actual != expected then
            print "Unit test failure: expected " + expected + ", got " + actual
            outer.errorCount = errorCount + 1
        end if
    end function

    p = new Parser
    p.init(" true ")
    // The parser returns the boolean value true (the number 1), not the
    // string "true"; a number never compares equal to a string, so the
    // expected value has to be the boolean itself.
    assertEqual p.parse, true

    p = new Parser
    assertEqual p.parse(char(13) + "42"), 42
    assertEqual p.parse("""\tHello, \""Bob\""."""), char(9) + "Hello, ""Bob""."
    assertEqual p.parse("""\u002F"""), "/"

    assertEqual p.parse("[1, 2 , 3]"), [1, 2, 3]
    assertEqual p.parse("[ ""hey"", true, [0]]"), ["hey", true, [0]]

    assertEqual p.parse("{""hey"": ""ho"", ""9"" : 81}"), {"hey":"ho", "9":81}

    // And here's a longer example... remember, quotes doubled only
    // to make them valid MiniScript string literals...
    data = parse("{""widget"": {" +
    " ""debug"": ""on""," +
    " ""window"": {" +
    " ""title"": ""Sample Konfabulator Widget""," +
    " ""name"": ""main_window""," +
    " ""width"": 500," +
    " ""height"": 300" +
    " }," +
    " ""image"": { " +
    " ""src"": ""Images\\Sun.png""," +
    " ""name"": ""sun1""," +
    " ""hOffset"": 250," +
    " ""vOffset"": 250," +
    " ""alignment"": ""center""" +
    " }," +
    " ""text"": {" +
    " ""data"": ""Click Here""," +
    " ""size"": 36," +
    " ""style"": ""bold""," +
    " ""name"": ""text1""," +
    " ""hOffset"": 250," +
    " ""vOffset"": 100," +
    " ""alignment"": ""center""," +
    " ""onMouseUp"": ""sun1.opacity = (sun1.opacity / 100) * 90;""" +
    " }}}")
    assertEqual data.widget.debug, "on"
    assertEqual data.widget.window.width, 500
    assertEqual data.widget.image.src, "Images\Sun.png"
    assertEqual data.widget.text.size, 36

    if errorCount == 0 then
        print "All tests passed. Huzzah!"
    else
        print errorCount + " error" + "s" * (errorCount!=1) + " found."
    end if
end if