Newer
Older
browserjam / scripts / parser.gd
class_name Parser

const EOF = ''

enum State {
	Text,
	TagName,
	Attribute,
	AttributeName,
	AttributeEquals,
	AttributeValueStart,
	AttributeValueUnquoted,
	AttributeValueQuoted,
	DocType,
}

var _source: String
var _state = State.Text
var _index: int = 0

var _current_token: Token
var _current_attribute_name: String = ''
var _current_attribute_value: String = ''

func _init(source: String) -> void:
	self._source = source
	self._current_token = Token.new()

func _next_char() -> String:
	if _index >= len(_source):
		return EOF
	var c = _source[_index]
	_index += 1
	return c

func _emit_token() -> Token:
	var token = _current_token
	_current_token = Token.new()
	return token

func _on_text() -> Token:
	match _next_char():
		'<':
			_state = State.TagName
			if len(_current_token.text) > 0:
				_current_token.type = Token.Type.Text
				return _emit_token()
		EOF:
			if len(_current_token.text) > 0:
				_current_token.type = Token.Type.Text
				return _emit_token()
		var c:
			_current_token.text += c
	return null

func _on_tag_name() -> Token:
	match _next_char():
		' ', '\t', '\n', '\r':
			_state = State.Attribute
		'/':
			_current_token.is_closing = true
		'!':
			_state = State.DocType
		'>':
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		EOF:
			assert(false, 'TODO')
		var c:
			_current_token.tag_name += c
	return null

func _on_attibute() -> Token:
	match _next_char():
		' ', '\t', '\n', '\r':
			pass
		'>':
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		EOF:
			assert(false, 'TODO')
		var c:
			_current_attribute_name = c
			_state = State.AttributeName
	return null

func _on_attibute_name() -> Token:
	match _next_char():
		' ', '\t', '\n', '\r':
			_state = State.AttributeEquals
		'=':
			_state = State.AttributeValueStart
		'>':
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		EOF:
			assert(false, 'TODO')
		var c:
			_current_attribute_name += c
	return null

func _on_attibute_equals() -> Token:
	match _next_char():
		' ', '\t', '\n', '\r':
			pass
		'=':
			_state = State.AttributeValueStart
		'>':
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		_:
			assert(false, 'TODO')
	return null

func _on_attibute_value_start() -> Token:
	match _next_char():
		' ', '\t', '\n', '\r':
			pass
		'"':
			_state = State.AttributeValueQuoted
			_current_attribute_value = ''
		'>':
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		EOF:
			assert(false, 'TODO')
		var c:
			_current_attribute_value = c
			_state = State.AttributeValueUnquoted
	return null

func _on_attibute_value_quoted() -> Token:
	match _next_char():
		'"':
			_current_token.attributes[_current_attribute_name.to_lower()] = _current_attribute_value
			_state = State.Attribute
		'>':
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		EOF:
			assert(false, 'TODO')
		var c:
			_current_attribute_value += c
	return null

func _on_attibute_value_unquoted() -> Token:
	match _next_char():
		' ', '\t', '\n', '\r':
			_current_token.attributes[_current_attribute_name.to_lower()] = _current_attribute_value
			_state = State.Attribute
		'>':
			_current_token.attributes[_current_attribute_name.to_lower()] = _current_attribute_value
			_state = State.Text
			_current_token.type = Token.Type.Tag
			return _emit_token()
		EOF:
			assert(false, 'TODO')
		var c:
			_current_attribute_value += c
	return null

func _on_doc_type() -> Token:
	match _next_char():
		'>':
			_state = State.Text
		EOF:
			assert(false, 'TODO')
	return null

func _parse_char() -> Token:
	match _state:
		State.Text: return _on_text()
		State.TagName: return _on_tag_name()
		State.Attribute: return _on_attibute()
		State.AttributeName: return _on_attibute_name()
		State.AttributeEquals: return _on_attibute_equals()
		State.AttributeValueStart: return _on_attibute_value_start()
		State.AttributeValueQuoted: return _on_attibute_value_quoted()
		State.AttributeValueUnquoted: return _on_attibute_value_unquoted()
		State.DocType: return _on_doc_type()
	return null

func next() -> Token:
	while _index < len(_source):
		var token = _parse_char()
		if token != null:
			return token
	return null