diff options
29 files changed, 991 insertions, 208 deletions
diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d3a8b5b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,39 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{json,toml,yml,gyp}] +indent_style = space +indent_size = 2 + +[*.js] +indent_style = space +indent_size = 2 + +[*.rs] +indent_style = space +indent_size = 4 + +[*.{c,cc,h}] +indent_style = space +indent_size = 4 + +[*.{py,pyi}] +indent_style = space +indent_size = 4 + +[*.swift] +indent_style = space +indent_size = 4 + +[*.go] +indent_style = tab +indent_size = 8 + +[Makefile] +indent_style = tab +indent_size = 8 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ffb52ab --- /dev/null +++ b/.gitattributes @@ -0,0 +1,11 @@ +* text eol=lf + +src/*.json linguist-generated +src/parser.c linguist-generated +src/tree_sitter/* linguist-generated + +bindings/** linguist-generated +binding.gyp linguist-generated +setup.py linguist-generated +Makefile linguist-generated +Package.swift linguist-generated diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..336d0af --- /dev/null +++ b/Makefile @@ -0,0 +1,112 @@ +VERSION := 0.0.1 + +LANGUAGE_NAME := tree-sitter-gsp + +# repository +SRC_DIR := src + +PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null) + +ifeq ($(PARSER_URL),) +	PARSER_URL := $(subst .git,,$(PARSER_REPO_URL)) +ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),) +	PARSER_URL := $(subst :,/,$(PARSER_URL)) +	PARSER_URL := $(subst git@,https://,$(PARSER_URL)) +endif +endif + +TS ?= tree-sitter + +# ABI versioning +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# source/object files +PARSER := $(SRC_DIR)/parser.c +EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c)) +OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS)) + +# flags +ARFLAGS ?= rcs +override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC + +# OS-specific bits +ifeq ($(OS),Windows_NT) +	$(error "Windows is not supported") +else ifeq ($(shell uname),Darwin) +	SOEXT = dylib +	SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib +	SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib +	LINKSHARED := $(LINKSHARED)-dynamiclib -Wl, +	ifneq ($(ADDITIONAL_LIBS),) +	LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS), +	endif +	LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks +else +	SOEXT = so +	SOEXTVER_MAJOR = so.$(SONAME_MAJOR) +	SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) +	LINKSHARED := $(LINKSHARED)-shared -Wl, +	ifneq ($(ADDITIONAL_LIBS),) +	LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS) +	endif +	LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR) +endif +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) +	PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc + +lib$(LANGUAGE_NAME).a: $(OBJS) +	$(AR) $(ARFLAGS) $@ $^ + +lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) +	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ +ifneq ($(STRIP),) +	$(STRIP) $@ +endif + +$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in +	sed  -e 's|@URL@|$(PARSER_URL)|' \ +		-e 's|@VERSION@|$(VERSION)|' \ +		-e 's|@LIBDIR@|$(LIBDIR)|' \ +		-e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ +		-e 's|@REQUIRES@|$(REQUIRES)|' \ +		-e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \ +		-e 's|=$(PREFIX)|=$${prefix}|' \ +		-e 's|@PREFIX@|$(PREFIX)|' $< > $@ + +$(PARSER): $(SRC_DIR)/grammar.json +	$(TS) generate --no-bindings $^ + +install: all +	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)' +	install -m644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h +	install -m644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc +	install -m644 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a +	install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) +	ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) +	ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) + +uninstall: +	$(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \ +		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \ +		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \ +		'$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \ +		'$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \ +		'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + +clean: +	$(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) + +test: +	$(TS) test + +.PHONY: all install uninstall clean test diff --git a/Package.swift b/Package.swift new file mode 100644 index 0000000..133b6de --- /dev/null +++ b/Package.swift @@ -0,0 +1,47 @@ +// swift-tools-version:5.3 +import PackageDescription + +let package = Package( +    name: "TreeSitterGsp", +    products: [ +        .library(name: "TreeSitterGsp", targets: ["TreeSitterGsp"]), +    ], +    dependencies: [], +    targets: [ +        .target(name: "TreeSitterGsp", +                path: ".", +                exclude: [ +                    "Cargo.toml", +                    "Makefile", +                    "binding.gyp", +                    "bindings/c", +                    "bindings/go", +                    "bindings/node", +                    "bindings/python", +                    "bindings/rust", +                    "prebuilds", +                    "grammar.js", +                    "package.json", +                    "package-lock.json", +                    "pyproject.toml", +                    "setup.py", +                    "test", +                    "examples", +                    ".editorconfig", +                    ".github", +                    ".gitignore", +                    ".gitattributes", +                    ".gitmodules", +                ], +                sources: [ +                    "src/parser.c", +                    // NOTE: if your language has an external scanner, add it here. +                ], +                resources: [ +                    .copy("queries") +                ], +                publicHeadersPath: "bindings/swift", +                cSettings: [.headerSearchPath("src")]) +    ], +    cLanguageStandard: .c11 +) diff --git a/binding.gyp b/binding.gyp index 6ae7c31..016165a 100644 --- a/binding.gyp +++ b/binding.gyp @@ -2,18 +2,29 @@    "targets": [      {        "target_name": "tree_sitter_gsp_binding", +      "dependencies": [ +        "<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except", +      ],        "include_dirs": [ -        "<!(node -e \"require('nan')\")", -        "src" +        "src",        ],        "sources": [          "bindings/node/binding.cc",          "src/parser.c", -        # If your language uses an external scanner, add it here. +        # NOTE: if your language has an external scanner, add it here. +      ], +      "conditions": [ +        ["OS!='win'", { +          "cflags_c": [ +            "-std=c11", +          ], +        }, { # OS == "win" +          "cflags_c": [ +            "/std:c11", +            "/utf-8", +          ], +        }],        ], -      "cflags_c": [ -        "-std=c99", -      ]      }    ]  } diff --git a/bindings/c/tree-sitter-gsp.h b/bindings/c/tree-sitter-gsp.h new file mode 100644 index 0000000..8f23915 --- /dev/null +++ b/bindings/c/tree-sitter-gsp.h @@ -0,0 +1,16 @@ +#ifndef TREE_SITTER_GSP_H_ +#define TREE_SITTER_GSP_H_ + +typedef struct TSLanguage TSLanguage; + +#ifdef __cplusplus +extern "C" { +#endif + +const TSLanguage *tree_sitter_gsp(void); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_GSP_H_ diff --git a/bindings/c/tree-sitter-gsp.pc.in b/bindings/c/tree-sitter-gsp.pc.in new file mode 100644 index 0000000..ba931d8 --- /dev/null +++ b/bindings/c/tree-sitter-gsp.pc.in @@ -0,0 +1,11 @@ +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: tree-sitter-gsp +Description: Gsp grammar for tree-sitter +URL: @URL@ +Version: @VERSION@ +Requires: @REQUIRES@ +Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-gsp +Cflags: -I${includedir} diff --git a/bindings/go/binding.go b/bindings/go/binding.go new file mode 100644 index 0000000..353f400 --- /dev/null +++ b/bindings/go/binding.go @@ -0,0 +1,13 @@ +package tree_sitter_gsp + +// #cgo CFLAGS: -std=c11 -fPIC +// #include "../../src/parser.c" +// // NOTE: if your language has an external scanner, add it here. +import "C" + +import "unsafe" + +// Get the tree-sitter Language for this grammar. +func Language() unsafe.Pointer { +	return unsafe.Pointer(C.tree_sitter_gsp()) +} diff --git a/bindings/go/binding_test.go b/bindings/go/binding_test.go new file mode 100644 index 0000000..c4efff1 --- /dev/null +++ b/bindings/go/binding_test.go @@ -0,0 +1,15 @@ +package tree_sitter_gsp_test + +import ( +	"testing" + +	tree_sitter "github.com/smacker/go-tree-sitter" +	"github.com/tree-sitter/tree-sitter-gsp" +) + +func TestCanLoadGrammar(t *testing.T) { +	language := tree_sitter.NewLanguage(tree_sitter_gsp.Language()) +	if language == nil { +		t.Errorf("Error loading Gsp grammar") +	} +} diff --git a/bindings/go/go.mod b/bindings/go/go.mod new file mode 100644 index 0000000..38c34a6 --- /dev/null +++ b/bindings/go/go.mod @@ -0,0 +1,5 @@ +module github.com/tree-sitter/tree-sitter-gsp + +go 1.22 + +require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8 diff --git a/bindings/node/binding.cc b/bindings/node/binding.cc index 81881bf..a813e24 100644 --- a/bindings/node/binding.cc +++ b/bindings/node/binding.cc @@ -1,28 +1,20 @@ -#include "tree_sitter/parser.h" -#include <node.h> -#include "nan.h" +#include <napi.h> -using namespace v8; +typedef struct TSLanguage TSLanguage; -extern "C" TSLanguage * tree_sitter_gsp(); +extern "C" TSLanguage *tree_sitter_gsp(); -namespace { +// "tree-sitter", "language" hashed with BLAKE2 +const napi_type_tag LANGUAGE_TYPE_TAG = { +  0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 +}; -NAN_METHOD(New) {} - -void Init(Local<Object> exports, Local<Object> module) { -  Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New); -  tpl->SetClassName(Nan::New("Language").ToLocalChecked()); -  tpl->InstanceTemplate()->SetInternalFieldCount(1); - -  Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked(); -  Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); -  Nan::SetInternalFieldPointer(instance, 0, tree_sitter_gsp()); - -  Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("gsp").ToLocalChecked()); -  Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); +Napi::Object Init(Napi::Env env, Napi::Object exports) { +    exports["name"] = Napi::String::New(env, "gsp"); +    auto language = Napi::External<TSLanguage>::New(env, tree_sitter_gsp()); +    language.TypeTag(&LANGUAGE_TYPE_TAG); +    exports["language"] = language; +    return exports;  } -NODE_MODULE(tree_sitter_gsp_binding, Init) - -}  // namespace +NODE_API_MODULE(tree_sitter_gsp_binding, Init) diff --git a/bindings/node/index.d.ts b/bindings/node/index.d.ts new file mode 100644 index 0000000..efe259e --- /dev/null +++ b/bindings/node/index.d.ts @@ -0,0 +1,28 @@ +type BaseNode = { +  type: string; +  named: boolean; +}; + +type ChildNode = { +  multiple: boolean; +  required: boolean; +  types: BaseNode[]; +}; + +type NodeInfo = +  | (BaseNode & { +      subtypes: BaseNode[]; +    }) +  | (BaseNode & { +      fields: { [name: string]: ChildNode }; +      children: ChildNode[]; +    }); + +type Language = { +  name: string; +  language: unknown; +  nodeTypeInfo: NodeInfo[]; +}; + +declare const language: Language; +export = language; diff --git a/bindings/node/index.js b/bindings/node/index.js index d0bc87e..6657bcf 100644 --- a/bindings/node/index.js +++ b/bindings/node/index.js @@ -1,18 +1,6 @@ -try { -  module.exports = require("../../build/Release/tree_sitter_gsp_binding"); -} catch (error1) { -  if (error1.code !== 'MODULE_NOT_FOUND') { -    throw error1; -  } -  try { -    module.exports = require("../../build/Debug/tree_sitter_gsp_binding"); -  } catch (error2) { -    if (error2.code !== 'MODULE_NOT_FOUND') { -      throw error2; -    } -    throw error1 -  } -} +const root = require("path").join(__dirname, "..", ".."); + +module.exports = require("node-gyp-build")(root);  try {    module.exports.nodeTypeInfo = require("../../src/node-types.json"); diff --git a/bindings/python/tree_sitter_gsp/__init__.py b/bindings/python/tree_sitter_gsp/__init__.py new file mode 100644 index 0000000..a844d6d --- /dev/null +++ b/bindings/python/tree_sitter_gsp/__init__.py @@ -0,0 +1,5 @@ +"Gsp grammar for tree-sitter" + +from ._binding import language + +__all__ = ["language"] diff --git a/bindings/python/tree_sitter_gsp/__init__.pyi b/bindings/python/tree_sitter_gsp/__init__.pyi new file mode 100644 index 0000000..5416666 --- /dev/null +++ b/bindings/python/tree_sitter_gsp/__init__.pyi @@ -0,0 +1 @@ +def language() -> int: ... diff --git a/bindings/python/tree_sitter_gsp/binding.c b/bindings/python/tree_sitter_gsp/binding.c new file mode 100644 index 0000000..569c1dc --- /dev/null +++ b/bindings/python/tree_sitter_gsp/binding.c @@ -0,0 +1,27 @@ +#include <Python.h> + +typedef struct TSLanguage TSLanguage; + +TSLanguage *tree_sitter_gsp(void); + +static PyObject* _binding_language(PyObject *self, PyObject *args) { +    return PyLong_FromVoidPtr(tree_sitter_gsp()); +} + +static PyMethodDef methods[] = { +    {"language", _binding_language, METH_NOARGS, +     "Get the tree-sitter language for this grammar."}, +    {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef module = { +    .m_base = PyModuleDef_HEAD_INIT, +    .m_name = "_binding", +    .m_doc = NULL, +    .m_size = -1, +    .m_methods = methods +}; + +PyMODINIT_FUNC PyInit__binding(void) { +    return PyModule_Create(&module); +} diff --git a/bindings/python/tree_sitter_gsp/py.typed b/bindings/python/tree_sitter_gsp/py.typed new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/bindings/python/tree_sitter_gsp/py.typed diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index c6061f0..4cc26f5 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -7,6 +7,9 @@ fn main() {          .flag_if_supported("-Wno-unused-parameter")          .flag_if_supported("-Wno-unused-but-set-variable")          .flag_if_supported("-Wno-trigraphs"); +    #[cfg(target_env = "msvc")] +    c_config.flag("-utf-8"); +      let parser_path = src_dir.join("parser.c");      c_config.file(&parser_path); diff --git a/bindings/swift/TreeSitterGsp/gsp.h b/bindings/swift/TreeSitterGsp/gsp.h new file mode 100644 index 0000000..8f23915 --- /dev/null +++ b/bindings/swift/TreeSitterGsp/gsp.h @@ -0,0 +1,16 @@ +#ifndef TREE_SITTER_GSP_H_ +#define TREE_SITTER_GSP_H_ + +typedef struct TSLanguage TSLanguage; + +#ifdef __cplusplus +extern "C" { +#endif + +const TSLanguage *tree_sitter_gsp(void); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_GSP_H_ @@ -18,7 +18,7 @@ module.exports = grammar({  			seq(choice('-', '='), optional($.text)),  		), -		node_name: $ => /[a-zA-Z:_\u{000C0}-\u{000D6}\u{000D8}-\u{000F6}\u{000F8}-\u{002FF}\u{00370}-\u{0037D}\u{0037F}-\u{01FFF}\u{0200C}-\u{0200D}\u{02070}-\u{0218F}\u{02C00}-\u{02FEF}\u{03001}-\u{0D7FF}\u{0F900}-\u{0FDCF}\u{0FDF0}-\u{0FFFD}\u{10000}-\u{EFFFF}][a-zA-Z0-9:_\-.·\u{00300}-\u{0036F}\u{0203F}-\u{02040}\u{000C0}-\u{000D6}\u{000D8}-\u{000F6}\u{000F8}-\u{002FF}\u{00370}-\u{0037D}\u{0037F}-\u{01FFF}\u{0200C}-\u{0200D}\u{02070}-\u{0218F}\u{02C00}-\u{02FEF}\u{03001}-\u{0D7FF}\u{0F900}-\u{0FDCF}\u{0FDF0}-\u{0FFFD}\u{10000}-\u{EFFFF}]*/u, +		node_name: $ => /[\/a-zA-Z:_\u{000C0}-\u{000D6}\u{000D8}-\u{000F6}\u{000F8}-\u{002FF}\u{00370}-\u{0037D}\u{0037F}-\u{01FFF}\u{0200C}-\u{0200D}\u{02070}-\u{0218F}\u{02C00}-\u{02FEF}\u{03001}-\u{0D7FF}\u{0F900}-\u{0FDCF}\u{0FDF0}-\u{0FFFD}\u{10000}-\u{EFFFF}][a-zA-Z0-9:_\-.·\u{00300}-\u{0036F}\u{0203F}-\u{02040}\u{000C0}-\u{000D6}\u{000D8}-\u{000F6}\u{000F8}-\u{002FF}\u{00370}-\u{0037D}\u{0037F}-\u{01FFF}\u{0200C}-\u{0200D}\u{02070}-\u{0218F}\u{02C00}-\u{02FEF}\u{03001}-\u{0D7FF}\u{0F900}-\u{0FDCF}\u{0FDF0}-\u{0FFFD}\u{10000}-\u{EFFFF}]*/u,  		text: $ => repeat1(  			choice( diff --git a/package.json b/package.json index 813cf53..09b9b1f 100644 --- a/package.json +++ b/package.json @@ -3,8 +3,11 @@    "version": "1.0.0",    "description": "A tree-sitter parser for gsp",    "main": "bindings/node", +  "types": "bindings/node",    "scripts": { -    "test": "echo \"Error: no test specified\" && exit 1" +    "test": "echo \"Error: no test specified\" && exit 1", +    "install": "node-gyp-build", +    "prebuildify": "prebuildify --napi --strip"    },    "repository": {      "type": "git", @@ -17,6 +20,32 @@    },    "homepage": "https://git.sr.ht/~mango/tree-sitter-gsp#readme",    "dependencies": { -    "nan": "^2.18.0" -  } +    "node-addon-api": "^7.1.0", +    "node-gyp-build": "^4.8.0" +  }, +  "peerDependencies": { +    "tree-sitter": "^0.21.0" +  }, +  "peerDependenciesMeta": { +    "tree_sitter": { +      "optional": true +    } +  }, +  "devDependencies": { +    "prebuildify": "^6.0.0" +  }, +  "files": [ +    "grammar.js", +    "binding.gyp", +    "prebuilds/**", +    "bindings/node/*", +    "queries/*", +    "src/**" +  ], +  "tree-sitter": [ +    { +      "scope": "source.gsp", +      "injection-regex": "^gsp$" +    } +  ]  } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..efb1a59 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "tree-sitter-gsp" +description = "Gsp grammar for tree-sitter" +version = "0.0.1" +keywords = ["incremental", "parsing", "tree-sitter", "gsp"] +classifiers = [ +  "Intended Audience :: Developers", +  "License :: OSI Approved :: MIT License", +  "Topic :: Software Development :: Compilers", +  "Topic :: Text Processing :: Linguistic", +  "Typing :: Typed" +] +requires-python = ">=3.8" +license.text = "MIT" +readme = "README.md" + +[project.urls] +Homepage = "https://github.com/tree-sitter/tree-sitter-gsp" + +[project.optional-dependencies] +core = ["tree-sitter~=0.21"] + +[tool.cibuildwheel] +build = "cp38-*" +build-frontend = "build" diff --git a/queries/gsp/highlights.scm b/queries/gsp/highlights.scm index ba33621..17e1134 100644 --- a/queries/gsp/highlights.scm +++ b/queries/gsp/highlights.scm @@ -1,5 +1,7 @@  [">" "-" "=" "@"] @operator  ["{" "}"] @tag.delimiter +((node (node_name @_name) (_)) + (#eq? @_name "/")) @comment  (node_name) @tag  [   (attribute_name) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f639ffc --- /dev/null +++ b/setup.py @@ -0,0 +1,60 @@ +from os.path import isdir, join +from platform import system + +from setuptools import Extension, find_packages, setup +from setuptools.command.build import build +from wheel.bdist_wheel import bdist_wheel + + +class Build(build): +    def run(self): +        if isdir("queries"): +            dest = join(self.build_lib, "tree_sitter_gsp", "queries") +            self.copy_tree("queries", dest) +        super().run() + + +class BdistWheel(bdist_wheel): +    def get_tag(self): +        python, abi, platform = super().get_tag() +        if python.startswith("cp"): +            python, abi = "cp38", "abi3" +        return python, abi, platform + + +setup( +    packages=find_packages("bindings/python"), +    package_dir={"": "bindings/python"}, +    package_data={ +        "tree_sitter_gsp": ["*.pyi", "py.typed"], +        "tree_sitter_gsp.queries": ["*.scm"], +    }, +    ext_package="tree_sitter_gsp", +    ext_modules=[ +        Extension( +            name="_binding", +            sources=[ +                "bindings/python/tree_sitter_gsp/binding.c", +                "src/parser.c", +                # NOTE: if your language uses an external scanner, add it here. +            ], +            extra_compile_args=[ +                "-std=c11", +            ] if system() != "Windows" else [ +                "/std:c11", +                "/utf-8", +            ], +            define_macros=[ +                ("Py_LIMITED_API", "0x03080000"), +                ("PY_SSIZE_T_CLEAN", None) +            ], +            include_dirs=["src"], +            py_limited_api=True, +        ) +    ], +    cmdclass={ +        "build": Build, +        "bdist_wheel": BdistWheel +    }, +    zip_safe=False +) diff --git a/src/grammar.json b/src/grammar.json index 21fc5fc..7809ff6 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -105,7 +105,8 @@      },      "node_name": {        "type": "PATTERN", -      "value": "[a-zA-Z:_\\u{000C0}-\\u{000D6}\\u{000D8}-\\u{000F6}\\u{000F8}-\\u{002FF}\\u{00370}-\\u{0037D}\\u{0037F}-\\u{01FFF}\\u{0200C}-\\u{0200D}\\u{02070}-\\u{0218F}\\u{02C00}-\\u{02FEF}\\u{03001}-\\u{0D7FF}\\u{0F900}-\\u{0FDCF}\\u{0FDF0}-\\u{0FFFD}\\u{10000}-\\u{EFFFF}][a-zA-Z0-9:_\\-.·\\u{00300}-\\u{0036F}\\u{0203F}-\\u{02040}\\u{000C0}-\\u{000D6}\\u{000D8}-\\u{000F6}\\u{000F8}-\\u{002FF}\\u{00370}-\\u{0037D}\\u{0037F}-\\u{01FFF}\\u{0200C}-\\u{0200D}\\u{02070}-\\u{0218F}\\u{02C00}-\\u{02FEF}\\u{03001}-\\u{0D7FF}\\u{0F900}-\\u{0FDCF}\\u{0FDF0}-\\u{0FFFD}\\u{10000}-\\u{EFFFF}]*" +      "value": "[\\/a-zA-Z:_\\u{000C0}-\\u{000D6}\\u{000D8}-\\u{000F6}\\u{000F8}-\\u{002FF}\\u{00370}-\\u{0037D}\\u{0037F}-\\u{01FFF}\\u{0200C}-\\u{0200D}\\u{02070}-\\u{0218F}\\u{02C00}-\\u{02FEF}\\u{03001}-\\u{0D7FF}\\u{0F900}-\\u{0FDCF}\\u{0FDF0}-\\u{0FFFD}\\u{10000}-\\u{EFFFF}][a-zA-Z0-9:_\\-.·\\u{00300}-\\u{0036F}\\u{0203F}-\\u{02040}\\u{000C0}-\\u{000D6}\\u{000D8}-\\u{000F6}\\u{000F8}-\\u{002FF}\\u{00370}-\\u{0037D}\\u{0037F}-\\u{01FFF}\\u{0200C}-\\u{0200D}\\u{02070}-\\u{0218F}\\u{02C00}-\\u{02FEF}\\u{03001}-\\u{0D7FF}\\u{0F900}-\\u{0FDCF}\\u{0FDF0}-\\u{0FFFD}\\u{10000}-\\u{EFFFF}]*", +      "flags": "u"      },      "text": {        "type": "REPEAT1", @@ -188,11 +189,13 @@      },      "class_shorthand": {        "type": "PATTERN", -      "value": "\\.\\P{White_Space}+" +      "value": "\\.\\P{White_Space}+", +      "flags": "u"      },      "id_shorthand": {        "type": "PATTERN", -      "value": "#\\P{White_Space}+" +      "value": "#\\P{White_Space}+", +      "flags": "u"      },      "attribute_name": {        "type": "PATTERN", @@ -204,7 +207,8 @@      },      "_S": {        "type": "PATTERN", -      "value": "\\p{White_Space}+" +      "value": "\\p{White_Space}+", +      "flags": "u"      }    },    "extras": [ @@ -219,4 +223,3 @@    "inline": [],    "supertypes": []  } - diff --git a/src/parser.c b/src/parser.c index 2567530..47501ef 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,7 +1,6 @@ -#include <tree_sitter/parser.h> +#include "tree_sitter/parser.h"  #if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push  #pragma GCC diagnostic ignored "-Wmissing-field-initializers"  #endif @@ -16,7 +15,7 @@  #define MAX_ALIAS_SEQUENCE_LENGTH 6  #define PRODUCTION_ID_COUNT 1 -enum { +enum ts_symbol_identifiers {    anon_sym_GT = 1,    anon_sym_LBRACE = 2,    anon_sym_RBRACE = 3, @@ -247,97 +246,27 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = {    [47] = 35,  }; -static inline bool sym_node_name_character_set_1(int32_t c) { -  return (c < 895 -    ? (c < 192 -      ? (c < '_' -        ? (c < 'A' -          ? c == ':' -          : c <= 'Z') -        : (c <= '_' || (c >= 'a' && c <= 'z'))) -      : (c <= 214 || (c < 248 -        ? (c >= 216 && c <= 246) -        : (c <= 767 || (c >= 880 && c <= 893))))) -    : (c <= 8191 || (c < 12289 -      ? (c < 8304 -        ? (c >= 8204 && c <= 8205) -        : (c <= 8591 || (c >= 11264 && c <= 12271))) -      : (c <= 55295 || (c < 65008 -        ? (c >= 63744 && c <= 64975) -        : (c <= 65533 || (c >= 65536 && c <= 983039))))))); -} - -static inline bool sym_node_name_character_set_2(int32_t c) { -  return (c < 895 -    ? (c < 'a' -      ? (c < 'A' -        ? (c < '0' -          ? (c >= '-' && c <= '.') -          : c <= ':') -        : (c <= 'Z' || c == '_')) -      : (c <= 'z' || (c < 216 -        ? (c < 192 -          ? c == 183 -          : c <= 214) -        : (c <= 246 || (c >= 248 && c <= 893))))) -    : (c <= 8191 || (c < 12289 -      ? (c < 8304 -        ? (c < 8255 -          ? (c >= 8204 && c <= 8205) -          : c <= 8256) -        : (c <= 8591 || (c >= 11264 && c <= 12271))) -      : (c <= 55295 || (c < 65008 -        ? (c >= 63744 && c <= 64975) -        : (c <= 65533 || (c >= 65536 && c <= 983039))))))); -} +static TSCharacterRange sym_node_name_character_set_1[] = { +  {'/', '/'}, {':', ':'}, {'A', 'Z'}, {'_', '_'}, {'a', 'z'}, {0xc0, 0xd6}, {0xd8, 0xf6}, {0xf8, 0x2ff}, +  {0x370, 0x37d}, {0x37f, 0x1fff}, {0x200c, 0x200d}, {0x2070, 0x218f}, {0x2c00, 0x2fef}, {0x3001, 0xd7ff}, {0xf900, 0xfdcf}, {0xfdf0, 0xfffd}, +  {0x10000, 0xeffff}, +}; -static inline bool sym_literal_text_character_set_1(int32_t c) { -  return (c < 8192 -    ? (c < 133 -      ? (c < ' ' -        ? (c >= '\t' && c <= '\r') -        : c <= ' ') -      : (c <= 133 || (c < 5760 -        ? c == 160 -        : c <= 5760))) -    : (c <= 8202 || (c < 8287 -      ? (c < 8239 -        ? (c >= 8232 && c <= 8233) -        : c <= 8239) -      : (c <= 8287 || c == 12288)))); -} +static TSCharacterRange sym_node_name_character_set_2[] = { +  {'-', '.'}, {'0', ':'}, {'A', 'Z'}, {'_', '_'}, {'a', 'z'}, {0xb7, 0xb7}, {0xc0, 0xd6}, {0xd8, 0xf6}, +  {0xf8, 0x37d}, {0x37f, 0x1fff}, {0x200c, 0x200d}, {0x203f, 0x2040}, {0x2070, 0x218f}, {0x2c00, 0x2fef}, {0x3001, 0xd7ff}, {0xf900, 0xfdcf}, +  {0xfdf0, 0xfffd}, {0x10000, 0xeffff}, +}; -static inline bool sym_id_shorthand_character_set_1(int32_t c) { -  return (c < 5760 -    ? (c < ' ' -      ? (c < '\t' -        ? c == 0 -        : c <= '\r') -      : (c <= ' ' || (c < 160 -        ? c == 133 -        : c <= 160))) -    : (c <= 5760 || (c < 8239 -      ? (c < 8232 -        ? (c >= 8192 && c <= 8202) -        : c <= 8233) -      : (c <= 8239 || (c < 12288 -        ? c == 8287 -        : c <= 12288))))); -} +static TSCharacterRange sym_class_shorthand_character_set_1[] = { +  {0, 0x08}, {0x0e, 0x1f}, {'!', 0x84}, {0x86, 0x9f}, {0xa1, 0x167f}, {0x1681, 0x1fff}, {0x200b, 0x2027}, {0x202a, 0x202e}, +  {0x2030, 0x205e}, {0x2060, 0x2fff}, {0x3001, 0x10ffff}, +}; -static inline bool sym__S_character_set_1(int32_t c) { -  return (c < 8192 -    ? (c < 133 -      ? (c < ' ' -        ? (c >= '\t' && c <= '\r') -        : c <= ' ') -      : (c <= 133 || c == 160)) -    : (c <= 8202 || (c < 8287 -      ? (c < 8239 -        ? (c >= 8232 && c <= 8233) -        : c <= 8239) -      : (c <= 8287 || c == 12288)))); -} +static TSCharacterRange sym__S_character_set_1[] = { +  {'\t', '\r'}, {' ', ' '}, {0x85, 0x85}, {0xa0, 0xa0}, {0x1680, 0x1680}, {0x2000, 0x200a}, {0x2028, 0x2029}, {0x202f, 0x202f}, +  {0x205f, 0x205f}, {0x3000, 0x3000}, +};  static bool ts_lex(TSLexer *lexer, TSStateId state) {    START_LEXER(); @@ -345,18 +274,20 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {    switch (state) {      case 0:        if (eof) ADVANCE(8); -      if (lookahead == '"') ADVANCE(1); -      if (lookahead == '#') ADVANCE(4); -      if (lookahead == '-') ADVANCE(12); -      if (lookahead == '.') ADVANCE(5); -      if (lookahead == '=') ADVANCE(13); -      if (lookahead == '>') ADVANCE(9); -      if (lookahead == '@') ADVANCE(16); -      if (lookahead == '{') ADVANCE(10); -      if (lookahead == '}') ADVANCE(11); -      if (lookahead == 5760) ADVANCE(14); -      if (sym__S_character_set_1(lookahead)) ADVANCE(23); -      if (sym_node_name_character_set_1(lookahead)) ADVANCE(15); +      ADVANCE_MAP( +        '"', 1, +        '#', 4, +        '-', 12, +        '.', 5, +        '=', 13, +        '>', 9, +        '@', 16, +        '{', 10, +        '}', 11, +        0x1680, 14, +      ); +      if (set_contains(sym__S_character_set_1, 10, lookahead)) ADVANCE(23); +      if (set_contains(sym_node_name_character_set_1, 17, lookahead)) ADVANCE(15);        END_STATE();      case 1:        if (lookahead == '"') ADVANCE(22); @@ -367,7 +298,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {        if (lookahead == '@') ADVANCE(16);        if (lookahead == '\\') ADVANCE(3);        if (lookahead == '}') ADVANCE(11); -      if (sym_literal_text_character_set_1(lookahead)) ADVANCE(17); +      if (set_contains(sym__S_character_set_1, 10, lookahead)) ADVANCE(17);        if (lookahead != 0) ADVANCE(18);        END_STATE();      case 3: @@ -376,10 +307,10 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {            lookahead == '}') ADVANCE(18);        END_STATE();      case 4: -      if (!sym_id_shorthand_character_set_1(lookahead)) ADVANCE(20); +      if ((!eof && set_contains(sym_class_shorthand_character_set_1, 11, lookahead))) ADVANCE(20);        END_STATE();      case 5: -      if (!sym_id_shorthand_character_set_1(lookahead)) ADVANCE(19); +      if ((!eof && set_contains(sym_class_shorthand_character_set_1, 11, lookahead))) ADVANCE(19);        END_STATE();      case 6:        if (lookahead != 0 && @@ -393,7 +324,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {        if (lookahead == '=') ADVANCE(13);        if (lookahead == '{') ADVANCE(10);        if (lookahead == '}') ADVANCE(11); -      if (sym_literal_text_character_set_1(lookahead)) ADVANCE(23); +      if (set_contains(sym__S_character_set_1, 10, lookahead)) ADVANCE(23);        if (lookahead == '-' ||            ('0' <= lookahead && lookahead <= '9') ||            ('A' <= lookahead && lookahead <= 'Z') || @@ -420,13 +351,13 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {        END_STATE();      case 14:        ACCEPT_TOKEN(sym_node_name); -      if (lookahead == 5760) ADVANCE(14); -      if (sym__S_character_set_1(lookahead)) ADVANCE(23); -      if (sym_node_name_character_set_2(lookahead)) ADVANCE(15); +      if (lookahead == 0x1680) ADVANCE(14); +      if (set_contains(sym__S_character_set_1, 10, lookahead)) ADVANCE(23); +      if (set_contains(sym_node_name_character_set_2, 18, lookahead)) ADVANCE(15);        END_STATE();      case 15:        ACCEPT_TOKEN(sym_node_name); -      if (sym_node_name_character_set_2(lookahead)) ADVANCE(15); +      if (set_contains(sym_node_name_character_set_2, 18, lookahead)) ADVANCE(15);        END_STATE();      case 16:        ACCEPT_TOKEN(anon_sym_AT); @@ -434,7 +365,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {      case 17:        ACCEPT_TOKEN(sym_literal_text);        if (lookahead == '\\') ADVANCE(3); -      if (sym_literal_text_character_set_1(lookahead)) ADVANCE(17); +      if (set_contains(sym__S_character_set_1, 10, lookahead)) ADVANCE(17);        if (lookahead != 0 &&            lookahead != '@' &&            lookahead != '}') ADVANCE(18); @@ -448,11 +379,11 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {        END_STATE();      case 19:        ACCEPT_TOKEN(sym_class_shorthand); -      if (!sym_id_shorthand_character_set_1(lookahead)) ADVANCE(19); +      if ((!eof && set_contains(sym_class_shorthand_character_set_1, 11, lookahead))) ADVANCE(19);        END_STATE();      case 20:        ACCEPT_TOKEN(sym_id_shorthand); -      if (!sym_id_shorthand_character_set_1(lookahead)) ADVANCE(20); +      if ((!eof && set_contains(sym_class_shorthand_character_set_1, 11, lookahead))) ADVANCE(20);        END_STATE();      case 21:        ACCEPT_TOKEN(sym_attribute_name); @@ -467,7 +398,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {        END_STATE();      case 23:        ACCEPT_TOKEN(sym__S); -      if (sym_literal_text_character_set_1(lookahead)) ADVANCE(23); +      if (set_contains(sym__S_character_set_1, 10, lookahead)) ADVANCE(23);        END_STATE();      default:        return false; @@ -1048,7 +979,7 @@ static const TSParseActionEntry ts_parse_actions[] = {    [0] = {.entry = {.count = 0, .reusable = false}},    [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),    [3] = {.entry = {.count = 1, .reusable = false}}, SHIFT_EXTRA(), -  [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0), +  [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0, 0, 0),    [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(35),    [9] = {.entry = {.count = 1, .reusable = false}}, SHIFT(9),    [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(30), @@ -1065,36 +996,36 @@ static const TSParseActionEntry ts_parse_actions[] = {    [33] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),    [35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5),    [37] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), -  [39] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), -  [41] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(35), -  [44] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2), SHIFT_REPEAT(9), -  [47] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute_list, 1), -  [49] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_attribute_list_repeat1, 2), -  [51] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_attribute_list_repeat1, 2), SHIFT_REPEAT(24), -  [54] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_attribute_list_repeat1, 2), SHIFT_REPEAT(18), -  [57] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node_body, 1), +  [39] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), +  [41] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(35), +  [44] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(9), +  [47] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute_list, 1, 0, 0), +  [49] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_attribute_list_repeat1, 2, 0, 0), +  [51] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_attribute_list_repeat1, 2, 0, 0), SHIFT_REPEAT(24), +  [54] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_attribute_list_repeat1, 2, 0, 0), SHIFT_REPEAT(18), +  [57] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node_body, 1, 0, 0),    [59] = {.entry = {.count = 1, .reusable = true}}, SHIFT(27),    [61] = {.entry = {.count = 1, .reusable = true}}, SHIFT(25), -  [63] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1), -  [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 1), +  [63] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0), +  [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 1, 0, 0),    [67] = {.entry = {.count = 1, .reusable = true}}, SHIFT(37), -  [69] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 5), -  [71] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 5), -  [73] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 3), -  [75] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 3), -  [77] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 6), -  [79] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 6), -  [81] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 4), -  [83] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 4), -  [85] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_text_repeat1, 2), -  [87] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_text_repeat1, 2), SHIFT_REPEAT(27), -  [90] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_text_repeat1, 2), SHIFT_REPEAT(23), -  [93] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_text, 1), +  [69] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 5, 0, 0), +  [71] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 5, 0, 0), +  [73] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 3, 0, 0), +  [75] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 3, 0, 0), +  [77] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 6, 0, 0), +  [79] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 6, 0, 0), +  [81] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node, 4, 0, 0), +  [83] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_node, 4, 0, 0), +  [85] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_text_repeat1, 2, 0, 0), +  [87] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_text_repeat1, 2, 0, 0), SHIFT_REPEAT(27), +  [90] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_text_repeat1, 2, 0, 0), SHIFT_REPEAT(23), +  [93] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_text, 1, 0, 0),    [95] = {.entry = {.count = 1, .reusable = true}}, SHIFT(23), -  [97] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 3), +  [97] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 3, 0, 0),    [99] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47),    [101] = {.entry = {.count = 1, .reusable = false}}, SHIFT(10), -  [103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node_body, 2), +  [103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_node_body, 2, 0, 0),    [105] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21),    [107] = {.entry = {.count = 1, .reusable = false}}, SHIFT(11),    [109] = {.entry = {.count = 1, .reusable = true}}, SHIFT(26), @@ -1108,11 +1039,15 @@ static const TSParseActionEntry ts_parse_actions[] = {  #ifdef __cplusplus  extern "C" {  #endif -#ifdef _WIN32 -#define extern __declspec(dllexport) +#ifdef TREE_SITTER_HIDE_SYMBOLS +#define TS_PUBLIC +#elif defined(_WIN32) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default")))  #endif -extern const TSLanguage *tree_sitter_gsp(void) { +TS_PUBLIC const TSLanguage *tree_sitter_gsp(void) {    static const TSLanguage language = {      .version = LANGUAGE_VERSION,      .symbol_count = SYMBOL_COUNT, diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1f4466d --- /dev/null +++ b/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc  ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc  ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free    ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc  malloc +#endif +#ifndef ts_calloc +#define ts_calloc  calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free    free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h new file mode 100644 index 0000000..15a3b23 --- /dev/null +++ b/src/tree_sitter/array.h @@ -0,0 +1,290 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T)       \ +  struct {             \ +    T *contents;       \ +    uint32_t size;     \ +    uint32_t capacity; \ +  } + +/// Initialize an array. +#define array_init(self) \ +  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ +  { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ +  (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ +  _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element)                            \ +  (_array__grow((Array *)(self), 1, array_elem_size(self)), \ +   (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ +  do { \ +    if ((count) == 0) break; \ +    _array__grow((Array *)(self), count, array_elem_size(self)); \ +    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ +    (self)->size += (count); \ +  } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other)                                       \ +  array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents)                    \ +  _array__splice(                                               \ +    (Array *)(self), array_elem_size(self), (self)->size, \ +    0, count,  contents                                        \ +  ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents)  \ +  _array__splice(                                                       \ +    (Array *)(self), array_elem_size(self), _index,                \ +    old_count, new_count, new_contents                                 \ +  ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ +  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ +  _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ +  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ +  _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ +  _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ +  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ +  do { \ +    unsigned _index, _exists; \ +    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ +    if (!_exists) array_insert(self, _index, value); \ +  } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ +  do { \ +    unsigned _index, _exists; \ +    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ +    if (!_exists) array_insert(self, _index, value); \ +  } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { +  if (self->contents) { +    ts_free(self->contents); +    self->contents = NULL; +    self->size = 0; +    self->capacity = 0; +  } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, +                                uint32_t index) { +  assert(index < self->size); +  char *contents = (char *)self->contents; +  memmove(contents + index * element_size, contents + (index + 1) * element_size, +          (self->size - index - 1) * element_size); +  self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { +  if (new_capacity > self->capacity) { +    if (self->contents) { +      self->contents = ts_realloc(self->contents, new_capacity * element_size); +    } else { +      self->contents = ts_malloc(new_capacity * element_size); +    } +    self->capacity = new_capacity; +  } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { +  _array__reserve(self, element_size, other->size); +  self->size = other->size; +  memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { +  Array swap = *other; +  *other = *self; +  *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { +  uint32_t new_size = self->size + count; +  if (new_size > self->capacity) { +    uint32_t new_capacity = self->capacity * 2; +    if (new_capacity < 8) new_capacity = 8; +    if (new_capacity < new_size) new_capacity = new_size; +    _array__reserve(self, element_size, new_capacity); +  } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, +                                 uint32_t index, uint32_t old_count, +                                 uint32_t new_count, const void *elements) { +  uint32_t new_size = self->size + new_count - old_count; +  uint32_t old_end = index + old_count; +  uint32_t new_end = index + new_count; +  assert(old_end <= self->size); + +  _array__reserve(self, element_size, new_size); + +  char *contents = (char *)self->contents; +  if (self->size > old_end) { +    memmove( +      contents + new_end * element_size, +      contents + old_end * element_size, +      (self->size - old_end) * element_size +    ); +  } +  if (new_count > 0) { +    if (elements) { +      memcpy( +        (contents + index * element_size), +        elements, +        new_count * element_size +      ); +    } else { +      memset( +        (contents + index * element_size), +        0, +        new_count * element_size +      ); +    } +  } +  self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ +  do { \ +    *(_index) = start; \ +    *(_exists) = false; \ +    uint32_t size = (self)->size - *(_index); \ +    if (size == 0) break; \ +    int comparison; \ +    while (size > 1) { \ +      uint32_t half_size = size / 2; \ +      uint32_t mid_index = *(_index) + half_size; \ +      comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ +      if (comparison <= 0) *(_index) = mid_index; \ +      size -= half_size; \ +    } \ +    comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ +    if (comparison == 0) *(_exists) = true; \ +    else if (comparison < 0) *(_index) += 1; \ +  } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif  // TREE_SITTER_ARRAY_H_ diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h index 2b14ac1..17f0e94 100644 --- a/src/tree_sitter/parser.h +++ b/src/tree_sitter/parser.h @@ -13,9 +13,8 @@ extern "C" {  #define ts_builtin_sym_end 0  #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 -typedef uint16_t TSStateId; -  #ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId;  typedef uint16_t TSSymbol;  typedef uint16_t TSFieldId;  typedef struct TSLanguage TSLanguage; @@ -87,6 +86,11 @@ typedef union {    } entry;  } TSParseActionEntry; +typedef struct { +  int32_t start; +  int32_t end; +} TSCharacterRange; +  struct TSLanguage {    uint32_t version;    uint32_t symbol_count; @@ -126,13 +130,38 @@ struct TSLanguage {    const TSStateId *primary_state_ids;  }; +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { +  uint32_t index = 0; +  uint32_t size = len - index; +  while (size > 1) { +    uint32_t half_size = size / 2; +    uint32_t mid_index = index + half_size; +    TSCharacterRange *range = &ranges[mid_index]; +    if (lookahead >= range->start && lookahead <= range->end) { +      return true; +    } else if (lookahead > range->end) { +      index = mid_index; +    } +    size -= half_size; +  } +  TSCharacterRange *range = &ranges[index]; +  return (lookahead >= range->start && lookahead <= range->end); +} +  /*   *  Lexer Macros   */ +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif +  #define START_LEXER()           \    bool result = false;          \    bool skip = false;            \ +  UNUSED                        \    bool eof = false;             \    int32_t lookahead;            \    goto start;                   \ @@ -148,6 +177,17 @@ struct TSLanguage {      goto next_state;         \    } +#define ADVANCE_MAP(...)                                              \ +  {                                                                   \ +    static const uint16_t map[] = { __VA_ARGS__ };                    \ +    for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) {  \ +      if (map[i] == lookahead) {                                      \ +        state = map[i + 1];                                           \ +        goto next_state;                                              \ +      }                                                               \ +    }                                                                 \ +  } +  #define SKIP(state_value) \    {                       \      skip = true;          \ @@ -166,7 +206,7 @@ struct TSLanguage {   *  Parse Table Macros   */ -#define SMALL_STATE(id) id - LARGE_STATE_COUNT +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)  #define STATE(id) id @@ -176,7 +216,7 @@ struct TSLanguage {    {{                                  \      .shift = {                        \        .type = TSParseActionTypeShift, \ -      .state = state_value            \ +      .state = (state_value)          \      }                                 \    }} @@ -184,7 +224,7 @@ struct TSLanguage {    {{                                  \      .shift = {                        \        .type = TSParseActionTypeShift, \ -      .state = state_value,           \ +      .state = (state_value),         \        .repetition = true              \      }                                 \    }} @@ -197,14 +237,15 @@ struct TSLanguage {      }                                 \    }} -#define REDUCE(symbol_val, child_count_val, ...) \ -  {{                                             \ -    .reduce = {                                  \ -      .type = TSParseActionTypeReduce,           \ -      .symbol = symbol_val,                      \ -      .child_count = child_count_val,            \ -      __VA_ARGS__                                \ -    },                                           \ +#define REDUCE(symbol_name, children, precedence, prod_id) \ +  {{                                                       \ +    .reduce = {                                            \ +      .type = TSParseActionTypeReduce,                     \ +      .symbol = symbol_name,                               \ +      .child_count = children,                             \ +      .dynamic_precedence = precedence,                    \ +      .production_id = prod_id                             \ +    },                                                     \    }}  #define RECOVER()                    \  |