Skip to content

Commit 576bab2

Browse files
committedAug 26, 2023
refactor: migrate syntax kind to codegen
1 parent 6825150 commit 576bab2

15 files changed

+174
-1531
lines changed
 

‎.gitmodules

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "libpg_query"]
2+
path = libpg_query
3+
url = git@github.com:pganalyze/libpg_query.git

‎Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ members = [
55

66
[workspace.dependencies]
77
parser = { path = "./crates/parser", version = "0.0.0" }
8+
codegen = { path = "./crates/codegen", version = "0.0.0" }
89
sourcegen = { path = "./crates/sourcegen", version = "0.0.0" }
910
pg_query_proto_parser = { path = "./crates/pg_query_proto_parser", version = "0.0.0" }
1011
triomphe = { version = "0.1.8", default-features = false, features = ["std"] }

‎crates/codegen/Cargo.toml

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[package]
2+
name = "codegen"
3+
version = "0.0.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
8+
[dependencies]
9+
proc-macro2 = "1.0.66"
10+
quote = "1.0.33"
11+
pg_query_proto_parser.workspace = true
12+
13+
[lib]
14+
proc-macro = true

‎crates/codegen/src/lib.rs

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
mod syntax_kind;
2+
3+
use syntax_kind::syntax_kind_mod;
4+
5+
#[proc_macro]
6+
pub fn syntax_kind(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
7+
syntax_kind_mod(item.into()).into()
8+
}

‎crates/codegen/src/syntax_kind.rs

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
use std::collections::HashSet;
2+
3+
use pg_query_proto_parser::{Node, ProtoParser, Token};
4+
use proc_macro2::{Ident, Literal};
5+
use quote::{format_ident, quote};
6+
7+
pub fn syntax_kind_mod(_item: proc_macro2::TokenStream) -> proc_macro2::TokenStream {
8+
// let parser = ProtoParser::new(
9+
// "/Users/raminder.singh/src/rust/postgres_lsp/crates/parser/proto/source.proto",
10+
// );
11+
let parser = ProtoParser::new("./crates/parser/proto/source.proto");
12+
let proto_file = parser.parse();
13+
14+
let mut current_enum_names: HashSet<&str> = HashSet::new();
15+
16+
let custom_node_names = custom_node_names();
17+
let custom_node_identifiers = custom_node_identifiers(&custom_node_names);
18+
current_enum_names.extend(&custom_node_names);
19+
20+
let node_identifiers = node_identifiers(&proto_file.nodes, &current_enum_names);
21+
current_enum_names.extend(node_names(&proto_file.nodes));
22+
23+
let token_identifiers = token_identifiers(&proto_file.tokens, &current_enum_names);
24+
let token_value_literals = token_value_literals(&proto_file.tokens, &current_enum_names);
25+
26+
let syntax_kind_impl =
27+
syntax_kind_impl(&node_identifiers, &token_identifiers, &token_value_literals);
28+
29+
quote! {
30+
use cstree::Syntax;
31+
use pg_query::{protobuf::ScanToken, NodeEnum, NodeRef};
32+
33+
/// An u32 enum of all valid syntax elements (nodes and tokens) of the postgres
34+
/// sql dialect, and a few custom ones that are not parsed by pg_query.rs, such
35+
/// as `Whitespace`.
36+
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Syntax)]
37+
#[repr(u32)]
38+
pub enum SyntaxKind {
39+
// custom nodes, which are not parsed by pg_query.rs
40+
#(#custom_node_identifiers),*,
41+
#(#node_identifiers),*,
42+
#(#token_identifiers),*,
43+
}
44+
45+
#syntax_kind_impl
46+
}
47+
}
48+
49+
/// Returns the fixed list of custom node names, in declaration order.
fn custom_node_names() -> Vec<&'static str> {
    const CUSTOM_NODE_NAMES: [&str; 6] = [
        "SourceFile",
        "Comment",
        "Whitespace",
        "Newline",
        "Tab",
        "Stmt",
    ];
    CUSTOM_NODE_NAMES.to_vec()
}
59+
60+
fn node_names(nodes: &[Node]) -> impl Iterator<Item = &str> {
61+
nodes.iter().map(|node| node.name.as_str())
62+
}
63+
64+
fn custom_node_identifiers(custom_node_names: &[&str]) -> Vec<Ident> {
65+
custom_node_names
66+
.iter()
67+
.map(|&node_name| format_ident!("{}", node_name))
68+
.collect()
69+
}
70+
71+
fn node_identifiers(nodes: &[Node], existing_enum_names: &HashSet<&str>) -> Vec<Ident> {
72+
nodes
73+
.iter()
74+
.filter(|&token| !existing_enum_names.contains(token.name.as_str()))
75+
.map(|node| format_ident!("{}", &node.name))
76+
.collect()
77+
}
78+
79+
fn token_identifiers(tokens: &[Token], existing_enum_names: &HashSet<&str>) -> Vec<Ident> {
80+
tokens
81+
.iter()
82+
.filter(|&token| !existing_enum_names.contains(token.name.as_str()))
83+
.map(|token| format_ident!("{}", &token.name))
84+
.collect()
85+
}
86+
87+
fn token_value_literals(tokens: &[Token], existing_enum_names: &HashSet<&str>) -> Vec<Literal> {
88+
tokens
89+
.iter()
90+
.filter(|&token| !existing_enum_names.contains(token.name.as_str()))
91+
.map(|token| Literal::i32_unsuffixed(token.value))
92+
.collect()
93+
}
94+
95+
fn syntax_kind_impl(
96+
node_identifiers: &[Ident],
97+
token_identifiers: &[Ident],
98+
token_value_literals: &[Literal],
99+
) -> proc_macro2::TokenStream {
100+
let new_from_pg_query_node_fn = new_from_pg_query_node_fn(node_identifiers);
101+
let new_from_pg_query_token_fn =
102+
new_from_pg_query_token_fn(token_identifiers, token_value_literals);
103+
quote! {
104+
impl SyntaxKind {
105+
#new_from_pg_query_node_fn
106+
107+
#new_from_pg_query_token_fn
108+
}
109+
}
110+
}
111+
112+
fn new_from_pg_query_node_fn(node_identifiers: &[Ident]) -> proc_macro2::TokenStream {
113+
quote! {
114+
/// Converts a `pg_query` node to a `SyntaxKind`
115+
pub fn new_from_pg_query_node(node: &NodeEnum) -> Self {
116+
match node {
117+
#(NodeEnum::#node_identifiers(_) => SyntaxKind::#node_identifiers),*
118+
}
119+
}
120+
}
121+
}
122+
123+
fn new_from_pg_query_token_fn(
124+
token_identifiers: &[Ident],
125+
token_value_literals: &[Literal],
126+
) -> proc_macro2::TokenStream {
127+
quote! {
128+
/// Converts a `pg_query` token to a `SyntaxKind`
129+
pub fn new_from_pg_query_token(token: &ScanToken) -> Self {
130+
match token.token {
131+
#(#token_value_literals => SyntaxKind::#token_identifiers),*,
132+
_ => panic!("Unknown token"),
133+
}
134+
}
135+
}
136+
}

‎crates/parser/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ serde = { version = "1.0", features = ["derive"] }
1515
env_logger = { version = "0.9.1" }
1616
log = { version = "0.4.20" }
1717

18-
sourcegen.workspace = true
18+
codegen.workspace = true
1919
pg_query_proto_parser.workspace = true

‎crates/parser/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ mod sibling_token;
2424
mod source_file;
2525
mod statement;
2626
mod syntax_error;
27-
mod syntax_kind_generated;
27+
mod syntax_kind_codegen;
2828
mod syntax_node;
2929
mod token_type;
3030

3131
pub use crate::parser::{Parse, Parser};
32-
pub use crate::syntax_kind_generated::SyntaxKind;
32+
pub use crate::syntax_kind_codegen::SyntaxKind;
3333
pub use crate::syntax_node::{SyntaxElement, SyntaxNode, SyntaxToken};

‎crates/parser/src/parser.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use pg_query::NodeEnum;
66

77
use crate::ast_node::RawStmt;
88
use crate::syntax_error::SyntaxError;
9-
use crate::syntax_kind_generated::SyntaxKind;
9+
use crate::syntax_kind_codegen::SyntaxKind;
1010
use crate::syntax_node::SyntaxNode;
1111
use crate::token_type::TokenType;
1212

‎crates/parser/src/sibling_token.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::SyntaxKind;
1+
use crate::syntax_kind_codegen::SyntaxKind;
22

33
impl SyntaxKind {
44
pub fn is_opening_sibling(&self) -> bool {

‎crates/parser/src/source_file.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use logos::Logos;
22

3-
use crate::{parser::Parser, syntax_kind_generated::SyntaxKind};
3+
use crate::{parser::Parser, syntax_kind_codegen::SyntaxKind};
44

55
/// A super simple lexer for sql files that splits the input into individual statements and
66
/// comments.

‎crates/parser/src/statement.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use logos::{Logos, Span};
44
use crate::{
55
parser::Parser,
66
pg_query_utils_generated::get_children,
7-
syntax_kind_generated::SyntaxKind,
7+
syntax_kind_codegen::SyntaxKind,
88
token_type::{
99
get_token_type_from_pg_query_token, get_token_type_from_statement_token, TokenType,
1010
},
+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
use codegen::syntax_kind;
2+
3+
// Expands to the `SyntaxKind` enum and its `new_from_pg_query_*` constructors.
syntax_kind!();

0 commit comments

Comments
 (0)