From 6a46f81f4acd9ca398487220cb5dd02c701fc138 Mon Sep 17 00:00:00 2001
From: Simon van Bernem
Date: Mon, 10 Jun 2024 19:26:06 +0200
Subject: [PATCH 1/2] Added support for documentSymbolProvider and workspaceSymbolProvider

---
Review notes (not part of the commit message; the text between the "---"
line and the first "diff --git" line is not applied):

- handle_workspace_symbol: the per-character loop iterated over
  request.params.query instead of the current search term, so a query made of
  several space-separated terms also searched for the space character and
  could not match. The loop now walks `part`, and every term is matched
  against the whole symbol name, as the comment in that function describes.
- handle_document_symbol: no response was sent when the declaration hierarchy
  of the file was empty, and the result of table_find_pointer(..., null) was
  dereferenced without a check. The handler now always replies (with an empty
  list when there is nothing to report).
- The number of added lines per hunk is unchanged. Indentation in this patch
  was reconstructed, so context lines may need --ignore-whitespace to apply.

 server/lsp_interface.jai |  64 ++++++++--------
 server/main.jai          |  20 +++++
 server/program.jai       |  34 ++++++++-
 server/symbols.jai       | 154 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 239 insertions(+), 33 deletions(-)
 create mode 100644 server/symbols.jai

diff --git a/server/lsp_interface.jai b/server/lsp_interface.jai
index d9b337d..feb8811 100644
--- a/server/lsp_interface.jai
+++ b/server/lsp_interface.jai
@@ -110,6 +110,20 @@ LSP_Request_Message_Document_Symbol :: struct {
     }
 }
 
+LSP_Request_Message_Workspace_Symbol :: struct {
+    using request: LSP_Request_Message;
+    params: struct{
+        query: string;
+    }
+}
+
+LSP_Workspace_Symbol :: struct{
+    name: string;
+    kind: LSP_Symbol_Kind;
+    //tags: Tag
+    location: LSP_Location;
+}
+
 LSP_Did_Open_Text_Document :: struct {
     using request: LSP_Request_Message;
     params: struct {
@@ -402,36 +416,6 @@ LSP_Partial_Result_Params :: struct {
 
 // 📌
 LSP_Document_Symbol :: struct {
-
-    Kind :: enum s64 {
-        FILE :: 1;
-        MODULE :: 2;
-        NAMESPACE :: 3;
-        PACKAGE :: 4;
-        CLASS :: 5;
-        METHOD :: 6;
-        PROPERTY :: 7;
-        FIELD :: 8;
-        CONSTRUCTOR :: 9;
-        ENUM :: 10;
-        INTERFACE :: 11;
-        FUNCTION :: 12;
-        VARIABLE :: 13;
-        CONSTANT :: 14;
-        STRING :: 15;
-        NUMBER :: 16;
-        BOOLEAN :: 17;
-        ARRAY :: 18;
-        OBJECT :: 19;
-        KEY :: 20;
-        NULL :: 21;
-        ENUMMEMBER :: 22;
-        STRUCT :: 23;
-        EVENT :: 24;
-        OPERATOR :: 25;
-        TYPEPARAMETER :: 26;
-    }
-
     name: string;
     kind: s64;
     range: LSP_Range;
@@ -490,7 +474,8 @@ LSP_Request_Message_Initialize :: struct {
 LSP_Result_Initialize :: struct {
     capabilities: struct {
         // semanticTokensProvider := true;
-        // documentSymbolProvider := true;
+        documentSymbolProvider := true;
+        workspaceSymbolProvider := true;
         definitionProvider := true;
         // hoverProvider := true;
 
@@ -765,6 +750,23 @@ lsp_respond :: (id: u32, data: $T) {
     });
 }
 
+lsp_respond_with_already_json_serialized_result_data :: (id: u32, data: string){
+    Reply_With_Placeholder :: struct {
+        using response: LSP_Response_Message;
+        result := "T";
+    }
+
+    body_with_placeholder := json_write_string(Reply_With_Placeholder.{id = id});
+    defer free(body_with_placeholder);
+
+    found_separator, prefix, postfix := split_from_right(body_with_placeholder, "\"T\"");
+
+    assert(found_separator);
+    final_body := tprint("%0%0%0", prefix, data, postfix);
+
+    print("Content-Length: %\r\n\r\n%", final_body.count, final_body);
+}
+
 lsp_respond_with_error :: (id: u32, code: LSP_Error_Codes, message: string, data: $T) {
     reply: struct {
         using response: LSP_Response_Message;
diff --git a/server/main.jai b/server/main.jai
index a91c77f..ce24338 100644
--- a/server/main.jai
+++ b/server/main.jai
@@ -322,6 +322,25 @@ handle_request :: (request: LSP_Request_Message, raw_request: string) {
             }
 
             handle_hover(body);
+        case "textDocument/documentSymbol";
+            //ZoneScoped("textDocument/documentSymbol");
+            success, body := json_parse_string(raw_request, LSP_Request_Message_Document_Symbol);
+            if !success {
+                log_error("Unable to parse textDocument/documentSymbol message");
+                return;
+            }
+
+            handle_document_symbol(body);
+
+        case "workspace/symbol";
+            //ZoneScoped("workspace/symbol");
+            success, body := json_parse_string(raw_request, LSP_Request_Message_Workspace_Symbol);
+            if !success {
+                log_error("Unable to parse workspace/symbol message");
+                return;
+            }
+
+            handle_workspace_symbol(body);
     }
 }
 
@@ -410,6 +429,7 @@ array_reverse :: (array: []$T) {
 
 #load "completition.jai";
 #load "goto.jai";
+#load "symbols.jai";
 #load "signature_help.jai";
 #load "hover.jai";
 
diff --git a/server/program.jai b/server/program.jai
index 4b17dd0..fc17574 100644
--- a/server/program.jai
+++ b/server/program.jai
@@ -10,7 +10,9 @@ Program_File :: struct {
     nodes: [..]*Node;
     declarations: Table(string, *Declaration);
     resolved_identifiers: Table(*Node, []*Declaration);
-    linked_files: []Linked_File; // @TODO: this is probably stupid name for this!
+    declaration_hierarchy: Table(*Declaration, [..]*Declaration);
+    serialized_workspace_symbols: Table(string, string);
+    linked_files : [] Linked_File; // @TODO: this is probably stupid name for this!
 }
 
 Linked_File :: struct {
@@ -21,15 +23,21 @@ Linked_File :: struct {
 init_file :: (using file: Program_File) {
     init(*declarations);
     init(*resolved_identifiers);
+    init(*declaration_hierarchy);
+    init(*serialized_workspace_symbols);
 }
 
 deinit_file :: (using file: Program_File) {
     deinit(*declarations);
+    deinit(*declaration_hierarchy);
+    deinit(*serialized_workspace_symbols);
     array_free(nodes);
 }
 
 reset_file :: (using file: Program_File) {
     table_reset(*declarations);
+    table_reset(*declaration_hierarchy);
+    table_reset(*serialized_workspace_symbols);
     array_reset(*nodes);
     array_reset(*loads);
     array_reset(*imports);
@@ -115,12 +123,15 @@ parse_file :: (path: string, force := false) {
 }
 
 analyze_files :: () {
+    //ZoneScoped();
     for file: server.files_to_be_analyzed {
         file.linked_files = get_file_links(file);
     }
 
     for file: server.files_to_be_analyzed {
         resolve_identifiers(file);
+        build_declaration_hierarchy(file);
+        generate_and_serialize_workspace_symbols_for_file(file);
     }
 
     array_reset(*server.files_to_be_analyzed);
@@ -257,6 +268,23 @@ resolve_identifiers :: (file: *Program_File) {
     }
 }
 
+build_declaration_hierarchy :: (file: *Program_File) {
+    //ZoneScoped();
+    for file.declarations{
+        parent_declaration: *Declaration;
+
+        if it.parent
+            parent_declaration = cast(*Declaration) get_node_nearest_parent(it.parent, kind => kind == .DECLARATION);
+
+        children := find_or_add(*file.declaration_hierarchy, parent_declaration);
+
+        array_add(children, it);
+    }
+
+    for file.declaration_hierarchy
+        quick_sort(it, (a, b) => ifx a.location.l0 != b.location.l0 then a.location.l0 - b.location.l0 else a.location.c0 - b.location.c0);
+}
+
 get_node_by_location :: (file: *Program_File, location: Node.Location, filter: Node.Kind = .UNINITIALIZATED) -> *Node {
     nearest_node: *Node;
 
@@ -918,4 +946,6 @@ run_diagnostics :: () {
     }
 
     server.previously_errored_file = file;
-}
\ No newline at end of file
+}
+
+#import "Sort";
\ No newline at end of file
diff --git a/server/symbols.jai b/server/symbols.jai
new file mode 100644
index 0000000..f90604b
--- /dev/null
+++ b/server/symbols.jai
@@ -0,0 +1,154 @@
+expression_to_symbol_kind :: (expression: *Node) -> LSP_Symbol_Kind{
+    if expression{
+        if expression.kind == {
+            case .STRUCT; return .STRUCT;
+            case .ENUM; return .ENUM;
+            case .PROCEDURE; return .FUNCTION;
+            case .OPERATOR_OVERLOAD; return .OPERATOR;
+            case .QUICK_LAMBDA; return .FUNCTION;
+            case .POLYMORPHIC_CONSTANT; return .TYPEPARAMETER;
+            case .DIRECTIVE_IMPORT; return .MODULE;
+            case .DIRECTIVE_BAKE_ARGUMENTS; return .FUNCTION;
+            case .DIRECTIVE_LIBRARY; return .PACKAGE;
+            case .UNION; return .STRUCT;
+
+            // with all of the following, I don't really know how to get enough information about them to confidently determine what they are. A type directive could declare a function type, or a struct type for example. A cast could literally yield anything. I'm sure you could look at the AST here in more detail, but I don't know my way well enough around this codebase to do that. So, as a kind-of-hacky-thing, I just return VARIABLE for all of them. The most important thing is that they show imo, not that the kind matches exactly (I think the kind is mostly for display anyway, not really that functional.)
+
+            case .DIRECTIVE_TYPE; #through;
+            case .CAST; #through;
+            case .PROCEDURE_CALL; #through;
+            case .IDENTIFIER; #through;
+            case .BINARY_OPERATION; #through;
+            case .LITERAL;
+                return .VARIABLE;
+            case;
+                //log("Unhandled expression kind % at %:%:%.", expression.kind, expression.location.file, expression.location.l0 + 1, expression.location.c0 + 1);
+                return 0;
+
+            //There are other shortcomings here: I would like to return things like FIELD for struct-members, CONSTANT for constants, also use FILE vs just MODULE or PACKAGE, but for all of these I am not confident what the AST would look like. So I opted to not pretend I can return the maximum detail here and kept it simple instead.
+        }
+    }
+    else{
+        return .VARIABLE;
+    }
+}
+
+handle_document_symbol :: (request: LSP_Request_Message_Document_Symbol) {
+    //ZoneScoped();
+    file_path := normalize_path(request.params.textDocument.uri);
+
+    file := get_file(file_path);
+    if !file {
+        log_error("File does not exists or has not been parser yet! (%)", file_path);
+        lsp_respond(request.id, null);
+        return;
+    }
+
+    create_lsp_document_from_declaration :: (file: *Program_File, using declaration: *Declaration) -> LSP_Document_Symbol{
+        //ZoneScoped();
+        child_declarations, has_children := table_find(*file.declaration_hierarchy, declaration);
+        child_lsp_document_symbols: [..]LSP_Document_Symbol;
+
+        if has_children{
+            for child_declarations{
+                symbol := create_lsp_document_from_declaration(file, it);
+                array_add(*child_lsp_document_symbols, symbol);
+            }
+        }
+
+        result: LSP_Document_Symbol;
+
+        result.name = name;
+        result.kind = xx expression_to_symbol_kind(expression);
+        result.range = .{.{xx location.l0, xx location.c0}, .{xx location.l1, xx location.c1}};
+        result.selectionRange = result.range;
+        result.children = child_lsp_document_symbols;
+
+        return result;
+    }
+
+    // Always reply, even when the file has no declarations: a request that never gets a response leaves the client waiting.
+    lsp_symbols: [..]LSP_Document_Symbol;
+    top_level_declarations := table_find_pointer(*file.declaration_hierarchy, null);
+
+    if top_level_declarations {
+        for top_level_declarations.*
+            array_add(*lsp_symbols, create_lsp_document_from_declaration(file, it));
+    }
+
+    lsp_respond(request.id, lsp_symbols);
+}
+
+generate_and_serialize_workspace_symbols_for_file :: (file: *Program_File){
+    for file.serialized_workspace_symbols
+        free(it);
+
+    table_reset(*file.serialized_workspace_symbols);
+
+    for declaration: file.declarations{
+        symbol: LSP_Workspace_Symbol;
+        symbol.name = declaration.name;
+        symbol.location = node_location_to_lsp_location(declaration.location);
+        symbol.kind = expression_to_symbol_kind(declaration.expression);
+
+        //turns out, if we include all variable declarations (including all parameters and local variables of all functions), then I get like 80,000 symbols in the workspace of my game, and then vscode takes half a second just to process the result. We send it very quickly, because we cache the serialized json, but vscode can't keep up. So I skip it here.
+
+        if symbol.kind == .VARIABLE
+            continue;
+
+        serialized_symbol := json_write_string(symbol);
+
+        table_add(*file.serialized_workspace_symbols, declaration.name, serialized_symbol);
+    }
+}
+
+MAX_NUMBER_OF_WORKSPACE_SYMBOLS_TO_RETURN_TO_PREVENT_VSCODE_FROM_BEING_SUPER_SLOW :: 1000;
+
+handle_workspace_symbol :: (request: LSP_Request_Message_Workspace_Symbol) {
+    //ZoneScoped();
+    query_parts := split(request.params.query, cast(u8) #char " ");
+
+    for query_parts
+        if it.count == 0
+            remove it;
+
+    builder: String_Builder;
+    append(*builder, "[");
+
+    symbol_count := 0;
+
+    for file: server.files{
+        for serialized_symbol, symbol_name: file.serialized_workspace_symbols{
+            if symbol_count == MAX_NUMBER_OF_WORKSPACE_SYMBOLS_TO_RETURN_TO_PREVENT_VSCODE_FROM_BEING_SUPER_SLOW
+                break file;
+
+            //the recommendation from visual studio code is that when searching for tokens, the LSP should be very lenient as to what it filters, so that vscode can then be flexible about ordering and interpreting your search terms. They explicitly say you shouldn't check if the query is a substring, because then you can't have stuff like having the query "abc" match "arrayBoundsCheck", which is apparently something that people want to do. I personally hate that for the query "read vessel", vscode doesn't treat "read" and "vessel" as two different search terms, and so won't show you "readVessel", and also that for "readvessel", "readVessel" would match, but not "vesselRead". Basically, I ideally want to type multiple search terms, and if they are in the name, I want that to match (like in sublime text). Vscode doesn't do that. Whatever.
+            // What I decided to do here is the following: I'm interpreting each space-separated element of the query as its own search term. (vscode doesn't even seem to transmit stuff after the space to the LSP, so if that is a rule and not a bug (which I couldn't find out) this part isn't even relevant). For each search term, we follow the recommendation and consider it matched, if the characters of the search term appear in the symbol name in order, case insensitive.
+
+            for part: query_parts {
+                // Every term is matched on its own against the whole symbol name; a symbol is kept only if all terms match.
+                remaining_name_to_search := symbol_name;
+                for 0..part.count - 1 {
+                    search_character_as_string := slice(part, it, 1);
+                    index := index_of_string_nocase(remaining_name_to_search, search_character_as_string);
+
+                    if index == -1
+                        continue serialized_symbol;
+                    else
+                        remaining_name_to_search = slice(remaining_name_to_search, index + 1, remaining_name_to_search.count - index - 1);
+                }
+            }
+
+            if symbol_count > 0
+                append(*builder, ",");
+
+            append(*builder, serialized_symbol);
+            symbol_count += 1;
+        }
+    }
+
+    append(*builder, "]");
+    json_serialized_symbol_list := builder_to_string(*builder);
+    lsp_respond_with_already_json_serialized_result_data(request.id, json_serialized_symbol_list);
+    free(json_serialized_symbol_list);
+}

From d55e658932339cad67112f2ba5e6a37cf223291e Mon Sep 17 00:00:00 2001
From: Simon van Bernem
Date: Sat, 13 Jul 2024 10:32:47 +0200
Subject: [PATCH 2/2] Added a somewhat hacky hotfix for the problem that get_identifier_decl fails to return declarations for many global function definitions.

---
 server/program.jai | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/server/program.jai b/server/program.jai
index fc17574..c024aa0 100644
--- a/server/program.jai
+++ b/server/program.jai
@@ -644,7 +644,18 @@ get_identifier_decl :: (__file: *Program_File, ident: *Identifier) -> []*Declara
         return decls;
     }
 
-    return .[];
+    //the loop above fails to find identifier declarations for many common symbols. As a hacky fix, we fall back to searching for declarations that match the name of the identifier. This may lead to false positives obviously, but I do not know if this is any worse than what the loop above does. And since we only do this when the loop above has not been successful in finding something, I would argue that getting a 90% correct answer is better than no answer.
+
+    declarations: [..]*Declaration;
+
+    for file: server.files {
+        file_decl, found := table_find(*file.declarations, ident.name);
+
+        if found
+            array_add(*declarations, file_decl);
+    }
+
+    return declarations;
 }
 
 // @TODO: Maybe refactor this to be more useful?