From 9c1fb190b363980dd99191e4151d4f9caf252133 Mon Sep 17 00:00:00 2001 From: guineveresaenger Date: Tue, 13 Aug 2024 12:22:43 +0200 Subject: [PATCH 1/6] Can discover any nestedness from list items, but only for a single docs path parent --- go.mod | 7 +- go.sum | 2 + pkg/tfgen/docs.go | 325 +++++++++++++++++++++++++++++------------ pkg/tfgen/docs_test.go | 16 +- 4 files changed, 251 insertions(+), 99 deletions(-) diff --git a/go.mod b/go.mod index c0259b1ec..ba7d2f144 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/pulumi/pulumi-terraform-bridge/v3 -go 1.21.12 +go 1.22 + +toolchain go1.22.3 replace github.com/pulumi/pulumi-terraform-bridge/x/muxer => ./x/muxer @@ -43,6 +45,7 @@ require ( github.com/pulumi/schema-tools v0.1.2 github.com/pulumi/terraform-diff-reader v0.0.2 github.com/russross/blackfriday/v2 v2.1.0 + github.com/ryboe/q v1.0.21 github.com/spf13/afero v1.9.5 github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.9.0 @@ -89,6 +92,8 @@ require ( github.com/google/s2a-go v0.1.7 // indirect github.com/hexops/gotextdiff v1.0.3 // indirect github.com/hexops/valast v1.4.4 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-localereader v0.0.1 // indirect diff --git a/go.sum b/go.sum index 3c16dd234..917718953 100644 --- a/go.sum +++ b/go.sum @@ -1941,6 +1941,8 @@ github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfF github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= +github.com/ryboe/q v1.0.21 h1:sbLQFC3eMDEs8q3g4otbrWExLTHEyKISCFuN7Akropc= +github.com/ryboe/q v1.0.21/go.mod h1:g4aI/DhdScxW3WLRGxHXgg5ZkGrk+b4h1h5u2PTCvDc= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 h1:TToq11gyfNlrMFZiYujSekIsPd9AmsA2Bj/iv+s4JHE= diff --git a/pkg/tfgen/docs.go b/pkg/tfgen/docs.go index a46680e92..bb62174e9 100644 --- a/pkg/tfgen/docs.go +++ b/pkg/tfgen/docs.go @@ -35,6 +35,7 @@ import ( "github.com/pulumi/pulumi/sdk/v3/go/common/util/cmdutil" "github.com/pulumi/pulumi/sdk/v3/go/common/util/contract" bf "github.com/russross/blackfriday/v2" + "github.com/ryboe/q" "github.com/spf13/afero" "github.com/yuin/goldmark" gmast "github.com/yuin/goldmark/ast" @@ -360,6 +361,10 @@ var ( "^\\s*[*+-]\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^\\)]*\\)[-\\s]*)?(.*)", ) + descriptionRegexp = regexp.MustCompile( + "\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^\\)]*\\)[-\\s]*)?(.*)", + ) + bulletPointRegexStr = "^\\s*[*+-]" // matches any bullet point-like character attributePathNameRegexStr = "\\s*`([a-z0-9._]*)`" // matches any TF attribute path name @@ -385,7 +390,6 @@ func trimFrontMatter(text []byte) []byte { } return body[idx+3:] } - func splitByMarkdownHeaders(text string, level int) [][]string { // splitByMarkdownHeaders parses text, then walks the resulting AST to find // appropriate header nodes. It uses the location of these header nodes to split @@ -397,7 +401,6 @@ func splitByMarkdownHeaders(text string, level int) [][]string { contract.Assertf(offset >= 0, "The offset generated by chopping of the front-matter cannot be negative") gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension)) - headers := []int{} parse.WalkNode(gm.Parser().Parse(gmtext.NewReader(bytes)), func(heading *gmast.Heading) { if heading.Level != level { @@ -871,6 +874,7 @@ func trackBulletListIndentation(line, name string, tracker []bulletListEntry) [] // description. It returns a struct containing the name and description of the arg, and whether an arg was found. func parseArgFromMarkdownLine(line string) markdownLineInfo { matches := argumentBulletRegexp.FindStringSubmatch(line) + q.Q(matches) var parsed markdownLineInfo if len(matches) > 4 { parsed.name = matches[1] @@ -1021,107 +1025,248 @@ func getNestedBlockNames(line string) []string { return nestedBlockNames } +// TODO: transform subsection into a markdown node -join on newline & parse func parseArgReferenceSection(subsection []string, ret *entityDocs) { - // Variable to remember the last argument we found. - var lastMatch string - // Collection to hold all arguments that headline a nested description. - var nesteds []docsPath - - addNewHeading := func(name, desc, line string) { - // found a property bullet, extract the name and description - if len(nesteds) > 0 { - for _, nested := range nesteds { - // We found this line within a nested field. We should record it as such. - if ret.Arguments[nested] == nil { - totalArgumentsFromDocs++ + // Get a "document" + documentBefore := strings.Join(subsection, "\n") + // save a copy for comparison + docBytes := []byte(documentBefore) + q.Q(documentBefore) + + // Algorithm: + // - Walk the document tree and gather any text that is after a property bullet. + // - Track the level (nestedness?) of said bullet/ indentation/blah and add to current docs path + // - Let's start by finding the bullet points. + // Actually, start by creating a new transformer. No, I may not need a transformer. Ung, why is this so annoying + // Just use Walk? + + // Parse the document using Goldmark parser + gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension)) + + astNode := gm.Parser().Parse(gmtext.NewReader(docBytes)) + var astNesteds []string + var docspath string + //var isFromHeading bool + gmast.Walk(astNode, func(node gmast.Node, enter bool) (gmast.WalkStatus, error) { + // here goes the detection logic + if node.Kind().String() == "ListItem" { + // track nestedness + if enter { + // the list item has a text node, which may have a code span node. + // For any list item, we want to check if it opens with a code span. + // If so, chances are we have a potential entry for our nested docs map. + // It will be list item --> Text --> Code Span, so the grandchild of the list item. + // TODO: still must filter for actual TF entries, AKA this can't catch all lists, to avoid regression + + nodeKind := node.FirstChild().FirstChild().Kind().String() + if nodeKind == "CodeSpan" { + codeSpanNode := node.FirstChild().FirstChild() + bulletListItem := codeSpanNode.Text(docBytes) + // TODO: docspath must be a map because people overload the titles with multiple fields + if docspath == "" { + //q.Q("HERE") + docspath = string(bulletListItem) + } else { + docspath = docspath + "." + string(bulletListItem) + } } - ret.Arguments[nested.join(name)] = &argumentDocs{desc} - } + // TODO: now get the description, somehow. Idon't know which node this is, ugh + beschreibung := node.FirstChild().Text(docBytes) + q.Q(string(beschreibung)) + + lines := node.FirstChild().Lines() + q.Q(lines) + var desc bytes.Buffer + for i := 0; i < lines.Len(); i++ { + line := lines.At(i) + desc.Write(line.Value(docBytes)) + } + // TODO: this does give me the whole bullet point, which is great. Need to clean it up a bit... + + q.Q(desc.String()) + // Extract the description from the line item. We will use a regex match here. + descs := descriptionRegexp.FindStringSubmatch(desc.String()) + q.Q(descs[4]) + + // Now... with our new info - can we read out the return info? + ret.Arguments[docsPath(docspath)] = &argumentDocs{descs[4]} + + } else { + // we are hitting this node on our way back. Append to nested docs paths. + astNesteds = append(astNesteds, docspath) + // Cut off last field in docsPath to remove nestedness + docspathIndex := strings.LastIndex( + docspath, ".") + if docspathIndex > 0 { + docspath = docspath[:docspathIndex] + } else { + docspath = "" + } + q.Q(astNesteds) + q.Q(docspath) - } else { - if genericNestedRegexp.MatchString(line) { - return - } - ret.Arguments[docsPath(name)] = &argumentDocs{description: desc} - totalArgumentsFromDocs++ - } - } - // This function adds the current line as a description to the last matched resource, - //in cases where there's no resource match found on this line. - //It represents a multi-line description for a field. - extendExistingHeading := func(line string) { - if len(nesteds) > 0 { - for _, nested := range nesteds { - line = "\n" + strings.TrimSpace(line) - ret.Arguments[nested.join(lastMatch)].description += line - } - } else { - if genericNestedRegexp.MatchString(line) { - lastMatch = "" - nesteds = []docsPath{} - return } - line = "\n" + strings.TrimSpace(line) - ret.Arguments[docsPath(lastMatch)].description += line } - } + if node.Kind().String() == "Section" { + // A section's first child is its heading. + // TODO: assert that it is of type Heading. + // track nestedness + if enter { + // The text next to an arg reference's section header is assumed to be a resource field. + headerItem := node.FirstChild().Text(docBytes) + // add to docspath + if docspath == "" { + docspath = string(headerItem) + } else { + docspath = docspath + "." + string(headerItem) + } + //isFromHeading = true - // hadSpace tells us if the previous line was blank. - var hadSpace bool + } else { + // We are on our way back up the tree. + // Because the headers are subsections of already found docs, we never want to actually include the + // header item in our astNested list by itself - it should already exist with its own description. - // bulletListTracker is a stack-like collection that tracks the level of nesting for a bulleted list with - // nested lists. The name of the topmost entry represents the nested docs path for the current line. - var bulletListTracker []bulletListEntry + // Cut off last field in docsPath to remove nestedness - for _, line := range subsection { - parsedArg := parseArgFromMarkdownLine(line) - matchFound := parsedArg.isFound - if matchFound { // We have found a new property bullet point. - desc := parsedArg.desc - bulletListTracker = trackBulletListIndentation(line, parsedArg.name, bulletListTracker) - name := bulletListTracker[len(bulletListTracker)-1].name - lastMatch = name - addNewHeading(name, desc, line) - - } else if strings.TrimSpace(line) == "---" { - // --- is a markdown section break. This probably indicates the - // section is over, but we take it to mean that the current - // heading is over. - lastMatch = "" - bulletListTracker = nil - } else if nestedBlockCurrentLine := getNestedBlockNames(line); hadSpace && len(nestedBlockCurrentLine) > 0 { - // This tells us if there's a resource that is about to have subfields (nesteds) - // in subsequent lines. - //empty nesteds - nesteds = []docsPath{} - for _, item := range nestedBlockCurrentLine { - nesteds = append(nesteds, docsPath(item)) - } - lastMatch = "" - bulletListTracker = nil - } else if !isBlank(line) && lastMatch != "" { - // This appends the current line to the previous match's description. - extendExistingHeading(line) - - } else if nestedBlockCurrentLine := getNestedBlockNames(line); len(nestedBlockCurrentLine) > 0 { - // This tells us if there's a resource that is about to have subfields (nesteds) - // in subsequent lines. - //empty nesteds - nesteds = []docsPath{} - for _, item := range nestedBlockCurrentLine { - nesteds = append(nesteds, docsPath(item)) + docspathIndex := strings.LastIndex( + docspath, ".") + if docspathIndex > 0 { + docspath = docspath[:docspathIndex] + } else { + docspath = "" + } + + q.Q("from heading:", astNesteds) + q.Q("from heading:", docspath) } - lastMatch = "" - bulletListTracker = nil - } else if lastMatch != "" { - extendExistingHeading(line) } - hadSpace = isBlank(line) + //case *gmast.Heading: + // // this is where we want to look at the header title + // headerText := child.Text(docBytes) + // q.Q(string(headerText)) + // // TODO: find the resource in the header text + // case section.Section: + // + // + //default: + // //q.Q("defulat") + //} + return gmast.WalkContinue, nil + }) + + var buf bytes.Buffer + // Convert parses the source, applies transformers, and renders output to buf + err := gm.Convert(docBytes, &buf) + if err != nil { + panic(err) } + if documentBefore != string(docBytes) { - for _, v := range ret.Arguments { - v.description = strings.TrimRightFunc(v.description, unicode.IsSpace) + q.Q(string(docBytes)) + panic("Not equal yet") } + + //// Variable to remember the last argument we found. + //var lastMatch string + //// Collection to hold all arguments that headline a nested description. + //var nesteds []docsPath + // + //addNewHeading := func(name, desc, line string) { + // // found a property bullet, extract the name and description + // if len(nesteds) > 0 { + // for _, nested := range nesteds { + // // We found this line within a nested field. We should record it as such. + // if ret.Arguments[nested] == nil { + // totalArgumentsFromDocs++ + // } + // ret.Arguments[nested.join(name)] = &argumentDocs{desc} + // } + // + // } else { + // if genericNestedRegexp.MatchString(line) { + // return + // } + // ret.Arguments[docsPath(name)] = &argumentDocs{description: desc} + // totalArgumentsFromDocs++ + // } + //} + //// This function adds the current line as a description to the last matched resource, + ////in cases where there's no resource match found on this line. + ////It represents a multi-line description for a field. + //extendExistingHeading := func(line string) { + // if len(nesteds) > 0 { + // for _, nested := range nesteds { + // line = "\n" + strings.TrimSpace(line) + // ret.Arguments[nested.join(lastMatch)].description += line + // } + // } else { + // if genericNestedRegexp.MatchString(line) { + // lastMatch = "" + // nesteds = []docsPath{} + // return + // } + // line = "\n" + strings.TrimSpace(line) + // ret.Arguments[docsPath(lastMatch)].description += line + // } + //} + // + //// hadSpace tells us if the previous line was blank. + //var hadSpace bool + // + //// bulletListTracker is a stack-like collection that tracks the level of nesting for a bulleted list with + //// nested lists. The name of the topmost entry represents the nested docs path for the current line. + //var bulletListTracker []bulletListEntry + // + //for _, line := range subsection { + // parsedArg := parseArgFromMarkdownLine(line) + // matchFound := parsedArg.isFound + // if matchFound { // We have found a new property bullet point. + // desc := parsedArg.desc + // bulletListTracker = trackBulletListIndentation(line, parsedArg.name, bulletListTracker) + // name := bulletListTracker[len(bulletListTracker)-1].name + // lastMatch = name + // addNewHeading(name, desc, line) + // + // } else if strings.TrimSpace(line) == "---" { + // // --- is a markdown section break. This probably indicates the + // // section is over, but we take it to mean that the current + // // heading is over. + // lastMatch = "" + // bulletListTracker = nil + // } else if nestedBlockCurrentLine := getNestedBlockNames(line); hadSpace && len(nestedBlockCurrentLine) > 0 { + // // This tells us if there's a resource that is about to have subfields (nesteds) + // // in subsequent lines. + // //empty nesteds + // nesteds = []docsPath{} + // for _, item := range nestedBlockCurrentLine { + // nesteds = append(nesteds, docsPath(item)) + // } + // lastMatch = "" + // bulletListTracker = nil + // } else if !isBlank(line) && lastMatch != "" { + // // This appends the current line to the previous match's description. + // extendExistingHeading(line) + // + // } else if nestedBlockCurrentLine := getNestedBlockNames(line); len(nestedBlockCurrentLine) > 0 { + // // This tells us if there's a resource that is about to have subfields (nesteds) + // // in subsequent lines. + // //empty nesteds + // nesteds = []docsPath{} + // for _, item := range nestedBlockCurrentLine { + // nesteds = append(nesteds, docsPath(item)) + // } + // lastMatch = "" + // bulletListTracker = nil + // } else if lastMatch != "" { + // extendExistingHeading(line) + // } + // hadSpace = isBlank(line) + //} + // + //for _, v := range ret.Arguments { + // v.description = strings.TrimRightFunc(v.description, unicode.IsSpace) + //} } func parseAttributesReferenceSection(subsection []string, ret *entityDocs) { diff --git a/pkg/tfgen/docs_test.go b/pkg/tfgen/docs_test.go index 6c7c7952a..0bcb7625d 100644 --- a/pkg/tfgen/docs_test.go +++ b/pkg/tfgen/docs_test.go @@ -346,20 +346,20 @@ func TestArgumentRegex(t *testing.T) { "* `signing_algorithm` - (Required) Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html).", "* `subject` - (Required) Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified.", "", - "#### subject", - "", - "Contains information about the certificate subject. Identifies the entity that owns or controls the public key in the certificate. The entity can be a user, computer, device, or service.", - "", - "* `common_name` - (Optional) Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length.", - "* `country` - (Optional) Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length.", + //"#### subject", + //"", + //"Contains information about the certificate subject. Identifies the entity that owns or controls the public key in the certificate. The entity can be a user, computer, device, or service.", + //"", + //"* `common_name` - (Optional) Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length.", + //"* `country` - (Optional) Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length.", }, expected: map[docsPath]*argumentDocs{ "certificate_authority_configuration.key_algorithm": {description: "Type of the public key algorithm and size, in bits, of the key pair that your key pair creates when it issues a certificate. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, "certificate_authority_configuration.signing_algorithm": {description: "Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, "certificate_authority_configuration.subject": {description: "Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified."}, - "subject.common_name": {description: "Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length."}, - "subject.country": {description: "Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length."}, + //"subject.common_name": {description: "Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length."}, + //"subject.country": {description: "Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length."}, }, }, { From cd9a9184b8842764cdca539c3c2ff6caecd39aff Mon Sep 17 00:00:00 2001 From: guineveresaenger Date: Tue, 13 Aug 2024 16:23:34 +0200 Subject: [PATCH 2/6] Add more node cases and cleanup --- pkg/tfgen/docs.go | 397 +++++++++++++---------------------------- pkg/tfgen/docs_test.go | 105 +++++------ 2 files changed, 175 insertions(+), 327 deletions(-) diff --git a/pkg/tfgen/docs.go b/pkg/tfgen/docs.go index bb62174e9..40724da4e 100644 --- a/pkg/tfgen/docs.go +++ b/pkg/tfgen/docs.go @@ -362,7 +362,7 @@ var ( ) descriptionRegexp = regexp.MustCompile( - "\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^\\)]*\\)[-\\s]*)?(.*)", + "^\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^)]*\\)[-\\s]*)?((.|\n)*)", ) bulletPointRegexStr = "^\\s*[*+-]" // matches any bullet point-like character @@ -800,92 +800,6 @@ func (p *tfMarkdownParser) parseSchemaWithNestedSections(subsection []string) { parseTopLevelSchemaIntoDocs(&p.ret, topLevelSchema, p.sink.warn) } -type markdownLineInfo struct { - name, desc string - isFound bool -} - -type bulletListEntry struct { - name string - index int -} - -// trackBulletListIndentation looks at the index of the bullet list marker ( `*`, `-` or `+`) in a docs line and -// compares it to a collection that tracks the level of list nesting by comparing to the previous list entry's nested -// level (if any). -// Note that this function only looks at the placement of the bullet list marker, and assumes same-level list markers -// to be in the same location in each line. This is not necessarily the case for Markdown, which considers a range of -// locations within 1-4 whitespace characters, as well as considers the start index of the text following the bullet -// point. If and when this becomes an issue during docs parsing, we may consider adding some of those rules here. -// Read more about nested lists in GitHub-flavored Markdown: -// https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#nested-lists -// -//nolint:lll -func trackBulletListIndentation(line, name string, tracker []bulletListEntry) []bulletListEntry { - - listMarkerLocation := listMarkerRegex.FindStringIndex(line) - contract.Assertf(len(listMarkerLocation) == 2, - fmt.Sprintf("Expected to find bullet list marker in line %s", line)) - listMarkerIndex := listMarkerLocation[0] - - // If our tracker is empty, we are at list nested level 0. - if len(tracker) == 0 { - newEntry := bulletListEntry{ - name: name, - index: listMarkerIndex, - } - return append(tracker, newEntry) - } - // Always compare to last entry in tracker - lastListEntry := tracker[len(tracker)-1] - - // if current line's listMarkerIndex is greater than the tracker's last entry's listMarkerIndex, - // make a new tracker entry and push it on there with all the info. - if listMarkerIndex > lastListEntry.index { - name = lastListEntry.name + "." + name - newEntry := bulletListEntry{ - name: name, - index: listMarkerIndex, - } - return append(tracker, newEntry) - } - // if current line's listMarkerIndex is the same as the last entry's, we're at the same level. - if listMarkerIndex == lastListEntry.index { - // Replace the last entry in our tracker - replaceEntry := bulletListEntry{ - index: listMarkerIndex, - } - if len(tracker) == 1 { - replaceEntry.name = name - } else { - // use the penultimate entry name to build current name - replaceName := tracker[(len(tracker)-2)].name + "." + name - replaceEntry.name = replaceName - } - return append(tracker[:len(tracker)-1], replaceEntry) - } - - // The current line's listMarkerIndex is smaller that the previous entry's. - // Pop off the latest entry, and retry to see if the next previous entry is a match. - return trackBulletListIndentation(line, name, tracker[:len(tracker)-1]) -} - -// parseArgFromMarkdownLine takes a line of Markdown and attempts to parse it for a Terraform argument and its -// description. It returns a struct containing the name and description of the arg, and whether an arg was found. -func parseArgFromMarkdownLine(line string) markdownLineInfo { - matches := argumentBulletRegexp.FindStringSubmatch(line) - q.Q(matches) - var parsed markdownLineInfo - if len(matches) > 4 { - parsed.name = matches[1] - parsed.desc = matches[4] - parsed.isFound = true - } - return parsed -} - -var genericNestedRegexp = regexp.MustCompile("supports? the following:") - var nestedObjectRegexps = []*regexp.Regexp{ // For example: // s3_bucket.html.markdown: "The `website` object supports the following:" @@ -1025,7 +939,6 @@ func getNestedBlockNames(line string) []string { return nestedBlockNames } -// TODO: transform subsection into a markdown node -join on newline & parse func parseArgReferenceSection(subsection []string, ret *entityDocs) { // Get a "document" documentBefore := strings.Join(subsection, "\n") @@ -1044,229 +957,161 @@ func parseArgReferenceSection(subsection []string, ret *entityDocs) { gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension)) astNode := gm.Parser().Parse(gmtext.NewReader(docBytes)) - var astNesteds []string - var docspath string - //var isFromHeading bool + var paths []string + var writeList bool + gmast.Walk(astNode, func(node gmast.Node, enter bool) (gmast.WalkStatus, error) { - // here goes the detection logic + // When we find a list item, we check if it is an argument entry. if node.Kind().String() == "ListItem" { - // track nestedness if enter { - // the list item has a text node, which may have a code span node. // For any list item, we want to check if it opens with a code span. // If so, chances are we have a potential entry for our nested docs map. // It will be list item --> Text --> Code Span, so the grandchild of the list item. - // TODO: still must filter for actual TF entries, AKA this can't catch all lists, to avoid regression - - nodeKind := node.FirstChild().FirstChild().Kind().String() + // TODO: still must filter for actual TF entries, AKA this shouldn't catch all lists, to avoid regression + codeSpanNode := node.FirstChild().FirstChild() + nodeKind := codeSpanNode.Kind().String() if nodeKind == "CodeSpan" { - codeSpanNode := node.FirstChild().FirstChild() - bulletListItem := codeSpanNode.Text(docBytes) - // TODO: docspath must be a map because people overload the titles with multiple fields - if docspath == "" { - //q.Q("HERE") - docspath = string(bulletListItem) - } else { - docspath = docspath + "." + string(bulletListItem) - } - } - // TODO: now get the description, somehow. Idon't know which node this is, ugh - beschreibung := node.FirstChild().Text(docBytes) - q.Q(string(beschreibung)) - - lines := node.FirstChild().Lines() - q.Q(lines) - var desc bytes.Buffer - for i := 0; i < lines.Len(); i++ { - line := lines.At(i) - desc.Write(line.Value(docBytes)) - } - // TODO: this does give me the whole bullet point, which is great. Need to clean it up a bit... + codeSpanItem := codeSpanNode.Text(docBytes) + q.Q(string(codeSpanItem)) - q.Q(desc.String()) - // Extract the description from the line item. We will use a regex match here. - descs := descriptionRegexp.FindStringSubmatch(desc.String()) - q.Q(descs[4]) + // The list item's first child is a text block. + // For most of our entries, this is all we need. + desc := writeLines(node.FirstChild().Lines(), docBytes) - // Now... with our new info - can we read out the return info? - ret.Arguments[docsPath(docspath)] = &argumentDocs{descs[4]} + // Extract the description from the written lines. + // We will use a regex match for now, as we did before. + // TODO: maybe replace regex + descs := descriptionRegexp.FindStringSubmatch(desc) - } else { - // we are hitting this node on our way back. Append to nested docs paths. - astNesteds = append(astNesteds, docspath) - // Cut off last field in docsPath to remove nestedness - docspathIndex := strings.LastIndex( - docspath, ".") - if docspathIndex > 0 { - docspath = docspath[:docspathIndex] - } else { - docspath = "" + //q.Q(descs) + if len(descs) <= 4 { + writeList = true + } + //q.Q(paths) + // add to docspaths if writeList is false + if !writeList { + if len(paths) == 0 { + paths = append(paths, string(codeSpanItem)) + } else { + var newPaths []string + for _, p := range paths { + p = p + "." + string(codeSpanItem) + newPaths = append(newPaths, p) + } + paths = newPaths + } + } + + // Read results into the return argument docs. When we're reading subfields for multiple fields, + // the description is still the same as discovered from the node's lines. + for _, path := range paths { + if !writeList { + ret.Arguments[docsPath(path)] = &argumentDocs{descs[4]} + } else { + q.Q("hit else; do not append") + ret.Arguments[docsPath(path)] = &argumentDocs{"PLEASE SHOW UP"} + // we have a non-matching argument. We don't want to do anything with this. + } + } + //q.Q("listitem enter: ", paths) } - q.Q(astNesteds) - q.Q(docspath) + } else { + // we are hitting this node on our way back. + // Cut off last field in docs paths to remove nestedness + var newpaths []string + for _, p := range paths { + pathIndex := strings.LastIndex( + p, ".") + if pathIndex > 0 { + p = p[:pathIndex] + newpaths = append(newpaths, p) + } + } + paths = newpaths + //q.Q("list items leave: ", paths) + //q.Q(astNesteds) } } if node.Kind().String() == "Section" { // A section's first child is its heading. + // In this part of the upstream document, a heading generally means a subresource name. // TODO: assert that it is of type Heading. - // track nestedness if enter { // The text next to an arg reference's section header is assumed to be a resource field. headerItem := node.FirstChild().Text(docBytes) - // add to docspath - if docspath == "" { - docspath = string(headerItem) + // add to docs paths + if len(paths) == 0 { + paths = append(paths, string(headerItem)) } else { - docspath = docspath + "." + string(headerItem) + var newPaths []string + for _, p := range paths { + p = p + "." + string(headerItem) + newPaths = append(newPaths, p) + } + paths = newPaths } - //isFromHeading = true - + //q.Q("headers enter: ", paths) } else { // We are on our way back up the tree. // Because the headers are subsections of already found docs, we never want to actually include the // header item in our astNested list by itself - it should already exist with its own description. // Cut off last field in docsPath to remove nestedness - - docspathIndex := strings.LastIndex( - docspath, ".") - if docspathIndex > 0 { - docspath = docspath[:docspathIndex] - } else { - docspath = "" + // TODO: refactor, this is the same logic as above. + var newpaths []string + for _, p := range paths { + pathIndex := strings.LastIndex( + p, ".") + if pathIndex > 0 { + p = p[:pathIndex] + } + newpaths = append(newpaths, p) } - - q.Q("from heading:", astNesteds) - q.Q("from heading:", docspath) + paths = newpaths + //q.Q("headers leave: ", paths) + //q.Q("from heading:", astNesteds) + } + } + // Additionally, there are top-level paragraphs that can contain information about nested docs, + // such as "The `foo_bar` object supports the following:". + if node.Kind().String() == "Paragraph" && node.Parent().Kind().String() == "Document" { + if enter { + // We believe that all of the fields mentioned in paragraphs are able to be treated as top-level, i.e. + // they're of the format "(The) `foo` [field|resource] supports the following:", or they already + // include the nested path as in "(The) `foo.bar` [field|resource] supports the following:". + // This means that at any detection of a top-level Paragraph node, we re-set the docsPath to "". + paths = []string{} + paragraph := writeLines(node.Lines(), docBytes) + //q.Q(paragraph.String()) + // Check if our paragraph matches any of the nested object signifiers. See `nestedObjectRegexps`. + nestedBlockNames := getNestedBlockNames(paragraph) + //q.Q(nestedBlockNames) + if len(nestedBlockNames) > 0 { + // write to docspath + paths = nestedBlockNames + //q.Q("paragraph: ", paths) + } + } else { + // Because descriptions nested under paragraphs are not children, but rather siblings, + // we do not manipulate the docspath level at this point. Continue walking. + return gmast.WalkContinue, nil } } - //case *gmast.Heading: - // // this is where we want to look at the header title - // headerText := child.Text(docBytes) - // q.Q(string(headerText)) - // // TODO: find the resource in the header text - // case section.Section: - // - // - //default: - // //q.Q("defulat") - //} return gmast.WalkContinue, nil }) +} - var buf bytes.Buffer - // Convert parses the source, applies transformers, and renders output to buf - err := gm.Convert(docBytes, &buf) - if err != nil { - panic(err) - } - if documentBefore != string(docBytes) { - - q.Q(string(docBytes)) - panic("Not equal yet") +func writeLines(lines *gmtext.Segments, docBytes []byte) string { + var desc bytes.Buffer + for i := 0; i < lines.Len(); i++ { + //TODO: actually don't write lines that are the name itself and the (Optional) nonsense! + line := lines.At(i) + desc.Write(line.Value(docBytes)) + //q.Q(desc.String()) } - - //// Variable to remember the last argument we found. - //var lastMatch string - //// Collection to hold all arguments that headline a nested description. - //var nesteds []docsPath - // - //addNewHeading := func(name, desc, line string) { - // // found a property bullet, extract the name and description - // if len(nesteds) > 0 { - // for _, nested := range nesteds { - // // We found this line within a nested field. We should record it as such. - // if ret.Arguments[nested] == nil { - // totalArgumentsFromDocs++ - // } - // ret.Arguments[nested.join(name)] = &argumentDocs{desc} - // } - // - // } else { - // if genericNestedRegexp.MatchString(line) { - // return - // } - // ret.Arguments[docsPath(name)] = &argumentDocs{description: desc} - // totalArgumentsFromDocs++ - // } - //} - //// This function adds the current line as a description to the last matched resource, - ////in cases where there's no resource match found on this line. - ////It represents a multi-line description for a field. - //extendExistingHeading := func(line string) { - // if len(nesteds) > 0 { - // for _, nested := range nesteds { - // line = "\n" + strings.TrimSpace(line) - // ret.Arguments[nested.join(lastMatch)].description += line - // } - // } else { - // if genericNestedRegexp.MatchString(line) { - // lastMatch = "" - // nesteds = []docsPath{} - // return - // } - // line = "\n" + strings.TrimSpace(line) - // ret.Arguments[docsPath(lastMatch)].description += line - // } - //} - // - //// hadSpace tells us if the previous line was blank. - //var hadSpace bool - // - //// bulletListTracker is a stack-like collection that tracks the level of nesting for a bulleted list with - //// nested lists. The name of the topmost entry represents the nested docs path for the current line. - //var bulletListTracker []bulletListEntry - // - //for _, line := range subsection { - // parsedArg := parseArgFromMarkdownLine(line) - // matchFound := parsedArg.isFound - // if matchFound { // We have found a new property bullet point. - // desc := parsedArg.desc - // bulletListTracker = trackBulletListIndentation(line, parsedArg.name, bulletListTracker) - // name := bulletListTracker[len(bulletListTracker)-1].name - // lastMatch = name - // addNewHeading(name, desc, line) - // - // } else if strings.TrimSpace(line) == "---" { - // // --- is a markdown section break. This probably indicates the - // // section is over, but we take it to mean that the current - // // heading is over. - // lastMatch = "" - // bulletListTracker = nil - // } else if nestedBlockCurrentLine := getNestedBlockNames(line); hadSpace && len(nestedBlockCurrentLine) > 0 { - // // This tells us if there's a resource that is about to have subfields (nesteds) - // // in subsequent lines. - // //empty nesteds - // nesteds = []docsPath{} - // for _, item := range nestedBlockCurrentLine { - // nesteds = append(nesteds, docsPath(item)) - // } - // lastMatch = "" - // bulletListTracker = nil - // } else if !isBlank(line) && lastMatch != "" { - // // This appends the current line to the previous match's description. - // extendExistingHeading(line) - // - // } else if nestedBlockCurrentLine := getNestedBlockNames(line); len(nestedBlockCurrentLine) > 0 { - // // This tells us if there's a resource that is about to have subfields (nesteds) - // // in subsequent lines. - // //empty nesteds - // nesteds = []docsPath{} - // for _, item := range nestedBlockCurrentLine { - // nesteds = append(nesteds, docsPath(item)) - // } - // lastMatch = "" - // bulletListTracker = nil - // } else if lastMatch != "" { - // extendExistingHeading(line) - // } - // hadSpace = isBlank(line) - //} - // - //for _, v := range ret.Arguments { - // v.description = strings.TrimRightFunc(v.description, unicode.IsSpace) - //} + q.Q(desc.String()) + return desc.String() } func parseAttributesReferenceSection(subsection []string, ret *entityDocs) { diff --git a/pkg/tfgen/docs_test.go b/pkg/tfgen/docs_test.go index 0bcb7625d..e084faa56 100644 --- a/pkg/tfgen/docs_test.go +++ b/pkg/tfgen/docs_test.go @@ -155,6 +155,41 @@ func TestArgumentRegex(t *testing.T) { }, }, }, + { + name: "Parses multiple nested arguments via `object supports the following`", + input: []string{ + "The `token_configuration` object supports the following:", + "", + "* `audience` - (Optional) A list of the intended recipients of the token.", + "* `issuer` - (Optional) The base domain of the identity provider that issues the token", + "", + "The `jwt_configuration` object supports the following:", + "", + "* `audience` - (Optional) A list of the intended recipients of the JWT. A valid JWT must provide an aud that matches at least one entry in this list.", + "* `issuer` - (Optional) The base domain of the identity provider that issues JSON Web Tokens, such as the `endpoint` attribute of the [`aws_cognito_user_pool`](/docs/providers/aws/r/cognito_user_pool.html) resource.", + "", + " this is a new paragraph", + "", + "* `top_level_field` - (Required) This field's docspath should not be nested under the previous paragraph.", + }, + expected: map[docsPath]*argumentDocs{ + "token_configuration.audience": { + description: "A list of the intended recipients of the token."}, + "token_configuration.issuer": { + description: "The base domain of the identity provider that issues the token", + }, + + "jwt_configuration.audience": { + description: "A list of the intended recipients of the JWT. A valid JWT must provide an aud that matches at least one entry in this list.", + }, + "jwt_configuration.issuer": { + description: "The base domain of the identity provider that issues JSON Web Tokens, such as the `endpoint` attribute of the [`aws_cognito_user_pool`](/docs/providers/aws/r/cognito_user_pool.html) resource.", + }, + "top_level_field": { + description: "This field's docspath should not be nested under the previous paragraph.", + }, + }, + }, { name: "Renders ~> **NOTE:** and continues parsing as expected", input: []string{ @@ -231,7 +266,7 @@ func TestArgumentRegex(t *testing.T) { "* `retention_policy` - (Required) A `retention_policy` block as documented below.", "", "---", - "* `retention_policy` supports the following:", + "`retention_policy` supports the following:", }, expected: map[docsPath]*argumentDocs{ "retention_policy": { @@ -283,7 +318,7 @@ func TestArgumentRegex(t *testing.T) { description: "Indicates how to allocate the target capacity across\nthe Spot pools specified by the Spot fleet request. The default is\n`lowestPrice`.", }, "instance_pools_to_use_count": { - description: "\nThe number of Spot pools across which to allocate your target Spot capacity.\nValid only when `allocation_strategy` is set to `lowestPrice`. Spot Fleet selects\nthe cheapest Spot pools and evenly allocates your target Spot capacity across\nthe number of Spot pools that you specify.", + description: "The number of Spot pools across which to allocate your target Spot capacity.\nValid only when `allocation_strategy` is set to `lowestPrice`. Spot Fleet selects\nthe cheapest Spot pools and evenly allocates your target Spot capacity across\nthe number of Spot pools that you specify.", }, }, }, @@ -487,8 +522,8 @@ func TestArgumentRegex(t *testing.T) { }, expected: map[docsPath]*argumentDocs{ "node_pool_config": {description: "The configuration for the GKE node pool. \nIf specified, " + - "Dataproc attempts to create a node pool with the specified shape.\nIf one with the same name " + - "already exists, it is verified against all specified fields.\nIf a field differs, the virtual " + + "Dataproc attempts to create a node pool with the specified shape. \nIf one with the same name " + + "already exists, it is verified against all specified fields. \nIf a field differs, the virtual " + "cluster creation will fail.", }, }, @@ -560,7 +595,6 @@ func TestArgumentRegex(t *testing.T) { "settings.maintenance_window.day": {description: "Day of week (`1-7`), starting on Monday"}, }, }, - { name: "All caps bullet points are not parsed as TF properties", input: []string{ @@ -584,21 +618,21 @@ func TestArgumentRegex(t *testing.T) { " `DELETING` The Private Link service is being deleted."}, }, }, - { - name: "Bullet points in backticks containing any uppercase letters are not parsed as TF properties", - input: []string{ - "* `status` - Status of the AWS PrivateLink connection.", - " Returns one of the following values:", - " * `Available` Atlas created the load balancer and the Private Link Service.", - " * `Initiating` Atlas is creating the network load balancer and VPC endpoint service.", - }, - expected: map[docsPath]*argumentDocs{ - "status": {description: "Status of the AWS PrivateLink connection.\n" + - "Returns one of the following values:\n" + - "* `Available` Atlas created the load balancer and the Private Link Service.\n" + - "* `Initiating` Atlas is creating the network load balancer and VPC endpoint service."}, - }, - }, + //{ + // name: "Bullet points in backticks containing any uppercase letters are not parsed as TF properties", + // input: []string{ + // "* `status` - Status of the AWS PrivateLink connection.", + // " Returns one of the following values:", + // " * `Available` Atlas created the load balancer and the Private Link Service.", + // " * `Initiating` Atlas is creating the network load balancer and VPC endpoint service.", + // }, + // expected: map[docsPath]*argumentDocs{ + // "status": {description: "Status of the AWS PrivateLink connection.\n" + + // "Returns one of the following values:\n" + + // "* `Available` Atlas created the load balancer and the Private Link Service.\n" + + // "* `Initiating` Atlas is creating the network load balancer and VPC endpoint service."}, + // }, + //}, } for _, tt := range tests { @@ -1534,37 +1568,6 @@ FooFactory fooFactory = new FooFactory(); assert.Equal(t, buf.String(), hclConversionsToString(input)) } -func TestParseArgFromMarkdownLine(t *testing.T) { - //nolint:lll - tests := []struct { - input string - expectedName string - expectedDesc string - expectedFound bool - }{ - {"* `name` - (Required) A unique name to give the role.", "name", "A unique name to give the role.", true}, - {"* `key_vault_key_id` - (Optional) The Key Vault key URI for CMK encryption. Changing this forces a new resource to be created.", "key_vault_key_id", "The Key Vault key URI for CMK encryption. Changing this forces a new resource to be created.", true}, - {"* `urn` - The uniform resource name of the Droplet", "urn", "The uniform resource name of the Droplet", true}, - {"* `name`- The name of the Droplet", "name", "The name of the Droplet", true}, - {"* `jumbo_frame_capable` -Indicates whether jumbo frames (9001 MTU) are supported.", "jumbo_frame_capable", "Indicates whether jumbo frames (9001 MTU) are supported.", true}, - {"* `ssl_support_method`: Specifies how you want CloudFront to serve HTTPS", "ssl_support_method", "Specifies how you want CloudFront to serve HTTPS", true}, - {"* `principal_tags`: (Optional: []) - String to string map of variables.", "principal_tags", "String to string map of variables.", true}, - {" * `id` - The id of the property", "id", "The id of the property", true}, - {" * id - The id of the property", "", "", false}, - //In rare cases, we may have a match where description is empty like the following, taken from https://github.com/hashicorp/terraform-provider-aws/blob/main/website/docs/r/spot_fleet_request.html.markdown - {"* `instance_pools_to_use_count` - (Optional; Default: 1)", "instance_pools_to_use_count", "", true}, - {"", "", "", false}, - {"Most of these arguments directly correspond to the", "", "", false}, - } - - for _, test := range tests { - parsedLine := parseArgFromMarkdownLine(test.input) - assert.Equal(t, test.expectedName, parsedLine.name) - assert.Equal(t, test.expectedDesc, parsedLine.desc) - assert.Equal(t, test.expectedFound, parsedLine.isFound) - } -} - func TestParseAttributesReferenceSection(t *testing.T) { ret := entityDocs{ Arguments: make(map[docsPath]*argumentDocs), From 92f8b2736d316584c4eaad40dc736dd6cec3d801 Mon Sep 17 00:00:00 2001 From: guineveresaenger Date: Thu, 15 Aug 2024 16:27:13 +0200 Subject: [PATCH 3/6] Appends lists that aren't TF names verbatim cleanup and refactor --- go.mod | 7 +- go.sum | 2 - pkg/tfgen/docs.go | 148 +++++++++++++++-------------------------- pkg/tfgen/docs_test.go | 30 ++++----- 4 files changed, 69 insertions(+), 118 deletions(-) diff --git a/go.mod b/go.mod index ba7d2f144..c0259b1ec 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,6 @@ module github.com/pulumi/pulumi-terraform-bridge/v3 -go 1.22 - -toolchain go1.22.3 +go 1.21.12 replace github.com/pulumi/pulumi-terraform-bridge/x/muxer => ./x/muxer @@ -45,7 +43,6 @@ require ( github.com/pulumi/schema-tools v0.1.2 github.com/pulumi/terraform-diff-reader v0.0.2 github.com/russross/blackfriday/v2 v2.1.0 - github.com/ryboe/q v1.0.21 github.com/spf13/afero v1.9.5 github.com/spf13/cobra v1.8.0 github.com/stretchr/testify v1.9.0 @@ -92,8 +89,6 @@ require ( github.com/google/s2a-go v0.1.7 // indirect github.com/hexops/gotextdiff v1.0.3 // indirect github.com/hexops/valast v1.4.4 // indirect - github.com/kr/pretty v0.3.1 // indirect - github.com/kr/text v0.2.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-localereader v0.0.1 // indirect diff --git a/go.sum b/go.sum index 917718953..3c16dd234 100644 --- a/go.sum +++ b/go.sum @@ -1941,8 +1941,6 @@ github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfF github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= -github.com/ryboe/q v1.0.21 h1:sbLQFC3eMDEs8q3g4otbrWExLTHEyKISCFuN7Akropc= -github.com/ryboe/q v1.0.21/go.mod h1:g4aI/DhdScxW3WLRGxHXgg5ZkGrk+b4h1h5u2PTCvDc= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 h1:TToq11gyfNlrMFZiYujSekIsPd9AmsA2Bj/iv+s4JHE= diff --git a/pkg/tfgen/docs.go b/pkg/tfgen/docs.go index 40724da4e..07003f047 100644 --- a/pkg/tfgen/docs.go +++ b/pkg/tfgen/docs.go @@ -35,7 +35,6 @@ import ( "github.com/pulumi/pulumi/sdk/v3/go/common/util/cmdutil" "github.com/pulumi/pulumi/sdk/v3/go/common/util/contract" bf "github.com/russross/blackfriday/v2" - "github.com/ryboe/q" "github.com/spf13/afero" "github.com/yuin/goldmark" gmast "github.com/yuin/goldmark/ast" @@ -940,157 +939,92 @@ func getNestedBlockNames(line string) []string { } func parseArgReferenceSection(subsection []string, ret *entityDocs) { - // Get a "document" - documentBefore := strings.Join(subsection, "\n") - // save a copy for comparison - docBytes := []byte(documentBefore) - q.Q(documentBefore) - - // Algorithm: - // - Walk the document tree and gather any text that is after a property bullet. - // - Track the level (nestedness?) of said bullet/ indentation/blah and add to current docs path - // - Let's start by finding the bullet points. - // Actually, start by creating a new transformer. No, I may not need a transformer. Ung, why is this so annoying - // Just use Walk? + + // Treat our subsection as a markdown node. This will later just be a node. + docBytes := []byte(strings.Join(subsection, "\n")) // Parse the document using Goldmark parser gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension)) - astNode := gm.Parser().Parse(gmtext.NewReader(docBytes)) - var paths []string - var writeList bool + var paths []string + var writeList bool // tracking whether we need to write a list verbatim gmast.Walk(astNode, func(node gmast.Node, enter bool) (gmast.WalkStatus, error) { // When we find a list item, we check if it is an argument entry. if node.Kind().String() == "ListItem" { if enter { // For any list item, we want to check if it opens with a code span. - // If so, chances are we have a potential entry for our nested docs map. // It will be list item --> Text --> Code Span, so the grandchild of the list item. - // TODO: still must filter for actual TF entries, AKA this shouldn't catch all lists, to avoid regression codeSpanNode := node.FirstChild().FirstChild() - nodeKind := codeSpanNode.Kind().String() - if nodeKind == "CodeSpan" { + if codeSpanNode.Kind().String() == "CodeSpan" { codeSpanItem := codeSpanNode.Text(docBytes) - q.Q(string(codeSpanItem)) // The list item's first child is a text block. // For most of our entries, this is all we need. desc := writeLines(node.FirstChild().Lines(), docBytes) - // Extract the description from the written lines. - // We will use a regex match for now, as we did before. - // TODO: maybe replace regex + // To see if we have a TF name, use a regex match. + // The submatch looks for patterns such as + // + // `follow_gae_application` - (Optional) A GAE application whose zone to remain" descs := descriptionRegexp.FindStringSubmatch(desc) - - //q.Q(descs) if len(descs) <= 4 { writeList = true } - //q.Q(paths) + // add to docspaths if writeList is false if !writeList { - if len(paths) == 0 { - paths = append(paths, string(codeSpanItem)) - } else { - var newPaths []string - for _, p := range paths { - p = p + "." + string(codeSpanItem) - newPaths = append(newPaths, p) - } - paths = newPaths - } + paths = addPaths(paths, codeSpanItem) } - // Read results into the return argument docs. When we're reading subfields for multiple fields, // the description is still the same as discovered from the node's lines. for _, path := range paths { if !writeList { ret.Arguments[docsPath(path)] = &argumentDocs{descs[4]} } else { - q.Q("hit else; do not append") - ret.Arguments[docsPath(path)] = &argumentDocs{"PLEASE SHOW UP"} - // we have a non-matching argument. We don't want to do anything with this. + // We need to write the entire list item into the description. + // We'll just append each list item as it is visited. + currentDesc := ret.Arguments[docsPath(path)].description + newDesc := currentDesc + "\n* " + desc + ret.Arguments[docsPath(path)] = &argumentDocs{newDesc} } } - //q.Q("listitem enter: ", paths) } } else { - // we are hitting this node on our way back. - // Cut off last field in docs paths to remove nestedness - var newpaths []string - for _, p := range paths { - pathIndex := strings.LastIndex( - p, ".") - if pathIndex > 0 { - p = p[:pathIndex] - newpaths = append(newpaths, p) - } - + if !writeList { + paths = cutPaths(paths) } - paths = newpaths - //q.Q("list items leave: ", paths) - //q.Q(astNesteds) } } if node.Kind().String() == "Section" { - // A section's first child is its heading. + writeList = false + // A Section's first child is its heading. // In this part of the upstream document, a heading generally means a subresource name. - // TODO: assert that it is of type Heading. if enter { // The text next to an arg reference's section header is assumed to be a resource field. headerItem := node.FirstChild().Text(docBytes) // add to docs paths - if len(paths) == 0 { - paths = append(paths, string(headerItem)) - } else { - var newPaths []string - for _, p := range paths { - p = p + "." + string(headerItem) - newPaths = append(newPaths, p) - } - paths = newPaths - } - //q.Q("headers enter: ", paths) + paths = addPaths(paths, headerItem) } else { - // We are on our way back up the tree. - // Because the headers are subsections of already found docs, we never want to actually include the - // header item in our astNested list by itself - it should already exist with its own description. - - // Cut off last field in docsPath to remove nestedness - // TODO: refactor, this is the same logic as above. - var newpaths []string - for _, p := range paths { - pathIndex := strings.LastIndex( - p, ".") - if pathIndex > 0 { - p = p[:pathIndex] - } - newpaths = append(newpaths, p) - } - paths = newpaths - //q.Q("headers leave: ", paths) - //q.Q("from heading:", astNesteds) + paths = cutPaths(paths) } } // Additionally, there are top-level paragraphs that can contain information about nested docs, // such as "The `foo_bar` object supports the following:". if node.Kind().String() == "Paragraph" && node.Parent().Kind().String() == "Document" { + writeList = false if enter { - // We believe that all of the fields mentioned in paragraphs are able to be treated as top-level, i.e. + // All of the fields mentioned in paragraphs can be treated as top-level, i.e. // they're of the format "(The) `foo` [field|resource] supports the following:", or they already // include the nested path as in "(The) `foo.bar` [field|resource] supports the following:". - // This means that at any detection of a top-level Paragraph node, we re-set the docsPath to "". + // This means that at any detection of a top-level Paragraph node, we re-set the docsPath slice to empty. paths = []string{} paragraph := writeLines(node.Lines(), docBytes) - //q.Q(paragraph.String()) // Check if our paragraph matches any of the nested object signifiers. See `nestedObjectRegexps`. nestedBlockNames := getNestedBlockNames(paragraph) - //q.Q(nestedBlockNames) if len(nestedBlockNames) > 0 { // write to docspath paths = nestedBlockNames - //q.Q("paragraph: ", paths) } } else { // Because descriptions nested under paragraphs are not children, but rather siblings, @@ -1105,15 +1039,39 @@ func parseArgReferenceSection(subsection []string, ret *entityDocs) { func writeLines(lines *gmtext.Segments, docBytes []byte) string { var desc bytes.Buffer for i := 0; i < lines.Len(); i++ { - //TODO: actually don't write lines that are the name itself and the (Optional) nonsense! line := lines.At(i) desc.Write(line.Value(docBytes)) - //q.Q(desc.String()) } - q.Q(desc.String()) return desc.String() } +func cutPaths(paths []string) []string { + var newpaths []string + for _, p := range paths { + pathIndex := strings.LastIndex( + p, ".") + if pathIndex > 0 { + p = p[:pathIndex] + newpaths = append(newpaths, p) + } + } + return newpaths +} + +func addPaths(paths []string, pathSection []byte) []string { + if len(paths) == 0 { + paths = append(paths, string(pathSection)) + } else { + var newPaths []string + for _, p := range paths { + p = p + "." + string(pathSection) + newPaths = append(newPaths, p) + } + paths = newPaths + } + return paths +} + func parseAttributesReferenceSection(subsection []string, ret *entityDocs) { var lastMatch string for _, line := range subsection { diff --git a/pkg/tfgen/docs_test.go b/pkg/tfgen/docs_test.go index e084faa56..67a7863a5 100644 --- a/pkg/tfgen/docs_test.go +++ b/pkg/tfgen/docs_test.go @@ -618,21 +618,21 @@ func TestArgumentRegex(t *testing.T) { " `DELETING` The Private Link service is being deleted."}, }, }, - //{ - // name: "Bullet points in backticks containing any uppercase letters are not parsed as TF properties", - // input: []string{ - // "* `status` - Status of the AWS PrivateLink connection.", - // " Returns one of the following values:", - // " * `Available` Atlas created the load balancer and the Private Link Service.", - // " * `Initiating` Atlas is creating the network load balancer and VPC endpoint service.", - // }, - // expected: map[docsPath]*argumentDocs{ - // "status": {description: "Status of the AWS PrivateLink connection.\n" + - // "Returns one of the following values:\n" + - // "* `Available` Atlas created the load balancer and the Private Link Service.\n" + - // "* `Initiating` Atlas is creating the network load balancer and VPC endpoint service."}, - // }, - //}, + { + name: "Bullet points in backticks containing any uppercase letters are not parsed as TF properties", + input: []string{ + "* `status` - Status of the AWS PrivateLink connection.", + " Returns one of the following values:", + " * `Available` Atlas created the load balancer and the Private Link Service.", + " * `Initiating` Atlas is creating the network load balancer and VPC endpoint service.", + }, + expected: map[docsPath]*argumentDocs{ + "status": {description: "Status of the AWS PrivateLink connection.\n" + + "Returns one of the following values:\n" + + "* `Available` Atlas created the load balancer and the Private Link Service.\n" + + "* `Initiating` Atlas is creating the network load balancer and VPC endpoint service."}, + }, + }, } for _, tt := range tests { From 54501c1d2fbbe0c5e9e69924ce1ce20beb87d56d Mon Sep 17 00:00:00 2001 From: guineveresaenger Date: Fri, 16 Aug 2024 15:12:11 -0700 Subject: [PATCH 4/6] Adjust test. Notes are not actually parts of resource field descriptions. --- pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json b/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json index 4d9bdd0fc..ea802493f 100644 --- a/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json +++ b/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json @@ -2,8 +2,7 @@ "Description": "Allows you to manage an Azure SQL Firewall Rule.\n\n\u003e **Note:** The `azurerm_sql_firewall_rule` resource is deprecated in version 3.0 of the AzureRM provider and will be removed in version 4.0. Please use the `azurerm_mssql_firewall_rule` resource instead.\n\n## Example Usage\n\n```hcl\nresource \"azurerm_resource_group\" \"example\" {\n name = \"example-resources\"\n location = \"West Europe\"\n}\n\nresource \"azurerm_sql_server\" \"example\" {\n name = \"mysqlserver\"\n resource_group_name = azurerm_resource_group.example.name\n location = azurerm_resource_group.example.location\n version = \"12.0\"\n administrator_login = \"4dm1n157r470r\"\n administrator_login_password = \"4-v3ry-53cr37-p455w0rd\"\n}\n\nresource \"azurerm_sql_firewall_rule\" \"example\" {\n name = \"FirewallRule1\"\n resource_group_name = azurerm_resource_group.example.name\n server_name = azurerm_sql_server.example.name\n start_ip_address = \"10.0.17.62\"\n end_ip_address = \"10.0.17.62\"\n}\n```", "Arguments": { "end_ip_address": { - "description": "The ending IP address to allow through the firewall for this rule.\n\n\u003e **NOTE:** The Azure feature `Allow access to Azure services` can be enabled by setting `start_ip_address` and `end_ip_address` to `0.0.0.0` which ([is documented in the Azure API Docs](https://docs.microsoft.com/rest/api/sql/firewallrules/createorupdate))." - }, + "description": "The ending IP address to allow through the firewall for this rule."}, "name": { "description": "The name of the firewall rule. Changing this forces a new resource to be created." }, From c759ac430307a1c96f7e2841d3ad40032559d46a Mon Sep 17 00:00:00 2001 From: guineveresaenger Date: Mon, 19 Aug 2024 12:43:29 -0700 Subject: [PATCH 5/6] lint --- pkg/tfgen/docs.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/tfgen/docs.go b/pkg/tfgen/docs.go index 07003f047..80d79f3bc 100644 --- a/pkg/tfgen/docs.go +++ b/pkg/tfgen/docs.go @@ -356,10 +356,6 @@ var ( // [1]: https://docs.aws.amazon.com/lambda/latest/dg/welcome.html linkFooterRegexp = regexp.MustCompile(`(?m)^(\[\d+\]):\s(.*)`) - argumentBulletRegexp = regexp.MustCompile( - "^\\s*[*+-]\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^\\)]*\\)[-\\s]*)?(.*)", - ) - descriptionRegexp = regexp.MustCompile( "^\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^)]*\\)[-\\s]*)?((.|\n)*)", ) @@ -373,7 +369,6 @@ var ( ) attributionFormatString = "This Pulumi package is based on the [`%[1]s` Terraform Provider](https://%[3]s/%[2]s/terraform-provider-%[1]s)." - listMarkerRegex = regexp.MustCompile("[-*+]") ) func trimFrontMatter(text []byte) []byte { @@ -949,7 +944,7 @@ func parseArgReferenceSection(subsection []string, ret *entityDocs) { var paths []string var writeList bool // tracking whether we need to write a list verbatim - gmast.Walk(astNode, func(node gmast.Node, enter bool) (gmast.WalkStatus, error) { + err := gmast.Walk(astNode, func(node gmast.Node, enter bool) (gmast.WalkStatus, error) { // When we find a list item, we check if it is an argument entry. if node.Kind().String() == "ListItem" { if enter { @@ -1034,6 +1029,7 @@ func parseArgReferenceSection(subsection []string, ret *entityDocs) { } return gmast.WalkContinue, nil }) + contract.AssertNoErrorf(err, "Cannot fail to parse argument reference") } func writeLines(lines *gmtext.Segments, docBytes []byte) string { From 45847da0b960a5d1894e364d9cda646ef83abd0a Mon Sep 17 00:00:00 2001 From: guineveresaenger Date: Mon, 19 Aug 2024 13:02:05 -0700 Subject: [PATCH 6/6] Adjust nestedsection test to be more correct --- pkg/tfgen/docs_test.go | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/tfgen/docs_test.go b/pkg/tfgen/docs_test.go index 67a7863a5..6850873e2 100644 --- a/pkg/tfgen/docs_test.go +++ b/pkg/tfgen/docs_test.go @@ -381,20 +381,20 @@ func TestArgumentRegex(t *testing.T) { "* `signing_algorithm` - (Required) Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html).", "* `subject` - (Required) Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified.", "", - //"#### subject", - //"", - //"Contains information about the certificate subject. Identifies the entity that owns or controls the public key in the certificate. The entity can be a user, computer, device, or service.", - //"", - //"* `common_name` - (Optional) Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length.", - //"* `country` - (Optional) Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length.", + "#### subject", + "", + "Contains information about the certificate subject. Identifies the entity that owns or controls the public key in the certificate. The entity can be a user, computer, device, or service.", + "", + "* `common_name` - (Optional) Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length.", + "* `country` - (Optional) Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length.", }, expected: map[docsPath]*argumentDocs{ - "certificate_authority_configuration.key_algorithm": {description: "Type of the public key algorithm and size, in bits, of the key pair that your key pair creates when it issues a certificate. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, - "certificate_authority_configuration.signing_algorithm": {description: "Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, - "certificate_authority_configuration.subject": {description: "Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified."}, - //"subject.common_name": {description: "Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length."}, - //"subject.country": {description: "Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length."}, + "certificate_authority_configuration.key_algorithm": {description: "Type of the public key algorithm and size, in bits, of the key pair that your key pair creates when it issues a certificate. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, + "certificate_authority_configuration.signing_algorithm": {description: "Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, + "certificate_authority_configuration.subject": {description: "Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified."}, + "certificate_authority_configuration.subject.common_name": {description: "Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length."}, + "certificate_authority_configuration.subject.country": {description: "Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length."}, }, }, {