diff --git a/pkg/tfgen/docs.go b/pkg/tfgen/docs.go index a46680e92..80d79f3bc 100644 --- a/pkg/tfgen/docs.go +++ b/pkg/tfgen/docs.go @@ -356,8 +356,8 @@ var ( // [1]: https://docs.aws.amazon.com/lambda/latest/dg/welcome.html linkFooterRegexp = regexp.MustCompile(`(?m)^(\[\d+\]):\s(.*)`) - argumentBulletRegexp = regexp.MustCompile( - "^\\s*[*+-]\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^\\)]*\\)[-\\s]*)?(.*)", + descriptionRegexp = regexp.MustCompile( + "^\\s*`([a-z0-9_]*)`\\s*(\\([a-zA-Z]*\\)\\s*)?\\s*[:–-]?\\s*(\\([^)]*\\)[-\\s]*)?((.|\n)*)", ) bulletPointRegexStr = "^\\s*[*+-]" // matches any bullet point-like character @@ -369,7 +369,6 @@ var ( ) attributionFormatString = "This Pulumi package is based on the [`%[1]s` Terraform Provider](https://%[3]s/%[2]s/terraform-provider-%[1]s)." - listMarkerRegex = regexp.MustCompile("[-*+]") ) func trimFrontMatter(text []byte) []byte { @@ -385,7 +384,6 @@ func trimFrontMatter(text []byte) []byte { } return body[idx+3:] } - func splitByMarkdownHeaders(text string, level int) [][]string { // splitByMarkdownHeaders parses text, then walks the resulting AST to find // appropriate header nodes. It uses the location of these header nodes to split @@ -397,7 +395,6 @@ func splitByMarkdownHeaders(text string, level int) [][]string { contract.Assertf(offset >= 0, "The offset generated by chopping of the front-matter cannot be negative") gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension)) - headers := []int{} parse.WalkNode(gm.Parser().Parse(gmtext.NewReader(bytes)), func(heading *gmast.Heading) { if heading.Level != level { @@ -797,91 +794,6 @@ func (p *tfMarkdownParser) parseSchemaWithNestedSections(subsection []string) { parseTopLevelSchemaIntoDocs(&p.ret, topLevelSchema, p.sink.warn) } -type markdownLineInfo struct { - name, desc string - isFound bool -} - -type bulletListEntry struct { - name string - index int -} - -// trackBulletListIndentation looks at the index of the bullet list marker ( `*`, `-` or `+`) in a docs line and -// compares it to a collection that tracks the level of list nesting by comparing to the previous list entry's nested -// level (if any). -// Note that this function only looks at the placement of the bullet list marker, and assumes same-level list markers -// to be in the same location in each line. This is not necessarily the case for Markdown, which considers a range of -// locations within 1-4 whitespace characters, as well as considers the start index of the text following the bullet -// point. If and when this becomes an issue during docs parsing, we may consider adding some of those rules here. -// Read more about nested lists in GitHub-flavored Markdown: -// https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#nested-lists -// -//nolint:lll -func trackBulletListIndentation(line, name string, tracker []bulletListEntry) []bulletListEntry { - - listMarkerLocation := listMarkerRegex.FindStringIndex(line) - contract.Assertf(len(listMarkerLocation) == 2, - fmt.Sprintf("Expected to find bullet list marker in line %s", line)) - listMarkerIndex := listMarkerLocation[0] - - // If our tracker is empty, we are at list nested level 0. 
- if len(tracker) == 0 { - newEntry := bulletListEntry{ - name: name, - index: listMarkerIndex, - } - return append(tracker, newEntry) - } - // Always compare to last entry in tracker - lastListEntry := tracker[len(tracker)-1] - - // if current line's listMarkerIndex is greater than the tracker's last entry's listMarkerIndex, - // make a new tracker entry and push it on there with all the info. - if listMarkerIndex > lastListEntry.index { - name = lastListEntry.name + "." + name - newEntry := bulletListEntry{ - name: name, - index: listMarkerIndex, - } - return append(tracker, newEntry) - } - // if current line's listMarkerIndex is the same as the last entry's, we're at the same level. - if listMarkerIndex == lastListEntry.index { - // Replace the last entry in our tracker - replaceEntry := bulletListEntry{ - index: listMarkerIndex, - } - if len(tracker) == 1 { - replaceEntry.name = name - } else { - // use the penultimate entry name to build current name - replaceName := tracker[(len(tracker)-2)].name + "." + name - replaceEntry.name = replaceName - } - return append(tracker[:len(tracker)-1], replaceEntry) - } - - // The current line's listMarkerIndex is smaller that the previous entry's. - // Pop off the latest entry, and retry to see if the next previous entry is a match. - return trackBulletListIndentation(line, name, tracker[:len(tracker)-1]) -} - -// parseArgFromMarkdownLine takes a line of Markdown and attempts to parse it for a Terraform argument and its -// description. It returns a struct containing the name and description of the arg, and whether an arg was found. -func parseArgFromMarkdownLine(line string) markdownLineInfo { - matches := argumentBulletRegexp.FindStringSubmatch(line) - var parsed markdownLineInfo - if len(matches) > 4 { - parsed.name = matches[1] - parsed.desc = matches[4] - parsed.isFound = true - } - return parsed -} - -var genericNestedRegexp = regexp.MustCompile("supports? the following:") - var nestedObjectRegexps = []*regexp.Regexp{ // For example: // s3_bucket.html.markdown: "The `website` object supports the following:" @@ -1022,106 +934,138 @@ func getNestedBlockNames(line string) []string { } func parseArgReferenceSection(subsection []string, ret *entityDocs) { - // Variable to remember the last argument we found. - var lastMatch string - // Collection to hold all arguments that headline a nested description. - var nesteds []docsPath - - addNewHeading := func(name, desc, line string) { - // found a property bullet, extract the name and description - if len(nesteds) > 0 { - for _, nested := range nesteds { - // We found this line within a nested field. We should record it as such. - if ret.Arguments[nested] == nil { - totalArgumentsFromDocs++ - } - ret.Arguments[nested.join(name)] = &argumentDocs{desc} - } - } else { - if genericNestedRegexp.MatchString(line) { - return + // Treat our subsection as a markdown node. This will later just be a node. + docBytes := []byte(strings.Join(subsection, "\n")) + + // Parse the document using Goldmark parser + gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension)) + astNode := gm.Parser().Parse(gmtext.NewReader(docBytes)) + + var paths []string + var writeList bool // tracking whether we need to write a list verbatim + err := gmast.Walk(astNode, func(node gmast.Node, enter bool) (gmast.WalkStatus, error) { + // When we find a list item, we check if it is an argument entry. + if node.Kind().String() == "ListItem" { + if enter { + // For any list item, we want to check if it opens with a code span. 
+ // It will be list item --> Text --> Code Span, so the grandchild of the list item. + codeSpanNode := node.FirstChild().FirstChild() + if codeSpanNode.Kind().String() == "CodeSpan" { + codeSpanItem := codeSpanNode.Text(docBytes) + + // The list item's first child is a text block. + // For most of our entries, this is all we need. + desc := writeLines(node.FirstChild().Lines(), docBytes) + + // To see if we have a TF name, use a regex match. + // The submatch looks for patterns such as + // + // `follow_gae_application` - (Optional) A GAE application whose zone to remain" + descs := descriptionRegexp.FindStringSubmatch(desc) + if len(descs) <= 4 { + writeList = true + } + + // add to docspaths if writeList is false + if !writeList { + paths = addPaths(paths, codeSpanItem) + } + // Read results into the return argument docs. When we're reading subfields for multiple fields, + // the description is still the same as discovered from the node's lines. + for _, path := range paths { + if !writeList { + ret.Arguments[docsPath(path)] = &argumentDocs{descs[4]} + } else { + // We need to write the entire list item into the description. + // We'll just append each list item as it is visited. + currentDesc := ret.Arguments[docsPath(path)].description + newDesc := currentDesc + "\n* " + desc + ret.Arguments[docsPath(path)] = &argumentDocs{newDesc} + } + } + } + } else { + if !writeList { + paths = cutPaths(paths) + } } - ret.Arguments[docsPath(name)] = &argumentDocs{description: desc} - totalArgumentsFromDocs++ - } - } - // This function adds the current line as a description to the last matched resource, - //in cases where there's no resource match found on this line. - //It represents a multi-line description for a field. - extendExistingHeading := func(line string) { - if len(nesteds) > 0 { - for _, nested := range nesteds { - line = "\n" + strings.TrimSpace(line) - ret.Arguments[nested.join(lastMatch)].description += line + } + if node.Kind().String() == "Section" { + writeList = false + // A Section's first child is its heading. + // In this part of the upstream document, a heading generally means a subresource name. + if enter { + // The text next to an arg reference's section header is assumed to be a resource field. + headerItem := node.FirstChild().Text(docBytes) + // add to docs paths + paths = addPaths(paths, headerItem) + } else { + paths = cutPaths(paths) } - } else { - if genericNestedRegexp.MatchString(line) { - lastMatch = "" - nesteds = []docsPath{} - return + } + // Additionally, there are top-level paragraphs that can contain information about nested docs, + // such as "The `foo_bar` object supports the following:". + if node.Kind().String() == "Paragraph" && node.Parent().Kind().String() == "Document" { + writeList = false + if enter { + // All of the fields mentioned in paragraphs can be treated as top-level, i.e. + // they're of the format "(The) `foo` [field|resource] supports the following:", or they already + // include the nested path as in "(The) `foo.bar` [field|resource] supports the following:". + // This means that at any detection of a top-level Paragraph node, we re-set the docsPath slice to empty. + paths = []string{} + paragraph := writeLines(node.Lines(), docBytes) + // Check if our paragraph matches any of the nested object signifiers. See `nestedObjectRegexps`. 
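+				// For example, a paragraph like "The `website` object supports the following:"
+				// yields the nested block name "website".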
+ nestedBlockNames := getNestedBlockNames(paragraph) + if len(nestedBlockNames) > 0 { + // write to docspath + paths = nestedBlockNames + } + } else { + // Because descriptions nested under paragraphs are not children, but rather siblings, + // we do not manipulate the docspath level at this point. Continue walking. + return gmast.WalkContinue, nil } - line = "\n" + strings.TrimSpace(line) - ret.Arguments[docsPath(lastMatch)].description += line } - } - - // hadSpace tells us if the previous line was blank. - var hadSpace bool + return gmast.WalkContinue, nil + }) + contract.AssertNoErrorf(err, "Cannot fail to parse argument reference") +} - // bulletListTracker is a stack-like collection that tracks the level of nesting for a bulleted list with - // nested lists. The name of the topmost entry represents the nested docs path for the current line. - var bulletListTracker []bulletListEntry +func writeLines(lines *gmtext.Segments, docBytes []byte) string { + var desc bytes.Buffer + for i := 0; i < lines.Len(); i++ { + line := lines.At(i) + desc.Write(line.Value(docBytes)) + } + return desc.String() +} - for _, line := range subsection { - parsedArg := parseArgFromMarkdownLine(line) - matchFound := parsedArg.isFound - if matchFound { // We have found a new property bullet point. - desc := parsedArg.desc - bulletListTracker = trackBulletListIndentation(line, parsedArg.name, bulletListTracker) - name := bulletListTracker[len(bulletListTracker)-1].name - lastMatch = name - addNewHeading(name, desc, line) - - } else if strings.TrimSpace(line) == "---" { - // --- is a markdown section break. This probably indicates the - // section is over, but we take it to mean that the current - // heading is over. - lastMatch = "" - bulletListTracker = nil - } else if nestedBlockCurrentLine := getNestedBlockNames(line); hadSpace && len(nestedBlockCurrentLine) > 0 { - // This tells us if there's a resource that is about to have subfields (nesteds) - // in subsequent lines. - //empty nesteds - nesteds = []docsPath{} - for _, item := range nestedBlockCurrentLine { - nesteds = append(nesteds, docsPath(item)) - } - lastMatch = "" - bulletListTracker = nil - } else if !isBlank(line) && lastMatch != "" { - // This appends the current line to the previous match's description. - extendExistingHeading(line) - - } else if nestedBlockCurrentLine := getNestedBlockNames(line); len(nestedBlockCurrentLine) > 0 { - // This tells us if there's a resource that is about to have subfields (nesteds) - // in subsequent lines. - //empty nesteds - nesteds = []docsPath{} - for _, item := range nestedBlockCurrentLine { - nesteds = append(nesteds, docsPath(item)) - } - lastMatch = "" - bulletListTracker = nil - } else if lastMatch != "" { - extendExistingHeading(line) +func cutPaths(paths []string) []string { + var newpaths []string + for _, p := range paths { + pathIndex := strings.LastIndex( + p, ".") + if pathIndex > 0 { + p = p[:pathIndex] + newpaths = append(newpaths, p) } - hadSpace = isBlank(line) } + return newpaths +} - for _, v := range ret.Arguments { - v.description = strings.TrimRightFunc(v.description, unicode.IsSpace) +func addPaths(paths []string, pathSection []byte) []string { + if len(paths) == 0 { + paths = append(paths, string(pathSection)) + } else { + var newPaths []string + for _, p := range paths { + p = p + "." 
+ string(pathSection) + newPaths = append(newPaths, p) + } + paths = newPaths } + return paths } func parseAttributesReferenceSection(subsection []string, ret *entityDocs) { diff --git a/pkg/tfgen/docs_test.go b/pkg/tfgen/docs_test.go index 6c7c7952a..6850873e2 100644 --- a/pkg/tfgen/docs_test.go +++ b/pkg/tfgen/docs_test.go @@ -155,6 +155,41 @@ func TestArgumentRegex(t *testing.T) { }, }, }, + { + name: "Parses multiple nested arguments via `object supports the following`", + input: []string{ + "The `token_configuration` object supports the following:", + "", + "* `audience` - (Optional) A list of the intended recipients of the token.", + "* `issuer` - (Optional) The base domain of the identity provider that issues the token", + "", + "The `jwt_configuration` object supports the following:", + "", + "* `audience` - (Optional) A list of the intended recipients of the JWT. A valid JWT must provide an aud that matches at least one entry in this list.", + "* `issuer` - (Optional) The base domain of the identity provider that issues JSON Web Tokens, such as the `endpoint` attribute of the [`aws_cognito_user_pool`](/docs/providers/aws/r/cognito_user_pool.html) resource.", + "", + " this is a new paragraph", + "", + "* `top_level_field` - (Required) This field's docspath should not be nested under the previous paragraph.", + }, + expected: map[docsPath]*argumentDocs{ + "token_configuration.audience": { + description: "A list of the intended recipients of the token."}, + "token_configuration.issuer": { + description: "The base domain of the identity provider that issues the token", + }, + + "jwt_configuration.audience": { + description: "A list of the intended recipients of the JWT. A valid JWT must provide an aud that matches at least one entry in this list.", + }, + "jwt_configuration.issuer": { + description: "The base domain of the identity provider that issues JSON Web Tokens, such as the `endpoint` attribute of the [`aws_cognito_user_pool`](/docs/providers/aws/r/cognito_user_pool.html) resource.", + }, + "top_level_field": { + description: "This field's docspath should not be nested under the previous paragraph.", + }, + }, + }, { name: "Renders ~> **NOTE:** and continues parsing as expected", input: []string{ @@ -231,7 +266,7 @@ func TestArgumentRegex(t *testing.T) { "* `retention_policy` - (Required) A `retention_policy` block as documented below.", "", "---", - "* `retention_policy` supports the following:", + "`retention_policy` supports the following:", }, expected: map[docsPath]*argumentDocs{ "retention_policy": { @@ -283,7 +318,7 @@ func TestArgumentRegex(t *testing.T) { description: "Indicates how to allocate the target capacity across\nthe Spot pools specified by the Spot fleet request. The default is\n`lowestPrice`.", }, "instance_pools_to_use_count": { - description: "\nThe number of Spot pools across which to allocate your target Spot capacity.\nValid only when `allocation_strategy` is set to `lowestPrice`. Spot Fleet selects\nthe cheapest Spot pools and evenly allocates your target Spot capacity across\nthe number of Spot pools that you specify.", + description: "The number of Spot pools across which to allocate your target Spot capacity.\nValid only when `allocation_strategy` is set to `lowestPrice`. 
Spot Fleet selects\nthe cheapest Spot pools and evenly allocates your target Spot capacity across\nthe number of Spot pools that you specify.", }, }, }, @@ -355,11 +390,11 @@ func TestArgumentRegex(t *testing.T) { }, expected: map[docsPath]*argumentDocs{ - "certificate_authority_configuration.key_algorithm": {description: "Type of the public key algorithm and size, in bits, of the key pair that your key pair creates when it issues a certificate. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, - "certificate_authority_configuration.signing_algorithm": {description: "Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, - "certificate_authority_configuration.subject": {description: "Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified."}, - "subject.common_name": {description: "Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length."}, - "subject.country": {description: "Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length."}, + "certificate_authority_configuration.key_algorithm": {description: "Type of the public key algorithm and size, in bits, of the key pair that your key pair creates when it issues a certificate. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, + "certificate_authority_configuration.signing_algorithm": {description: "Name of the algorithm your private CA uses to sign certificate requests. Valid values can be found in the [ACM PCA Documentation](https://docs.aws.amazon.com/privateca/latest/APIReference/API_CertificateAuthorityConfiguration.html)."}, + "certificate_authority_configuration.subject": {description: "Nested argument that contains X.500 distinguished name information. At least one nested attribute must be specified."}, + "certificate_authority_configuration.subject.common_name": {description: "Fully qualified domain name (FQDN) associated with the certificate subject. Must be less than or equal to 64 characters in length."}, + "certificate_authority_configuration.subject.country": {description: "Two digit code that specifies the country in which the certificate subject located. Must be less than or equal to 2 characters in length."}, }, }, { @@ -487,8 +522,8 @@ func TestArgumentRegex(t *testing.T) { }, expected: map[docsPath]*argumentDocs{ "node_pool_config": {description: "The configuration for the GKE node pool. \nIf specified, " + - "Dataproc attempts to create a node pool with the specified shape.\nIf one with the same name " + - "already exists, it is verified against all specified fields.\nIf a field differs, the virtual " + + "Dataproc attempts to create a node pool with the specified shape. \nIf one with the same name " + + "already exists, it is verified against all specified fields. 
\nIf a field differs, the virtual " + "cluster creation will fail.", }, }, @@ -560,7 +595,6 @@ func TestArgumentRegex(t *testing.T) { "settings.maintenance_window.day": {description: "Day of week (`1-7`), starting on Monday"}, }, }, - { name: "All caps bullet points are not parsed as TF properties", input: []string{ @@ -1534,37 +1568,6 @@ FooFactory fooFactory = new FooFactory(); assert.Equal(t, buf.String(), hclConversionsToString(input)) } -func TestParseArgFromMarkdownLine(t *testing.T) { - //nolint:lll - tests := []struct { - input string - expectedName string - expectedDesc string - expectedFound bool - }{ - {"* `name` - (Required) A unique name to give the role.", "name", "A unique name to give the role.", true}, - {"* `key_vault_key_id` - (Optional) The Key Vault key URI for CMK encryption. Changing this forces a new resource to be created.", "key_vault_key_id", "The Key Vault key URI for CMK encryption. Changing this forces a new resource to be created.", true}, - {"* `urn` - The uniform resource name of the Droplet", "urn", "The uniform resource name of the Droplet", true}, - {"* `name`- The name of the Droplet", "name", "The name of the Droplet", true}, - {"* `jumbo_frame_capable` -Indicates whether jumbo frames (9001 MTU) are supported.", "jumbo_frame_capable", "Indicates whether jumbo frames (9001 MTU) are supported.", true}, - {"* `ssl_support_method`: Specifies how you want CloudFront to serve HTTPS", "ssl_support_method", "Specifies how you want CloudFront to serve HTTPS", true}, - {"* `principal_tags`: (Optional: []) - String to string map of variables.", "principal_tags", "String to string map of variables.", true}, - {" * `id` - The id of the property", "id", "The id of the property", true}, - {" * id - The id of the property", "", "", false}, - //In rare cases, we may have a match where description is empty like the following, taken from https://github.com/hashicorp/terraform-provider-aws/blob/main/website/docs/r/spot_fleet_request.html.markdown - {"* `instance_pools_to_use_count` - (Optional; Default: 1)", "instance_pools_to_use_count", "", true}, - {"", "", "", false}, - {"Most of these arguments directly correspond to the", "", "", false}, - } - - for _, test := range tests { - parsedLine := parseArgFromMarkdownLine(test.input) - assert.Equal(t, test.expectedName, parsedLine.name) - assert.Equal(t, test.expectedDesc, parsedLine.desc) - assert.Equal(t, test.expectedFound, parsedLine.isFound) - } -} - func TestParseAttributesReferenceSection(t *testing.T) { ret := entityDocs{ Arguments: make(map[docsPath]*argumentDocs), diff --git a/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json b/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json index 4d9bdd0fc..ea802493f 100644 --- a/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json +++ b/pkg/tfgen/test_data/azurerm-sql-firewall-rule/expected.json @@ -2,8 +2,7 @@ "Description": "Allows you to manage an Azure SQL Firewall Rule.\n\n\u003e **Note:** The `azurerm_sql_firewall_rule` resource is deprecated in version 3.0 of the AzureRM provider and will be removed in version 4.0. 
Please use the `azurerm_mssql_firewall_rule` resource instead.\n\n## Example Usage\n\n```hcl\nresource \"azurerm_resource_group\" \"example\" {\n name = \"example-resources\"\n location = \"West Europe\"\n}\n\nresource \"azurerm_sql_server\" \"example\" {\n name = \"mysqlserver\"\n resource_group_name = azurerm_resource_group.example.name\n location = azurerm_resource_group.example.location\n version = \"12.0\"\n administrator_login = \"4dm1n157r470r\"\n administrator_login_password = \"4-v3ry-53cr37-p455w0rd\"\n}\n\nresource \"azurerm_sql_firewall_rule\" \"example\" {\n name = \"FirewallRule1\"\n resource_group_name = azurerm_resource_group.example.name\n server_name = azurerm_sql_server.example.name\n start_ip_address = \"10.0.17.62\"\n end_ip_address = \"10.0.17.62\"\n}\n```", "Arguments": { "end_ip_address": { - "description": "The ending IP address to allow through the firewall for this rule.\n\n\u003e **NOTE:** The Azure feature `Allow access to Azure services` can be enabled by setting `start_ip_address` and `end_ip_address` to `0.0.0.0` which ([is documented in the Azure API Docs](https://docs.microsoft.com/rest/api/sql/firewallrules/createorupdate))." - }, + "description": "The ending IP address to allow through the firewall for this rule."}, "name": { "description": "The name of the firewall rule. Changing this forces a new resource to be created." },