Add autoID for definition terms

Fixes #13403
See #11566

Co-authored-by: Joe Mooring <joe@mooring.com>
This commit is contained in:
Bjørn Erik Pedersen 2025-02-15 17:13:20 +01:00
parent 9c2f8ec61b
commit 157d3703c3
9 changed files with 262 additions and 47 deletions

View file

@ -26,6 +26,7 @@ import (
"github.com/gohugoio/hugo/common/text"
"github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util"
@ -43,11 +44,11 @@ func sanitizeAnchorName(b []byte, idType string) []byte {
func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
buf := bp.GetBuffer()
if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
if idType == goldmark_config.AutoIDTypeBlackfriday {
// TODO(bep) make it more efficient.
buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
} else {
asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
asciiOnly := idType == goldmark_config.AutoIDTypeGitHubAscii
if asciiOnly {
// Normalize it to preserve accents if possible.
@ -90,8 +91,9 @@ func isAlphaNumeric(r rune) bool {
var _ parser.IDs = (*idFactory)(nil)
type idFactory struct {
idType string
vals map[string]struct{}
idType string
vals map[string]struct{}
duplicates []string
}
func newIDFactory(idType string) *idFactory {
@ -101,11 +103,28 @@ func newIDFactory(idType string) *idFactory {
}
}
// stringValuesProvider is implemented by ID stores that can list the IDs
// they hold as strings.
type stringValuesProvider interface {
// StringValues returns the held IDs as a slice of strings.
StringValues() []string
}
var _ stringValuesProvider = (*idFactory)(nil)
// StringValues returns every ID recorded by the factory: each unique ID once,
// followed by one entry for every repeated registration of an already-known ID.
//
// The unique IDs come from a map iteration, so their relative order is
// unspecified; the duplicates keep the order in which they were recorded.
func (ids *idFactory) StringValues() []string {
	// Size for both the unique IDs and the duplicates so the final append
	// never has to grow (and copy) the slice.
	values := make([]string, 0, len(ids.vals)+len(ids.duplicates))
	for k := range ids.vals {
		values = append(values, k)
	}
	values = append(values, ids.duplicates...)
	return values
}
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
if buf.Len() == 0 {
if kind == ast.KindHeading {
buf.WriteString("heading")
} else if kind == east.KindDefinitionTerm {
buf.WriteString("term")
} else {
buf.WriteString("id")
}
@ -123,11 +142,18 @@ func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
buf.Truncate(pos)
}
}
ids.vals[buf.String()] = struct{}{}
ids.put(buf.String())
})
}
func (ids *idFactory) Put(value []byte) {
ids.vals[util.BytesToReadOnlyString(value)] = struct{}{}
// put records s as a used ID. The first registration of an ID is stored in
// the vals set; every later registration of the same ID is appended to
// duplicates instead.
func (ids *idFactory) put(s string) {
	_, seen := ids.vals[s]
	if !seen {
		ids.vals[s] = struct{}{}
		return
	}
	ids.duplicates = append(ids.duplicates, s)
}
// Put records value as a used ID. The bytes are converted to a string and
// handed to put, which also tracks repeated registrations.
func (ids *idFactory) Put(value []byte) {
ids.put(string(value))
}

View file

@ -78,9 +78,9 @@ tabspace
expect := expectlines[i]
c.Run(input, func(c *qt.C) {
b := []byte(input)
got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
got := string(sanitizeAnchorName(b, goldmark_config.AutoIDTypeGitHub))
c.Assert(got, qt.Equals, expect)
c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub), qt.Equals, expect)
c.Assert(string(b), qt.Equals, input)
})
}
@ -89,20 +89,20 @@ tabspace
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
c := qt.New(t)
c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "god-is-good")
c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "resume")
}
func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
c := qt.New(t)
c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
}
func BenchmarkSanitizeAnchorName(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHub)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}
@ -113,7 +113,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHubAscii)
if len(result) != 12 {
b.Fatalf("got %d", len(result))
}
@ -124,7 +124,7 @@ func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday)
result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeBlackfriday)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}
@ -135,7 +135,7 @@ func BenchmarkSanitizeAnchorNameString(b *testing.B) {
input := "God is good: 神真美好"
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
result := sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}

View file

@ -61,7 +61,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
cfg: cfg,
md: md,
sanitizeAnchorName: func(s string) string {
return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType)
return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoIDType)
},
}, nil
}), nil
@ -188,16 +188,12 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
extensions = append(extensions, emoji.Emoji)
}
if cfg.Parser.AutoHeadingID {
parserOptions = append(parserOptions, parser.WithAutoHeadingID())
}
if cfg.Parser.Attribute.Title {
parserOptions = append(parserOptions, parser.WithAttribute())
}
if cfg.Parser.Attribute.Block {
extensions = append(extensions, attributes.New())
if cfg.Parser.Attribute.Block || cfg.Parser.AutoHeadingID || cfg.Parser.AutoDefinitionTermID {
extensions = append(extensions, attributes.New(cfg.Parser))
}
md := goldmark.New(
@ -295,7 +291,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (converter.Resu
}
func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext {
ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType)))
ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoIDType)))
ctx.Set(tocEnableKey, rctx.RenderTOC)
return &parserContext{
Context: ctx,

View file

@ -15,9 +15,9 @@
package goldmark_config
const (
AutoHeadingIDTypeGitHub = "github"
AutoHeadingIDTypeGitHubAscii = "github-ascii"
AutoHeadingIDTypeBlackfriday = "blackfriday"
AutoIDTypeGitHub = "github"
AutoIDTypeGitHubAscii = "github-ascii"
AutoIDTypeBlackfriday = "blackfriday"
)
// Default holds the default Goldmark configuration.
@ -79,7 +79,8 @@ var Default = Config{
},
Parser: Parser{
AutoHeadingID: true,
AutoHeadingIDType: AutoHeadingIDTypeGitHub,
AutoDefinitionTermID: false,
AutoIDType: AutoIDTypeGitHub,
WrapStandAloneImageWithinParagraph: true,
Attribute: ParserAttribute{
Title: true,
@ -97,6 +98,16 @@ type Config struct {
RenderHooks RenderHooks
}
// Init finalizes the Goldmark configuration after decoding. It initializes
// the parser settings first and returns any error from that step unchanged.
// It then switches off automatic definition term IDs when the definition
// list extension is not enabled.
func (c *Config) Init() error {
	err := c.Parser.Init()
	if err != nil {
		return err
	}

	// Term IDs stay enabled only while the definition list extension is on.
	c.Parser.AutoDefinitionTermID = c.Parser.AutoDefinitionTermID && c.Extensions.DefinitionList
	return nil
}
// RenderHooks contains configuration for Goldmark render hooks.
type RenderHooks struct {
Image ImageRenderHook
@ -250,16 +261,30 @@ type Parser struct {
// auto generated heading ids.
AutoHeadingID bool
// The strategy to use when generating heading IDs.
// Available options are "github", "github-ascii".
// Enables auto definition term ids.
AutoDefinitionTermID bool
// The strategy to use when generating IDs.
// Available options are "github", "github-ascii", and "blackfriday".
// Default is "github", which will create GitHub-compatible anchor names.
AutoHeadingIDType string
AutoIDType string
// Enables custom attributes.
Attribute ParserAttribute
// Whether to wrap stand-alone images within a paragraph or not.
WrapStandAloneImageWithinParagraph bool
// Renamed to AutoIDType in 0.144.0.
AutoHeadingIDType string `json:"-"`
}
// Init applies backward-compatibility fixes to the parser settings.
//
// AutoHeadingIDType was renamed to AutoIDType in 0.144.0; when the old field
// is set, its value is copied into AutoIDType. Init always returns nil.
func (p *Parser) Init() error {
	if p.AutoHeadingIDType == "" {
		return nil
	}
	p.AutoIDType = p.AutoHeadingIDType
	return nil
}
type ParserAttribute struct {

View file

@ -1,8 +1,11 @@
package attributes
import (
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/markup/goldmark/internal/render"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
@ -14,24 +17,29 @@ import (
var (
kindAttributesBlock = ast.NewNodeKind("AttributesBlock")
attrNameID = []byte("id")
defaultParser = new(attrParser)
defaultTransformer = new(transformer)
attributes goldmark.Extender = new(attrExtension)
defaultParser = new(attrParser)
)
func New() goldmark.Extender {
return attributes
func New(cfg goldmark_config.Parser) goldmark.Extender {
return &attrExtension{cfg: cfg}
}
type attrExtension struct{}
// attrExtension is the goldmark extension returned by New.
type attrExtension struct {
// cfg is the parser configuration passed to New; Extend consults it to
// decide what to register.
cfg goldmark_config.Parser
}
func (a *attrExtension) Extend(m goldmark.Markdown) {
if a.cfg.Attribute.Block {
m.Parser().AddOptions(
parser.WithBlockParsers(
util.Prioritized(defaultParser, 100)),
)
}
m.Parser().AddOptions(
parser.WithBlockParsers(
util.Prioritized(defaultParser, 100)),
parser.WithASTTransformers(
util.Prioritized(defaultTransformer, 100),
util.Prioritized(&transformer{cfg: a.cfg}, 100),
),
)
}
@ -92,18 +100,47 @@ func (a *attributesBlock) Kind() ast.NodeKind {
return kindAttributesBlock
}
type transformer struct{}
// transformer is the AST transformer that assigns IDs to headings and
// definition terms and processes attribute blocks.
type transformer struct {
// cfg decides which node types get auto-generated IDs and whether
// attribute blocks are handled.
cfg goldmark_config.Parser
}
// isFragmentNode reports whether n is one of the node kinds that can carry
// an ID here: a heading or a definition term.
func (a *transformer) isFragmentNode(n ast.Node) bool {
	kind := n.Kind()
	return kind == ast.KindHeading || kind == east.KindDefinitionTerm
}
func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
attributes := make([]ast.Node, 0, 500)
var attributes []ast.Node
if a.cfg.Attribute.Block {
attributes = make([]ast.Node, 0, 500)
}
ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) {
if entering && node.Kind() == kindAttributesBlock {
if !entering {
return ast.WalkContinue, nil
}
if a.isFragmentNode(node) {
if id, found := node.Attribute(attrNameID); !found {
a.generateAutoID(node, reader, pc)
} else {
pc.IDs().Put(id.([]byte))
}
}
if a.cfg.Attribute.Block && node.Kind() == kindAttributesBlock {
// Attributes for fenced code blocks are handled in their own extension,
// but note that we currently only support code block attributes when
// CodeFences=true.
if node.PreviousSibling() != nil && node.PreviousSibling().Kind() != ast.KindFencedCodeBlock && !node.HasBlankPreviousLines() {
attributes = append(attributes, node)
return ast.WalkSkipChildren, nil
} else {
// remove attributes node
node.Parent().RemoveChild(node.Parent(), node)
}
}
@ -123,3 +160,33 @@ func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parse
attr.Parent().RemoveChild(attr.Parent(), attr)
}
}
// generateAutoID derives an ID from the text of n and stores it as the
// node's "id" attribute.
//
// Headings are handled only when AutoHeadingID is enabled and definition
// terms only when AutoDefinitionTermID is enabled; every other node type is
// left untouched. A node whose extracted text is empty gets no ID.
func (a *transformer) generateAutoID(n ast.Node, reader text.Reader, pc parser.Context) {
	// Named idSource rather than text so the imported text package is not
	// shadowed inside the function body.
	var idSource []byte

	switch node := n.(type) {
	case *ast.Heading:
		if !a.cfg.AutoHeadingID {
			return
		}
		idSource = textHeadingID(node, reader)
	case *east.DefinitionTerm:
		if !a.cfg.AutoDefinitionTermID {
			return
		}
		idSource = []byte(render.TextPlain(node, reader.Source()))
	default:
		return
	}

	if len(idSource) == 0 {
		return
	}

	// The node kind is passed along to the ID generator together with the text.
	autoID := pc.IDs().Generate(idSource, n.Kind())
	n.SetAttribute(attrNameID, autoID)
}
// textHeadingID returns the source text used to build a heading's ID.
// Setext headings may span several lines; only the last line is used.
// It returns nil when the heading has no lines.
func textHeadingID(node *ast.Heading, reader text.Reader) []byte {
	lines := node.Lines()
	count := lines.Len()
	if count < 1 {
		return nil
	}
	last := lines.At(count - 1)
	return last.Value(reader.Source())
}

View file

@ -0,0 +1,74 @@
package attributes_test
import (
"testing"
"github.com/gohugoio/hugo/hugolib"
)
// TestDescriptionListAutoID builds a one-page site with autoHeadingID and
// autoDefinitionTermID enabled and autoIDType set to 'github-ascii', then
// checks the id attributes rendered on headings and definition terms as well
// as the identifiers list printed by the template.
func TestDescriptionListAutoID(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
[markup.goldmark.parser]
autoHeadingID = true
autoDefinitionTermID = true
autoIDType = 'github-ascii'
-- content/p1.md --
---
title: "Title"
---
## Title with id set {#title-with-id}
## Title with id set duplicate {#title-with-id}
## My Title
Base Name
: Base name of the file.
Base Name
: Duplicate term name.
My Title
: Term with same name as title.
Foo@Bar
: The foo bar.
foo [something](/a/b/) bar
: A foo bar.
良善天父
: The good father.
Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď
: Testing accents.
Mutiline set text header
Second line
---------------
-- layouts/_default/single.html --
{{ .Content }}|Identifiers: {{ .Fragments.Identifiers }}|
`
b := hugolib.Test(t, files)
// Expected output: repeated generated IDs get a numeric suffix, explicitly
// set IDs are kept as written (even when repeated), and a term with no
// ASCII characters is rendered with the id "term".
b.AssertFileContent("public/p1/index.html",
`<dt id="base-name">Base Name</dt>`,
`<dt id="base-name-1">Base Name</dt>`,
`<dt id="foobar">Foo@Bar</dt>`,
`<h2 id="my-title">My Title</h2>`,
`<dt id="foo-something-bar">foo <a href="/a/b/">something</a> bar</dt>`,
`<h2 id="title-with-id">Title with id set</h2>`,
`<h2 id="title-with-id">Title with id set duplicate</h2>`,
`<dt id="my-title-1">My Title</dt>`,
`<dt id="term">良善天父</dt>`,
`<dt id="a-a-a-a-a-a-c-c-c-c-c-c-c-c-d">Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď</dt>`,
`<h2 id="second-line">Mutiline set text header`,
"|Identifiers: [a-a-a-a-a-a-c-c-c-c-c-c-c-c-d base-name base-name-1 foo-something-bar foobar my-title my-title-1 second-line term title-with-id title-with-id]|",
)
}

View file

@ -53,6 +53,10 @@ func (t *tocTransformer) Transform(n *ast.Document, reader text.Reader, pc parse
headingText bytes.Buffer
)
if ids := pc.IDs().(stringValuesProvider).StringValues(); len(ids) > 0 {
toc.SetIdentifiers(ids)
}
ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
s := ast.WalkStatus(ast.WalkContinue)
if n.Kind() == ast.KindHeading {
@ -131,5 +135,7 @@ func (e *tocExtension) Extend(m goldmark.Markdown) {
r.AddOptions(e.options...)
m.Parser().AddOptions(parser.WithASTTransformers(util.Prioritized(&tocTransformer{
r: r,
}, 10)))
},
// This must run after the ID generation (priority 100).
110)))
}

View file

@ -41,6 +41,10 @@ type Config struct {
AsciidocExt asciidocext_config.Config
}
// Init initializes the Goldmark section of the markup configuration and
// returns whatever error that initialization reports.
func (c *Config) Init() error {
return c.Goldmark.Init()
}
func Decode(cfg config.Provider) (conf Config, err error) {
conf = Default
@ -57,6 +61,10 @@ func Decode(cfg config.Provider) (conf Config, err error) {
return
}
if err = conf.Init(); err != nil {
return
}
if err = highlight.ApplyLegacyConfig(cfg, &conf.Highlight); err != nil {
return
}

View file

@ -31,7 +31,8 @@ var Empty = &Fragments{
// Builder is used to build the ToC data structure.
type Builder struct {
toc *Fragments
identifiersSet bool
toc *Fragments
}
// AddAt adds the heading to the ToC.
@ -42,6 +43,16 @@ func (b *Builder) AddAt(h *Heading, row, level int) {
b.toc.addAt(h, row, level)
}
// SetIdentifiers installs ids as the ToC's identifiers, creating the ToC if
// it does not exist yet. The slice is sorted in place and stored without
// copying, so the caller's slice is reordered and shared with the ToC.
// It also marks the identifiers as explicitly set.
func (b *Builder) SetIdentifiers(ids []string) {
	sort.Strings(ids)
	b.identifiersSet = true

	if b.toc == nil {
		b.toc = new(Fragments)
	}
	b.toc.Identifiers = ids
}
// Build returns the ToC.
func (b Builder) Build() *Fragments {
if b.toc == nil {
@ -51,7 +62,9 @@ func (b Builder) Build() *Fragments {
b.toc.walk(func(h *Heading) {
if h.ID != "" {
b.toc.HeadingsMap[h.ID] = h
b.toc.Identifiers = append(b.toc.Identifiers, h.ID)
if !b.identifiersSet {
b.toc.Identifiers = append(b.toc.Identifiers, h.ID)
}
}
})
sort.Strings(b.toc.Identifiers)