diff --git a/CHANGELOG.md b/CHANGELOG.md index 78474a7d..6a6262e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ An option to return also the empty found tokens was also added via `Tokenizer.KeepEmptyTokens(true)`. _This should fix the parsing of whitespace charactes around view query column names when no quotes are used ([#3616](https://github.com/pocketbase/pocketbase/discussions/3616#discussioncomment-7398564))._ +- Fixed the `:excerpt(max, withEllipsis?)` `field` query param modifier to properly add space to the generated text fragment after block tags. + ## v0.19.0 diff --git a/tools/rest/excerpt_modifier.go b/tools/rest/excerpt_modifier.go index 107a8fcf..07da1a06 100644 --- a/tools/rest/excerpt_modifier.go +++ b/tools/rest/excerpt_modifier.go @@ -78,8 +78,7 @@ func (m *excerptModifier) Modify(value any) (any, error) { return "", err } - var isNotEmpty bool - var needSpace bool + var hasPrevSpace bool // for all node types and more details check // https://pkg.go.dev/golang.org/x/net/html#Parse @@ -87,37 +86,47 @@ func (m *excerptModifier) Modify(value any) (any, error) { stripTags = func(n *html.Node) { switch n.Type { case html.TextNode: - if txt := strings.TrimSpace(whitespaceRegex.ReplaceAllString(n.Data, " ")); txt != "" { - if isNotEmpty && needSpace { - needSpace = false - builder.WriteString(" ") - } + // collapse multiple spaces into one + txt := whitespaceRegex.ReplaceAllString(n.Data, " ") + + if hasPrevSpace { + txt = strings.TrimLeft(txt, " ") + } + + if txt != "" { + hasPrevSpace = strings.HasSuffix(txt, " ") builder.WriteString(txt) - - if !isNotEmpty { - isNotEmpty = true - } - } - case html.ElementNode: - if !needSpace && !list.ExistInSlice(n.Data, inlineTags) { - needSpace = true } } - if builder.Len() > m.max { + // excerpt max has been reached => no need to further iterate + // (+2 for the extra whitespace suffix/prefix that will be trimmed later) + if builder.Len() > m.max+2 { return } for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type != html.ElementNode || !list.ExistInSlice(c.Data, excludeTags) { + isBlock := c.Type == html.ElementNode && !list.ExistInSlice(c.Data, inlineTags) + + if isBlock && !hasPrevSpace { + builder.WriteString(" ") + hasPrevSpace = true + } + stripTags(c) + + if isBlock && !hasPrevSpace { + builder.WriteString(" ") + hasPrevSpace = true + } } } } stripTags(doc) - result := builder.String() + result := strings.TrimSpace(builder.String()) if len(result) > m.max { result = strings.TrimSpace(result[:m.max]) diff --git a/tools/rest/excerpt_modifier_test.go b/tools/rest/excerpt_modifier_test.go index 67c69850..eedb25a8 100644 --- a/tools/rest/excerpt_modifier_test.go +++ b/tools/rest/excerpt_modifier_test.go @@ -86,9 +86,9 @@ func TestNewExcerptModifier(t *testing.T) { func TestExcerptModifierModify(t *testing.T) { // plain text value: "Hello t est12 3 word" html := `
Hello