* Pin dev-trunk to SVN revision and remove unpinnable dist (#68) Extract per-slug revision numbers from SVN changelog (already available in the DAV log response) and store as trunk_revision on each package. The build step now emits `"reference": "trunk@<rev>"` in ~dev.json, making composer.lock reproducible for dev-trunk installs. The unversioned dist URL is removed since it always points to latest trunk. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Add backfill-revisions command and sanitize SVN log XML New `backfill-revisions` command scans SVN changelog backwards in chunks to populate trunk_revision for plugins that don't have one yet. Retries failed chunks up to 3 times and skips on persistent failure. Also sanitize illegal XML characters from SVN log responses to handle malformed commit messages in older revisions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Update dev-trunk notices to reflect SVN revision pinning With trunk_revision support, Composer now pins dev-trunk installs to a specific SVN revision in composer.lock. Replace mutability warnings with messaging about revision pinning. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Add untagged plugin install comparison to vs page Highlight that WP Packages pins dev-trunk to SVN revisions for reproducible installs, while WPackagist uses mutable trunk zips. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Ben Word <ben@benword.com>
231 lines
6.4 KiB
Go
231 lines
6.4 KiB
Go
package wporg
|
|
|
|
import (
	"bufio"
	"context"
	"encoding/xml"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"
)
|
|
|
|
// SVNEntry describes one top-level directory reported by an SVN listing.
type SVNEntry struct {
	// Slug is the directory name with its trailing slash removed.
	Slug string

	// LastCommitted is an optional timestamp and is nil when not known.
	// NOTE(review): presumably the time of the most recent commit to the
	// directory; the HTML listing parser in this file never sets it.
	LastCommitted *time.Time
}
|
|
|
|
// SVNListingResult carries the metadata gathered while parsing an SVN listing.
// The entries themselves are not stored here; they are handed to the caller's
// callback as they are discovered.
type SVNListingResult struct {
	// Revision is the repository revision found in the page header, or 0 when
	// no revision could be parsed.
	Revision int64
}
|
|
|
|
// ParseSVNListing fetches the SVN HTML directory listing and extracts slugs.
|
|
// It also returns the current SVN revision from the page header.
|
|
func (c *Client) ParseSVNListing(ctx context.Context, baseURL string, fn func(SVNEntry) error) (*SVNListingResult, error) {
|
|
client := &http.Client{Timeout: 600 * time.Second}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating SVN request: %w", err)
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetching SVN listing: %w", err)
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("SVN listing returned status %d", resp.StatusCode)
|
|
}
|
|
|
|
return parseSVNHTML(ctx, resp.Body, fn, c.logger)
|
|
}
|
|
|
|
func parseSVNHTML(ctx context.Context, r interface{ Read([]byte) (int, error) }, fn func(SVNEntry) error, logger *slog.Logger) (*SVNListingResult, error) {
|
|
scanner := bufio.NewScanner(r)
|
|
result := &SVNListingResult{}
|
|
|
|
var count int
|
|
for scanner.Scan() {
|
|
if ctx.Err() != nil {
|
|
return nil, ctx.Err()
|
|
}
|
|
|
|
line := scanner.Text()
|
|
|
|
// Extract revision from title: " - Revision 3483213: /"
|
|
if result.Revision == 0 {
|
|
if rev := parseSVNRevision(line); rev > 0 {
|
|
result.Revision = rev
|
|
}
|
|
}
|
|
|
|
// Each entry is: <li><a href="slug-name/">slug-name/</a></li>
|
|
idx := strings.Index(line, `<a href="`)
|
|
if idx < 0 {
|
|
continue
|
|
}
|
|
rest := line[idx+len(`<a href="`):]
|
|
end := strings.IndexByte(rest, '"')
|
|
if end < 0 {
|
|
continue
|
|
}
|
|
href := rest[:end]
|
|
|
|
slug := strings.TrimSuffix(href, "/")
|
|
if slug == "" || slug == ".." || strings.HasPrefix(slug, "!svn") {
|
|
continue
|
|
}
|
|
|
|
if err := fn(SVNEntry{Slug: slug}); err != nil {
|
|
return result, err
|
|
}
|
|
|
|
count++
|
|
if count%10000 == 0 {
|
|
logger.Info("SVN discovery progress", "entries", count)
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
return nil, fmt.Errorf("reading SVN listing: %w", err)
|
|
}
|
|
|
|
logger.Info("SVN discovery complete", "total_entries", count, "revision", result.Revision)
|
|
return result, nil
|
|
}
|
|
|
|
// parseSVNRevision pulls the revision number out of a header line such as:
//
//	<title> - Revision 3483213: /</title>
//	<h2> - Revision 3483213: /</h2>
//
// It looks at the text after the first "Revision " marker up to the next ':',
// '<' or space and parses it as a base-10 integer. It returns 0 when the
// marker is absent, when none of those terminators follows it, or when the
// text in between is not a valid 64-bit integer.
func parseSVNRevision(line string) int64 {
	_, tail, found := strings.Cut(line, "Revision ")
	if !found {
		return 0
	}

	stop := strings.IndexAny(tail, ":< ")
	if stop < 0 {
		return 0
	}

	if n, err := strconv.ParseInt(tail[:stop], 10, 64); err == nil {
		return n
	}
	return 0
}
|
|
|
|
// svnLogReport is the XML structure for an SVN DAV log-report response.
|
|
type svnLogReport struct {
|
|
Items []svnLogItem `xml:"log-item"`
|
|
}
|
|
|
|
// svnLogItem is a single log-item element of an SVN DAV log-report: one
// revision together with the paths it added, modified, or deleted.
type svnLogItem struct {
	// Revision is the numeric content of the version-name element.
	Revision int64 `xml:"version-name"`

	// Date is the content of the date element, kept as the raw string.
	Date string `xml:"date"`

	// AddedPaths, ModifiedPaths and DeletedPaths collect the repeated
	// added-path, modified-path and deleted-path elements respectively.
	AddedPaths    []string `xml:"added-path"`
	ModifiedPaths []string `xml:"modified-path"`
	DeletedPaths  []string `xml:"deleted-path"`
}
|
|
|
|
// FetchSVNChangedSlugs queries the SVN DAV log between two revisions and returns
|
|
// a map of unique top-level slugs (plugin/theme names) to the highest SVN revision
|
|
// that touched them within the queried range.
|
|
func (c *Client) FetchSVNChangedSlugs(ctx context.Context, baseURL string, fromRev, toRev int64) (map[string]int64, error) {
|
|
body := fmt.Sprintf(`<?xml version="1.0" encoding="utf-8"?>`+
|
|
`<S:log-report xmlns:S="svn:" xmlns:D="DAV:">`+
|
|
`<S:start-revision>%d</S:start-revision>`+
|
|
`<S:end-revision>%d</S:end-revision>`+
|
|
`<S:discover-changed-paths/>`+
|
|
`<S:limit>50000</S:limit>`+
|
|
`</S:log-report>`, toRev, fromRev)
|
|
|
|
reqURL := strings.TrimSuffix(baseURL, "/") + "/!svn/bc/0/"
|
|
req, err := http.NewRequestWithContext(ctx, "REPORT", reqURL, strings.NewReader(body))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating SVN log request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "text/xml")
|
|
|
|
// Use a generous timeout — catch-up runs after downtime can span many
|
|
// revisions and produce large responses.
|
|
davClient := &http.Client{Timeout: 600 * time.Second}
|
|
resp, err := davClient.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetching SVN log: %w", err)
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
|
|
return nil, fmt.Errorf("SVN log returned status %d: %s", resp.StatusCode, string(respBody))
|
|
}
|
|
|
|
data, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("reading SVN log response: %w", err)
|
|
}
|
|
|
|
return parseSVNLogSlugs(data)
|
|
}
|
|
|
|
// sanitizeXML returns a copy of data with every character that encoding/xml
// rejects removed, so that a log response containing a malformed commit
// message can still be parsed. The input slice is not modified.
//
// Removed:
//   - ASCII control bytes below 0x20 other than tab, newline and carriage
//     return;
//   - bytes that do not form valid UTF-8 (this includes encoded surrogates);
//   - the non-characters U+FFFE and U+FFFF.
//
// Everything else, including valid multi-byte UTF-8, is copied through
// unchanged. The result is always non-nil.
func sanitizeXML(data []byte) []byte {
	out := make([]byte, 0, len(data))
	for i := 0; i < len(data); {
		b := data[i]

		// ASCII fast path: a single byte, no decoding required.
		if b < utf8.RuneSelf {
			if b == 0x09 || b == 0x0A || b == 0x0D || b >= 0x20 {
				out = append(out, b)
			}
			i++
			continue
		}

		r, size := utf8.DecodeRune(data[i:])
		switch {
		case r == utf8.RuneError && size == 1:
			// Invalid UTF-8: drop this byte and resynchronise on the next one.
		case r == 0xFFFE || r == 0xFFFF:
			// Valid UTF-8 but outside the XML 1.0 character range.
		default:
			out = append(out, data[i:i+size]...)
		}
		i += size
	}
	return out
}
|
|
|
|
// parseSVNLogSlugs extracts unique top-level slugs from SVN log XML and maps
|
|
// each slug to the highest revision that touched it.
|
|
// Paths look like "/plugin-name/trunk/file.php" — we extract "plugin-name".
|
|
func parseSVNLogSlugs(data []byte) (map[string]int64, error) {
|
|
data = sanitizeXML(data)
|
|
var report svnLogReport
|
|
if err := xml.Unmarshal(data, &report); err != nil {
|
|
return nil, fmt.Errorf("parsing SVN log XML: %w", err)
|
|
}
|
|
|
|
slugRevisions := make(map[string]int64)
|
|
for _, item := range report.Items {
|
|
allPaths := make([]string, 0, len(item.AddedPaths)+len(item.ModifiedPaths)+len(item.DeletedPaths))
|
|
allPaths = append(allPaths, item.AddedPaths...)
|
|
allPaths = append(allPaths, item.ModifiedPaths...)
|
|
allPaths = append(allPaths, item.DeletedPaths...)
|
|
|
|
for _, p := range allPaths {
|
|
if slug := slugFromPath(p); slug != "" {
|
|
if item.Revision > slugRevisions[slug] {
|
|
slugRevisions[slug] = item.Revision
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return slugRevisions, nil
|
|
}
|
|
|
|
// slugFromPath returns the top-level directory (slug) of an SVN path.
//
//	"/akismet/trunk/akismet.php" → "akismet"
//	"/akismet"                   → "akismet"
//	"akismet"                    → "akismet"
//
// All leading slashes are ignored, so a doubled slash cannot produce a slug
// that itself contains '/'. It returns "" when there is no usable first
// segment: an empty path, a path made only of slashes, or a first segment of
// "." or "..", whether or not more segments follow.
func slugFromPath(path string) string {
	first, _, _ := strings.Cut(strings.TrimLeft(path, "/"), "/")
	switch first {
	case "", ".", "..":
		// Relative markers and empty segments never name a slug.
		return ""
	}
	return first
}
|