diff --git a/ext/go.mod b/ext/go.mod index cf7d436f..d579b420 100644 --- a/ext/go.mod +++ b/ext/go.mod @@ -4,6 +4,13 @@ go 1.20 require ( github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c + github.com/stretchr/testify v1.10.0 golang.org/x/net v0.34.0 golang.org/x/text v0.21.0 ) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/ext/go.sum b/ext/go.sum index 3d419c8d..e357d965 100644 --- a/ext/go.sum +++ b/ext/go.sum @@ -1,6 +1,16 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c h1:yWAGp1CjD1mQGLUsADqPn5s1n2AkGAX33XLDUgoXzyo= github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c/go.mod h1:P73liMk9TZCyF9fXG/RyMeSizmATvpvy3ZS61/1eXn4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff 
--git a/ext/har/logger.go b/ext/har/logger.go new file mode 100644 index 00000000..c020eaa0 --- /dev/null +++ b/ext/har/logger.go @@ -0,0 +1,124 @@ +package har + +import ( + "net/http" + "time" + + "github.com/elazarl/goproxy" +) + +// ExportFunc is a function type that users can implement to handle exported entries +type ExportFunc func([]Entry) + +// Logger implements a HAR logging extension for goproxy +type Logger struct { + exportFunc ExportFunc + exportInterval time.Duration + exportThreshold int + dataCh chan Entry +} + +// LoggerOption is a function type for configuring the Logger +type LoggerOption func(*Logger) + +// WithExportInterval sets the interval for automatic exports +func WithExportInterval(d time.Duration) LoggerOption { + return func(l *Logger) { + l.exportInterval = d + } +} + +// WithExportCount sets the number of requests after which to export entries +func WithExportThreshold(threshold int) LoggerOption { + return func(l *Logger) { + l.exportThreshold = threshold + } +} + +// NewLogger creates a new HAR logger instance +func NewLogger(exportFunc ExportFunc, opts ...LoggerOption) *Logger { + l := &Logger{ + exportFunc: exportFunc, + exportThreshold: 100, // Default threshold + exportInterval: 0, // Default no interval + dataCh: make(chan Entry), + } + + // Apply options + for _, opt := range opts { + opt(l) + } + + go l.exportLoop() + return l +} +// OnRequest handles incoming HTTP requests +func (l *Logger) OnRequest(req *http.Request, ctx *goproxy.ProxyCtx) (*http.Request, *http.Response) { + ctx.UserData = time.Now() + return req, nil +} + +// OnResponse handles HTTP responses +func (l *Logger) OnResponse(resp *http.Response, ctx *goproxy.ProxyCtx) *http.Response { + if resp == nil || ctx.Req == nil || ctx.UserData == nil { + return resp + } + startTime, ok := ctx.UserData.(time.Time) + if !ok { + return resp + } + + entry := Entry{ + StartedDateTime: startTime, + Time: time.Since(startTime).Milliseconds(), + Request: parseRequest(ctx), 
+ Response: parseResponse(ctx), + Timings: Timings{ + Send: 0, + Wait: time.Since(startTime).Milliseconds(), + Receive: 0, + }, + } + entry.fillIPAddress(ctx.Req) + + l.dataCh <- entry + return resp +} + +func (l *Logger) exportLoop() { + var entries []Entry + + exportIfNeeded := func() { + if len(entries) > 0 { + go l.exportFunc(entries) + entries = nil + } + } + + var tickerC <-chan time.Time + if l.exportInterval > 0 { + ticker := time.NewTicker(l.exportInterval) + defer ticker.Stop() + tickerC = ticker.C + } + + for { + select { + case entry, ok := <-l.dataCh: + if !ok { + exportIfNeeded() + return + } + entries = append(entries, entry) + if l.exportThreshold > 0 && len(entries) >= l.exportThreshold { + exportIfNeeded() + } + case <-tickerC: + exportIfNeeded() + } + } +} + +func (l *Logger) Stop() { + close(l.dataCh) +} diff --git a/ext/har/logger_test.go b/ext/har/logger_test.go new file mode 100644 index 00000000..f6d8a807 --- /dev/null +++ b/ext/har/logger_test.go @@ -0,0 +1,217 @@ +package har + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "sync" + "testing" + "time" + "github.com/elazarl/goproxy" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// ConstantHandler is a simple HTTP handler that returns a constant response +type ConstantHandler string + +func (h ConstantHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + io.WriteString(w, string(h)) +} + +// createTestProxy sets up a test proxy with a HAR logger +func createTestProxy(logger *Logger) *httptest.Server { + proxy := goproxy.NewProxyHttpServer() + proxy.OnRequest().DoFunc(logger.OnRequest) + proxy.OnResponse().DoFunc(logger.OnResponse) + return httptest.NewServer(proxy) +} + +// createProxyClient creates an HTTP client that uses the given proxy +func createProxyClient(proxyURL string) *http.Client { + proxyURLParsed, _ := url.Parse(proxyURL) + tr := 
&http.Transport{ + Proxy: http.ProxyURL(proxyURLParsed), + } + return &http.Client{Transport: tr} +} + +func TestHarLoggerBasicFunctionality(t *testing.T) { + testCases := []struct { + name string + method string + body string + contentType string + expectedMethod string + }{ + { + name: "GET Request", + method: http.MethodGet, + expectedMethod: http.MethodGet, + }, + { + name: "POST Request", + method: http.MethodPost, + body: `{"test":"data"}`, + contentType: "application/json", + expectedMethod: http.MethodPost, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var wg sync.WaitGroup + wg.Add(1) + + var exportedEntries []Entry + exportFunc := func(entries []Entry) { + exportedEntries = append(exportedEntries, entries...) + wg.Done() + } + + logger := NewLogger(exportFunc, WithExportThreshold(1)) // Export after each request + defer logger.Stop() + + background := httptest.NewServer(ConstantHandler("hello world")) + defer background.Close() + + proxyServer := createTestProxy(logger) + defer proxyServer.Close() + + client := createProxyClient(proxyServer.URL) + + req, err := http.NewRequestWithContext( + context.Background(), + tc.method, + background.URL, + strings.NewReader(tc.body), + ) + require.NoError(t, err, "Should create request") + + if tc.contentType != "" { + req.Header.Set("Content-Type", tc.contentType) + } + + resp, err := client.Do(req) + require.NoError(t, err, "Should send request successfully") + defer resp.Body.Close() + + bodyBytes, err := io.ReadAll(resp.Body) + require.NoError(t, err, "Should read response body") + + body := string(bodyBytes) + assert.Equal(t, "hello world", body, "Response body should match") + + wg.Wait() // Wait for export to complete + + assert.Len(t, exportedEntries, 1, "Should have exactly one exported entry") + assert.Equal(t, tc.expectedMethod, exportedEntries[0].Request.Method, "Request method should match") + }) + } +} + +func TestLoggerThresholdExport(t *testing.T) { + var wg 
sync.WaitGroup + var exports [][]Entry + var mtx sync.Mutex + wg.Add(3) // Expect 3 exports (3,3,1) + + exportFunc := func(entries []Entry) { + mtx.Lock() + exports = append(exports, entries) + mtx.Unlock() + + t.Logf("Export occurred with %d entries", len(entries)) + wg.Done() + } + + threshold := 3 + logger := NewLogger(exportFunc, WithExportThreshold(threshold)) + + background := httptest.NewServer(ConstantHandler("test")) + defer background.Close() + proxyServer := createTestProxy(logger) + defer proxyServer.Close() + client := createProxyClient(proxyServer.URL) + + // Send 7 requests + for i := 0; i < 7; i++ { + req, err := http.NewRequestWithContext( + context.Background(), + http.MethodGet, + background.URL, + nil, + ) + require.NoError(t, err) + + resp, err := client.Do(req) + require.NoError(t, err) + resp.Body.Close() + } + + // Call Stop to trigger final export of remaining entries + logger.Stop() + wg.Wait() + + require.Equal(t, 3, len(exports), "should have 3 export batches") + + // Count batches by size + batchCounts := make(map[int]int) + for _, batch := range exports { + batchCounts[len(batch)]++ + } + + // Check batch sizes + assert.Equal(t, 2, batchCounts[threshold], "should have two batches of threshold size") + assert.Equal(t, 1, batchCounts[1], "should have one batch with 1 entry") +} + +func TestHarLoggerExportInterval(t *testing.T) { + var wg sync.WaitGroup + var mtx sync.Mutex + var exports [][]Entry + wg.Add(1) // Expect 1 export with all entries + + exportFunc := func(entries []Entry) { + mtx.Lock() + exports = append(exports, entries) + mtx.Unlock() + + t.Logf("Export occurred with %d entries", len(entries)) + wg.Done() + } + + logger := NewLogger(exportFunc, WithExportInterval(time.Second)) + + background := httptest.NewServer(ConstantHandler("test")) + defer background.Close() + proxyServer := createTestProxy(logger) + defer proxyServer.Close() + client := createProxyClient(proxyServer.URL) + + // Send 3 requests + for i := 0; i < 3; 
i++ { + req, err := http.NewRequestWithContext( + context.Background(), + http.MethodGet, + background.URL, + nil, + ) + require.NoError(t, err) + + resp, err := client.Do(req) + require.NoError(t, err) + resp.Body.Close() + } + + wg.Wait() + logger.Stop() + + require.Equal(t, 1, len(exports), "should have 1 export batch") + assert.Equal(t, 3, len(exports[0]), "Should have exported 3 entries") +} + diff --git a/ext/har/types.go b/ext/har/types.go new file mode 100644 index 00000000..0f169c77 --- /dev/null +++ b/ext/har/types.go @@ -0,0 +1,358 @@ +// Original implementation from abourget/goproxy, adapted for use as an extension. +// HAR specification: http://www.softwareishard.com/blog/har-12-spec/ +package har + +import ( + "bytes" + "io" + "net/http" + "net/url" + "mime" + "net" + "strings" + "time" + + "github.com/elazarl/goproxy" +) + +type Har struct { + Log Log `json:"log"` +} + +type Log struct { + Version string `json:"version"` + Creator Creator `json:"creator"` + Browser *Browser `json:"browser,omitempty"` + Pages []Page `json:"pages,omitempty"` + Entries []Entry `json:"entries"` + Comment string `json:"comment,omitempty"` +} + +func New() *Har { + har := &Har{ + Log: Log{ + Version: "1.2", + Creator: Creator{ + Name: "GoProxy", + Version: "1.0", + }, + Pages: make([]Page, 0, 10), + Entries: makeNewEntries(), + }, + } + return har +} + +func makeNewEntries() []Entry { + const startingEntrySize int = 1000 + return make([]Entry, 0, startingEntrySize) +} + +type Creator struct { + Name string `json:"name"` + Version string `json:"version"` + Comment string `json:"comment,omitempty"` +} + +type Browser struct { + Name string `json:"name"` + Version string `json:"version"` + Comment string `json:"comment,omitempty"` +} + +type Page struct { + ID string `json:"id,omitempty"` + StartedDateTime time.Time `json:"startedDateTime"` + Title string `json:"title"` + PageTimings PageTimings `json:"pageTimings"` + Comment string `json:"comment,omitempty"` +} + +type Entry 
struct { + PageRef string `json:"pageref,omitempty"` + StartedDateTime time.Time `json:"startedDateTime"` + Time int64 `json:"time"` + Request *Request `json:"request"` + Response *Response `json:"response"` + Cache Cache `json:"cache"` + Timings Timings `json:"timings"` + ServerIpAddress string `json:"serverIpAddress,omitempty"` + Connection string `json:"connection,omitempty"` + Comment string `json:"comment,omitempty"` +} + +type Cache struct { + BeforeRequest *CacheEntry `json:"beforeRequest,omitempty"` + AfterRequest *CacheEntry `json:"afterRequest,omitempty"` +} + +type CacheEntry struct { + Expires string `json:"expires,omitempty"` + LastAccess string `json:"lastAccess"` + ETag string `json:"eTag"` + HitCount int `json:"hitCount"` + Comment string `json:"comment,omitempty"` +} + +type Request struct { + Method string `json:"method"` + Url string `json:"url"` + HttpVersion string `json:"httpVersion"` + Cookies []Cookie `json:"cookies"` + Headers []NameValuePair `json:"headers"` + QueryString []NameValuePair `json:"queryString"` + PostData *PostData `json:"postData,omitempty"` + BodySize int64 `json:"bodySize"` + HeadersSize int64 `json:"headersSize"` +} + +func (entry *Entry) fillIPAddress(req *http.Request) { + host := req.URL.Hostname() + + // try to parse the host as an IP address + if ip := net.ParseIP(host); ip != nil { + entry.ServerIpAddress = ip.String() + return + } +} + +// Shared utility function for reading body content +func readBody(ctx *goproxy.ProxyCtx, body io.ReadCloser) ([]byte, error) { + content, err := io.ReadAll(body) + if err != nil { + ctx.Proxy.Logger.Printf("Error reading body: %v", err) + return nil, err + } + return content, nil +} + +// Shared function for handling mime types +func parseMediaType(ctx *goproxy.ProxyCtx, header http.Header) string { + contentType := header.Get("Content-Type") + if contentType == "" { + return "" + } + + mediaType, _, err := mime.ParseMediaType(contentType) + if err != nil { + 
ctx.Proxy.Logger.Printf("Error parsing media type: %v", err) + return "" + } + return mediaType +} + +func parsePostData(ctx *goproxy.ProxyCtx, req *http.Request) *PostData { + mediaType := parseMediaType(ctx, req.Header) + if mediaType == "" { + return nil + } + + harPostData := &PostData{ + MimeType: mediaType, + } + + if err := req.ParseForm(); err != nil { + ctx.Proxy.Logger.Printf("Error parsing form: %v", err) + return nil + } + + if len(req.PostForm) > 0 { + for k, vals := range req.PostForm { + for _, v := range vals { + param := PostDataParam{ + Name: k, + Value: v, + } + harPostData.Params = append(harPostData.Params, param) + } + } + } else if body, err := readBody(ctx, req.Body); err == nil { + req.Body = io.NopCloser(bytes.NewBuffer(body)) + harPostData.Text = string(body) + } + + return harPostData +} + +type Response struct { + Status int `json:"status"` + StatusText string `json:"statusText"` + HttpVersion string `json:"httpVersion"` + Cookies []Cookie `json:"cookies"` + Headers []NameValuePair `json:"headers"` + Content Content `json:"content"` + RedirectUrl string `json:"redirectURL"` + BodySize int64 `json:"bodySize"` + HeadersSize int64 `json:"headersSize"` + Comment string `json:"comment,omitempty"` +} + +func parseResponse(ctx *goproxy.ProxyCtx) *Response { + if ctx.Resp == nil { + return nil + } + + resp := ctx.Resp + harResponse := Response{ + Status: resp.StatusCode, + StatusText: http.StatusText(resp.StatusCode), + HttpVersion: resp.Proto, + Cookies: parseCookies(resp.Cookies()), + Headers: parseStringArrMap(resp.Header), + RedirectUrl: resp.Header.Get("Location"), + BodySize: resp.ContentLength, + HeadersSize: -1, + } + + if resp.Body == nil { + return &harResponse + } + + body, err := readBody(ctx, resp.Body) + if err != nil { + return &harResponse + } + + resp.Body = io.NopCloser(bytes.NewBuffer(body)) + harResponse.Content = Content{ + Size: len(body), + Text: string(body), + MimeType: parseMediaType(ctx, resp.Header), + } + + return 
&harResponse +} + +func parseRequest(ctx *goproxy.ProxyCtx) *Request { + if ctx.Req == nil { + ctx.Proxy.Logger.Printf("ParseRequest: nil request") + return nil + } + + req := ctx.Req + harRequest := &Request{ + Method: req.Method, + Url: req.URL.String(), + HttpVersion: req.Proto, + Cookies: parseCookies(req.Cookies()), + Headers: parseStringArrMap(req.Header), + QueryString: parseStringArrMap(req.URL.Query()), + BodySize: req.ContentLength, + HeadersSize: -1, + } + + if req.Method != http.MethodPost && req.Method != http.MethodPut { + return harRequest + } + + ctx.Proxy.Logger.Printf("ParseRequest: creating PostData, hasBody=%v, hasGetBody=%v", + req.Body != nil, req.GetBody != nil) + + if postData := parsePostData(ctx, req); postData != nil { + harRequest.PostData = postData + } + + return harRequest +} + +func parseStringArrMap(stringArrMap map[string][]string) []NameValuePair { + harQueryString := make([]NameValuePair, 0, len(stringArrMap)) + + for k, v := range stringArrMap { + escapedKey, err := url.QueryUnescape(k) + if err != nil { + // Use original key if unescaping fails + escapedKey = k + } + + escapedValues, err := url.QueryUnescape(strings.Join(v, ",")) + if err != nil { + // Use original joined values if unescaping fails + escapedValues = strings.Join(v, ",") + } + + harNameValuePair := NameValuePair{ + Name: escapedKey, + Value: escapedValues, + } + + harQueryString = append(harQueryString, harNameValuePair) + } + + return harQueryString +} + +func parseCookies(cookies []*http.Cookie) []Cookie { + harCookies := make([]Cookie, len(cookies)) + for i, cookie := range cookies { + harCookie := Cookie{ + Name: cookie.Name, + Domain: cookie.Domain, + HttpOnly: cookie.HttpOnly, + Path: cookie.Path, + Secure: cookie.Secure, + Value: cookie.Value, + } + if !cookie.Expires.IsZero() { + harCookie.Expires = &cookie.Expires + } + harCookies[i] = harCookie + } + return harCookies +} + +type Cookie struct { + Name string `json:"name"` + Value string `json:"value"` 
// HAR payload types. Field order and JSON tags define the serialized form and
// must stay as they are; fields tagged omitempty are left out of the JSON when
// they hold their zero value.
type (
	// NameValuePair is a single named value, used for headers and query
	// string parameters.
	NameValuePair struct {
		Name  string `json:"name"`
		Value string `json:"value"`
	}

	// PostDataParam is one posted parameter inside PostData.
	PostDataParam struct {
		Name        string `json:"name"`
		Value       string `json:"value,omitempty"`
		FileName    string `json:"fileName,omitempty"`
		ContentType string `json:"contentType,omitempty"`
		Comment     string `json:"comment,omitempty"`
	}

	// PostData describes a request body, either as a list of parameters or as
	// plain text.
	PostData struct {
		MimeType string          `json:"mimeType"`
		Params   []PostDataParam `json:"params,omitempty"`
		Text     string          `json:"text,omitempty"`
		Comment  string          `json:"comment,omitempty"`
	}

	// Content describes a response body.
	Content struct {
		Size        int    `json:"size"`
		Compression int    `json:"compression,omitempty"`
		MimeType    string `json:"mimeType"`
		Text        string `json:"text,omitempty"`
		Encoding    string `json:"encoding,omitempty"`
		Comment     string `json:"comment,omitempty"`
	}

	// PageTimings holds the load timings of a page.
	PageTimings struct {
		OnContentLoad int64  `json:"onContentLoad"`
		OnLoad        int64  `json:"onLoad"`
		Comment       string `json:"comment,omitempty"`
	}

	// Timings breaks an entry's elapsed time into phases. Send, Wait and
	// Receive are always serialized; the remaining phases only when non-zero.
	Timings struct {
		Dns     int64  `json:"dns,omitempty"`
		Blocked int64  `json:"blocked,omitempty"`
		Connect int64  `json:"connect,omitempty"`
		Send    int64  `json:"send"`
		Wait    int64  `json:"wait"`
		Receive int64  `json:"receive"`
		Ssl     int64  `json:"ssl,omitempty"`
		Comment string `json:"comment,omitempty"`
	}
)