TUN-7373: Streaming logs override for same actor

To help accommodate web browser interactions with websockets, when a
streaming logs session is requested for the same actor while already
serving a session for that user in a separate request, the original
request will be closed and the new request start streaming logs
instead. This should help with rogue sessions holding on for too long
with no client on the other side (before idle timeout or connection
close).
This commit is contained in:
Devin Carr
2023-04-21 11:54:37 -07:00
parent ee5e447d44
commit 38cd455e4d
109 changed files with 12691 additions and 1798 deletions

View File

@@ -10,8 +10,10 @@ import (
"context"
"fmt"
"io"
"log"
"os"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
@@ -232,6 +234,12 @@ func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error {
return runCmdContext(ctx, cmd)
}
// DebugHangingGoCommands may be set by tests to enable additional
// instrumentation (including panics) for debugging hanging Go commands.
//
// See golang/go#54461 for details.
var DebugHangingGoCommands = false
// runCmdContext is like exec.CommandContext except it sends os.Interrupt
// before os.Kill.
func runCmdContext(ctx context.Context, cmd *exec.Cmd) error {
@@ -243,11 +251,24 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) error {
resChan <- cmd.Wait()
}()
select {
case err := <-resChan:
return err
case <-ctx.Done():
// If we're interested in debugging hanging Go commands, stop waiting after a
// minute and panic with interesting information.
if DebugHangingGoCommands {
select {
case err := <-resChan:
return err
case <-time.After(1 * time.Minute):
HandleHangingGoCommand(cmd.Process)
case <-ctx.Done():
}
} else {
select {
case err := <-resChan:
return err
case <-ctx.Done():
}
}
// Cancelled. Interrupt and see if it ends voluntarily.
cmd.Process.Signal(os.Interrupt)
select {
@@ -255,11 +276,63 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) error {
return err
case <-time.After(time.Second):
}
// Didn't shut down in response to interrupt. Kill it hard.
cmd.Process.Kill()
// TODO(rfindley): per advice from bcmills@, it may be better to send SIGQUIT
// on certain platforms, such as unix.
if err := cmd.Process.Kill(); err != nil && DebugHangingGoCommands {
// Don't panic here as this reliably fails on windows with EINVAL.
log.Printf("error killing the Go command: %v", err)
}
// See above: don't wait indefinitely if we're debugging hanging Go commands.
if DebugHangingGoCommands {
select {
case err := <-resChan:
return err
case <-time.After(10 * time.Second): // a shorter wait as resChan should return quickly following Kill
HandleHangingGoCommand(cmd.Process)
}
}
return <-resChan
}
func HandleHangingGoCommand(proc *os.Process) {
switch runtime.GOOS {
case "linux", "darwin", "freebsd", "netbsd":
fmt.Fprintln(os.Stderr, `DETECTED A HANGING GO COMMAND
The gopls test runner has detected a hanging go command. In order to debug
this, the output of ps and lsof/fstat is printed below.
See golang/go#54461 for more details.`)
fmt.Fprintln(os.Stderr, "\nps axo ppid,pid,command:")
fmt.Fprintln(os.Stderr, "-------------------------")
psCmd := exec.Command("ps", "axo", "ppid,pid,command")
psCmd.Stdout = os.Stderr
psCmd.Stderr = os.Stderr
if err := psCmd.Run(); err != nil {
panic(fmt.Sprintf("running ps: %v", err))
}
listFiles := "lsof"
if runtime.GOOS == "freebsd" || runtime.GOOS == "netbsd" {
listFiles = "fstat"
}
fmt.Fprintln(os.Stderr, "\n"+listFiles+":")
fmt.Fprintln(os.Stderr, "-----")
listFilesCmd := exec.Command(listFiles)
listFilesCmd.Stdout = os.Stderr
listFilesCmd.Stderr = os.Stderr
if err := listFilesCmd.Run(); err != nil {
panic(fmt.Sprintf("running %s: %v", listFiles, err))
}
}
panic(fmt.Sprintf("detected hanging go command (pid %d): see golang/go#54461 for more details", proc.Pid))
}
func cmdDebugStr(cmd *exec.Cmd) string {
env := make(map[string]string)
for _, kv := range cmd.Env {