Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ The following optional telemetry sources can be enabled via environment variable

#### Flamegraph Command

Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flamegraph` to capture a system-wide software flamegraph. See [`perfspect flamegraph -h`](docs/perfspect_flamegraph.md) for all options.
Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flamegraph` to capture a system-wide software flamegraph. Native stacks use frame-pointer `perf` by default; `--dual-native-stacks` adds DWARF recording and merges those stacks with the frame-pointer profile. See [`perfspect flamegraph -h`](docs/perfspect_flamegraph.md) for all options.

> [!TIP]
> By default, flamegraphs are collected using the `cycles:P` event. To analyze different performance aspects, use the `--perf-event` flag to specify an alternative perf event (e.g., `cache-misses`, `instructions`, `branches`, `context-switches`, `mem-loads`, `mem-stores`, etc.).
Expand Down
52 changes: 30 additions & 22 deletions cmd/flamegraph/flamegraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,27 @@ var Cmd = &cobra.Command{
}

var (
flagInput string
flagFormat []string
flagDuration int
flagFrequency int
flagPids []int
flagNoSystemSummary bool
flagMaxDepth int
flagPerfEvent string
flagAsprofArguments string
flagInput string
flagFormat []string
flagDuration int
flagFrequency int
flagPids []int
flagNoSystemSummary bool
flagMaxDepth int
flagPerfEvent string
flagAsprofArguments string
flagDualNativeStacks bool
)

const (
flagDurationName = "duration"
flagFrequencyName = "frequency"
flagPidsName = "pids"
flagNoSystemSummaryName = "no-summary"
flagMaxDepthName = "max-depth"
flagPerfEventName = "perf-event"
flagAsprofArgumentsName = "asprof-args"
flagDurationName = "duration"
flagFrequencyName = "frequency"
flagPidsName = "pids"
flagNoSystemSummaryName = "no-summary"
flagMaxDepthName = "max-depth"
flagPerfEventName = "perf-event"
flagAsprofArgumentsName = "asprof-args"
flagDualNativeStacksName = "dual-native-stacks"
)

func init() {
Expand All @@ -75,6 +77,7 @@ func init() {
Cmd.Flags().IntVar(&flagMaxDepth, flagMaxDepthName, 0, "")
Cmd.Flags().StringVar(&flagPerfEvent, flagPerfEventName, "cycles:P", "")
Cmd.Flags().StringVar(&flagAsprofArguments, flagAsprofArgumentsName, "-t -F probesp+vtable", "")
Cmd.Flags().BoolVar(&flagDualNativeStacks, flagDualNativeStacksName, false, "")
workflow.AddTargetFlags(Cmd)

Cmd.SetUsageFunc(usageFunc)
Expand Down Expand Up @@ -124,6 +127,10 @@ func getFlagGroups() []app.FlagGroup {
Name: flagPerfEventName,
Help: "perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.)",
},
{
Name: flagDualNativeStacksName,
Help: "also record DWARF unwind perf and merge with frame-pointer stacks per process (larger profiles, longer post-processing time)",
},
{
Name: flagAsprofArgumentsName,
Help: "arguments to pass to async-profiler, e.g., $ asprof start <these arguments> -i <interval> <pid>.",
Expand Down Expand Up @@ -204,12 +211,13 @@ func runCmd(cmd *cobra.Command, args []string) error {
Cmd: cmd,
ReportNamePost: "flame",
ScriptParams: map[string]string{
"Frequency": strconv.Itoa(flagFrequency),
"Duration": strconv.Itoa(flagDuration),
"PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","),
"MaxDepth": strconv.Itoa(flagMaxDepth),
"PerfEvent": flagPerfEvent,
"AsprofArguments": flagAsprofArguments,
"Frequency": strconv.Itoa(flagFrequency),
"Duration": strconv.Itoa(flagDuration),
"PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","),
"MaxDepth": strconv.Itoa(flagMaxDepth),
"PerfEvent": flagPerfEvent,
"AsprofArguments": flagAsprofArguments,
"DualNativeStacks": strconv.FormatBool(flagDualNativeStacks),
},
Tables: tables,
Input: flagInput,
Expand Down
2 changes: 2 additions & 0 deletions docs/perfspect_flamegraph.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# perfspect flamegraph


```text
Collect flamegraph data from target(s)
Expand All @@ -17,6 +18,7 @@ Flags:
--frequency number of samples taken per second (default: 11)
--pids comma separated list of PIDs. If not specified, all PIDs will be collected (default: [])
--perf-event perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.) (default: cycles:P)
--dual-native-stacks also record DWARF unwind perf and merge with frame-pointer stacks per process (larger profiles, longer post-processing time) (default: false)
--asprof-args arguments to pass to async-profiler, e.g., $ asprof start <these arguments> -i <interval> <pid>. (default: -t -F probesp+vtable)
--max-depth maximum render depth of call stack in flamegraph (0 = no limit) (default: 0)
--format choose output format(s) from: all, html, txt, json (default: [html])
Expand Down
53 changes: 30 additions & 23 deletions internal/script/scripts.go
Original file line number Diff line number Diff line change
Expand Up @@ -1725,6 +1725,7 @@ duration={{.Duration}}
frequency={{.Frequency}}
maxdepth={{.MaxDepth}}
perf_event={{.PerfEvent}}
dual_native_stacks={{.DualNativeStacks}}
read -r -a asprof_arguments <<< "{{.AsprofArguments}}"

ap_interval=0
Expand Down Expand Up @@ -1794,26 +1795,29 @@ stop_profiling() {
restore_settings
}

# Function to collapse perf data
# Function to collapse perf data (pipe to stackcollapse-perf to avoid large intermediate stack files)
collapse_perf_data() {
if [ -f perf_dwarf_data ]; then
("${PERF_CMD}" script -i perf_dwarf_data > perf_dwarf_stacks && stackcollapse-perf perf_dwarf_stacks > perf_dwarf_folded) &
local dwarf_pid=$!
else
echo "Error: perf_dwarf_data file not found" >&2
fi
local dwarf_pid="" fp_pid=""
if [ -f perf_fp_data ]; then
("${PERF_CMD}" script -i perf_fp_data > perf_fp_stacks && stackcollapse-perf perf_fp_stacks > perf_fp_folded) &
local fp_pid=$!
( set -o pipefail; "${PERF_CMD}" script -i perf_fp_data | stackcollapse-perf > perf_fp_folded ) &
fp_pid=$!
else
echo "Error: perf_fp_data file not found" >&2
fi
if [ -n "$dwarf_pid" ]; then
wait "$dwarf_pid" || echo "Error: failed to process perf_dwarf_data (perf script or stackcollapse-perf failed)" >&2
if [ "$dual_native_stacks" = "true" ]; then
if [ -f perf_dwarf_data ]; then
( set -o pipefail; "${PERF_CMD}" script -i perf_dwarf_data | stackcollapse-perf > perf_dwarf_folded ) &
dwarf_pid=$!
else
echo "Error: perf_dwarf_data file not found" >&2
fi
fi
if [ -n "$fp_pid" ]; then
wait "$fp_pid" || echo "Error: failed to process perf_fp_data (perf script or stackcollapse-perf failed)" >&2
fi
if [ -n "$dwarf_pid" ]; then
wait "$dwarf_pid" || echo "Error: failed to process perf_dwarf_data (perf script or stackcollapse-perf failed)" >&2
fi
}

# Function to print results to stdout
Expand Down Expand Up @@ -1894,7 +1898,7 @@ else
mapfile -t java_pids < <(pgrep java)
fi

# Start profiling with perf in frame pointer mode
# Frame-pointer perf record (default native profile)
if [ -n "$pids" ]; then
"${PERF_CMD}" record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 &
else
Expand All @@ -1907,17 +1911,20 @@ if ! kill -0 $perf_fp_pid 2>/dev/null; then
exit 1
fi

# Start profiling with perf in dwarf mode
if [ -n "$pids" ]; then
"${PERF_CMD}" record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
else
"${PERF_CMD}" record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
fi
perf_dwarf_pid=$!
if ! kill -0 $perf_dwarf_pid 2>/dev/null; then
echo "Failed to start perf record in dwarf mode" >&2
stop_profiling
exit 1
# DWARF perf record (second native profile when dual_native_stacks is true)
perf_dwarf_pid=""
if [ "$dual_native_stacks" = "true" ]; then
if [ -n "$pids" ]; then
"${PERF_CMD}" record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
else
"${PERF_CMD}" record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 &
fi
perf_dwarf_pid=$!
if ! kill -0 $perf_dwarf_pid 2>/dev/null; then
echo "Failed to start perf record in dwarf mode" >&2
stop_profiling
exit 1
fi
fi

if [ ${#java_pids[@]} -eq 0 ]; then
Expand Down
Loading