From f9e798ca761ad275417476346724b71c9b57d1ff Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Thu, 19 Mar 2026 10:59:06 -0700 Subject: [PATCH 1/2] feature: flamegraph dwarf data optional Signed-off-by: Harper, Jason M --- README.md | 2 +- cmd/flamegraph/flamegraph.go | 52 ++++++++++++++++++++--------------- docs/perfspect_flamegraph.md | 2 ++ internal/script/scripts.go | 53 ++++++++++++++++++++---------------- 4 files changed, 63 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index a674bb94..55d6b853 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,7 @@ The following optional telemetry sources can be enabled via environment variable #### Flamegraph Command -Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flamegraph` to capture a system-wide software flamegraph. See [`perfspect flamegraph -h`](docs/perfspect_flamegraph.md) for all options. +Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flamegraph` to capture a system-wide software flamegraph. Native stacks use frame-pointer `perf` by default; `--dual-native-stacks` adds DWARF recording and merges those stacks with the frame-pointer profile. See [`perfspect flamegraph -h`](docs/perfspect_flamegraph.md) for all options. > [!TIP] > By default, flamegraphs are collected using the `cycles:P` event. To analyze different performance aspects, use the `--perf-event` flag to specify an alternative perf event (e.g., `cache-misses`, `instructions`, `branches`, `context-switches`, `mem-loads`, `mem-stores`, etc.). 
diff --git a/cmd/flamegraph/flamegraph.go b/cmd/flamegraph/flamegraph.go index 68389800..948437e8 100644 --- a/cmd/flamegraph/flamegraph.go +++ b/cmd/flamegraph/flamegraph.go @@ -44,25 +44,27 @@ var Cmd = &cobra.Command{ } var ( - flagInput string - flagFormat []string - flagDuration int - flagFrequency int - flagPids []int - flagNoSystemSummary bool - flagMaxDepth int - flagPerfEvent string - flagAsprofArguments string + flagInput string + flagFormat []string + flagDuration int + flagFrequency int + flagPids []int + flagNoSystemSummary bool + flagMaxDepth int + flagPerfEvent string + flagAsprofArguments string + flagDualNativeStacks bool ) const ( - flagDurationName = "duration" - flagFrequencyName = "frequency" - flagPidsName = "pids" - flagNoSystemSummaryName = "no-summary" - flagMaxDepthName = "max-depth" - flagPerfEventName = "perf-event" - flagAsprofArgumentsName = "asprof-args" + flagDurationName = "duration" + flagFrequencyName = "frequency" + flagPidsName = "pids" + flagNoSystemSummaryName = "no-summary" + flagMaxDepthName = "max-depth" + flagPerfEventName = "perf-event" + flagAsprofArgumentsName = "asprof-args" + flagDualNativeStacksName = "dual-native-stacks" ) func init() { @@ -75,6 +77,7 @@ func init() { Cmd.Flags().IntVar(&flagMaxDepth, flagMaxDepthName, 0, "") Cmd.Flags().StringVar(&flagPerfEvent, flagPerfEventName, "cycles:P", "") Cmd.Flags().StringVar(&flagAsprofArguments, flagAsprofArgumentsName, "-t -F probesp+vtable", "") + Cmd.Flags().BoolVar(&flagDualNativeStacks, flagDualNativeStacksName, false, "") workflow.AddTargetFlags(Cmd) Cmd.SetUsageFunc(usageFunc) @@ -124,6 +127,10 @@ func getFlagGroups() []app.FlagGroup { Name: flagPerfEventName, Help: "perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.)", }, + { + Name: flagDualNativeStacksName, + Help: "also record DWARF unwind perf and merge with frame-pointer stacks per process (larger profiles, longer 
post-processing time)", + }, { Name: flagAsprofArgumentsName, Help: "arguments to pass to async-profiler, e.g., $ asprof start -i .", @@ -204,12 +211,13 @@ func runCmd(cmd *cobra.Command, args []string) error { Cmd: cmd, ReportNamePost: "flame", ScriptParams: map[string]string{ - "Frequency": strconv.Itoa(flagFrequency), - "Duration": strconv.Itoa(flagDuration), - "PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","), - "MaxDepth": strconv.Itoa(flagMaxDepth), - "PerfEvent": flagPerfEvent, - "AsprofArguments": flagAsprofArguments, + "Frequency": strconv.Itoa(flagFrequency), + "Duration": strconv.Itoa(flagDuration), + "PIDs": strings.Join(util.IntSliceToStringSlice(flagPids), ","), + "MaxDepth": strconv.Itoa(flagMaxDepth), + "PerfEvent": flagPerfEvent, + "AsprofArguments": flagAsprofArguments, + "DualNativeStacks": strconv.FormatBool(flagDualNativeStacks), }, Tables: tables, Input: flagInput, diff --git a/docs/perfspect_flamegraph.md b/docs/perfspect_flamegraph.md index 5c9396fc..abce9614 100644 --- a/docs/perfspect_flamegraph.md +++ b/docs/perfspect_flamegraph.md @@ -1,5 +1,6 @@ # perfspect flamegraph + ```text Collect flamegraph data from target(s) @@ -17,6 +18,7 @@ Flags: --frequency number of samples taken per second (default: 11) --pids comma separated list of PIDs. If not specified, all PIDs will be collected (default: []) --perf-event perf event to use for native sampling (e.g., cpu-cycles, instructions, cache-misses, branches, context-switches, mem-loads, mem-stores, etc.) (default: cycles:P) + --dual-native-stacks also record DWARF unwind perf and merge with frame-pointer stacks per process (larger profiles, longer post-processing time) (default: false) --asprof-args arguments to pass to async-profiler, e.g., $ asprof start -i . 
(default: -t -F probesp+vtable) --max-depth maximum render depth of call stack in flamegraph (0 = no limit) (default: 0) --format choose output format(s) from: all, html, txt, json (default: [html]) diff --git a/internal/script/scripts.go b/internal/script/scripts.go index 08e4736f..18088eb2 100644 --- a/internal/script/scripts.go +++ b/internal/script/scripts.go @@ -1725,6 +1725,7 @@ duration={{.Duration}} frequency={{.Frequency}} maxdepth={{.MaxDepth}} perf_event={{.PerfEvent}} +dual_native_stacks={{.DualNativeStacks}} read -r -a asprof_arguments <<< "{{.AsprofArguments}}" ap_interval=0 @@ -1794,26 +1795,29 @@ stop_profiling() { restore_settings } -# Function to collapse perf data +# Function to collapse perf data (pipe to stackcollapse-perf to avoid large intermediate stack files) collapse_perf_data() { - if [ -f perf_dwarf_data ]; then - ("${PERF_CMD}" script -i perf_dwarf_data > perf_dwarf_stacks && stackcollapse-perf perf_dwarf_stacks > perf_dwarf_folded) & - local dwarf_pid=$! - else - echo "Error: perf_dwarf_data file not found" >&2 - fi + local dwarf_pid="" fp_pid="" if [ -f perf_fp_data ]; then - ("${PERF_CMD}" script -i perf_fp_data > perf_fp_stacks && stackcollapse-perf perf_fp_stacks > perf_fp_folded) & - local fp_pid=$! + ("${PERF_CMD}" script -i perf_fp_data | stackcollapse-perf > perf_fp_folded) & + fp_pid=$! else echo "Error: perf_fp_data file not found" >&2 fi - if [ -n "$dwarf_pid" ]; then - wait "$dwarf_pid" || echo "Error: failed to process perf_dwarf_data (perf script or stackcollapse-perf failed)" >&2 + if [ "$dual_native_stacks" = "true" ]; then + if [ -f perf_dwarf_data ]; then + ("${PERF_CMD}" script -i perf_dwarf_data | stackcollapse-perf > perf_dwarf_folded) & + dwarf_pid=$! 
+ else + echo "Error: perf_dwarf_data file not found" >&2 + fi fi if [ -n "$fp_pid" ]; then wait "$fp_pid" || echo "Error: failed to process perf_fp_data (perf script or stackcollapse-perf failed)" >&2 fi + if [ -n "$dwarf_pid" ]; then + wait "$dwarf_pid" || echo "Error: failed to process perf_dwarf_data (perf script or stackcollapse-perf failed)" >&2 + fi } # Function to print results to stdout @@ -1894,7 +1898,7 @@ else mapfile -t java_pids < <(pgrep java) fi -# Start profiling with perf in frame pointer mode +# Frame-pointer perf record (default native profile) if [ -n "$pids" ]; then "${PERF_CMD}" record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_fp_data -m 129 & else @@ -1907,17 +1911,20 @@ if ! kill -0 $perf_fp_pid 2>/dev/null; then exit 1 fi -# Start profiling with perf in dwarf mode -if [ -n "$pids" ]; then - "${PERF_CMD}" record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & -else - "${PERF_CMD}" record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & -fi -perf_dwarf_pid=$! -if ! kill -0 $perf_dwarf_pid 2>/dev/null; then - echo "Failed to start perf record in dwarf mode" >&2 - stop_profiling - exit 1 +# DWARF perf record (second native profile when dual_native_stacks is true) +perf_dwarf_pid="" +if [ "$dual_native_stacks" = "true" ]; then + if [ -n "$pids" ]; then + "${PERF_CMD}" record -e "$perf_event" -F "$frequency" -p "$pids" -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & + else + "${PERF_CMD}" record -e "$perf_event" -F "$frequency" -a -g -o perf_dwarf_data -m 257 --call-graph dwarf,8192 & + fi + perf_dwarf_pid=$! + if ! 
kill -0 $perf_dwarf_pid 2>/dev/null; then + echo "Failed to start perf record in dwarf mode" >&2 + stop_profiling + exit 1 + fi fi if [ ${#java_pids[@]} -eq 0 ]; then From 6d2e58c12436cddbc092ae42c9e783944548688f Mon Sep 17 00:00:00 2001 From: "Harper, Jason M" Date: Thu, 19 Mar 2026 11:12:52 -0700 Subject: [PATCH 2/2] fix: ensure pipefail in script commands for better error handling - Updated the collapse_perf_data function to use 'set -o pipefail' for both perf_fp_data and perf_dwarf_data commands, improving error detection in the pipeline. - This change ensures that any failure in the command pipeline will be caught, enhancing the robustness of the profiling script. Signed-off-by: Harper, Jason M --- internal/script/scripts.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/script/scripts.go b/internal/script/scripts.go index 18088eb2..72686aa1 100644 --- a/internal/script/scripts.go +++ b/internal/script/scripts.go @@ -1799,14 +1799,14 @@ stop_profiling() { collapse_perf_data() { local dwarf_pid="" fp_pid="" if [ -f perf_fp_data ]; then - ("${PERF_CMD}" script -i perf_fp_data | stackcollapse-perf > perf_fp_folded) & + ( set -o pipefail; "${PERF_CMD}" script -i perf_fp_data | stackcollapse-perf > perf_fp_folded ) & fp_pid=$! else echo "Error: perf_fp_data file not found" >&2 fi if [ "$dual_native_stacks" = "true" ]; then if [ -f perf_dwarf_data ]; then - ("${PERF_CMD}" script -i perf_dwarf_data | stackcollapse-perf > perf_dwarf_folded) & + ( set -o pipefail; "${PERF_CMD}" script -i perf_dwarf_data | stackcollapse-perf > perf_dwarf_folded ) & dwarf_pid=$! else echo "Error: perf_dwarf_data file not found" >&2