All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1)
@ 2024-05-02  6:00 Namhyung Kim
  2024-05-02  6:00 ` [PATCH 1/6] perf dwarf-aux: Add die_collect_global_vars() Namhyung Kim
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

Hello,

This is a mix of bug fixes and optimization in the data type profiling.

Firstly it now caches global variables and looks them up by address later.
This will be good for performance as well as improves the success rates
because some variables are defined in a separate file (compile unit) and
has no info in the call site for some reason.

Also it properly checks instructions that use more than one register for
a memory access like x86 SIB addressing.  And check the type of stack
variables correctly and discard constant values (without type info).

Thanks,
Namhyung


Namhyung Kim (6):
  perf dwarf-aux: Add die_collect_global_vars()
  perf annotate-data: Collect global variables in advance
  perf annotate-data: Handle direct global variable access
  perf annotate-data: Check memory access with two registers
  perf annotate-data: Handle multi regs in find_data_type_block()
  perf annotate-data: Check kind of stack variables

 tools/perf/util/annotate-data.c | 157 ++++++++++++++++++++++++++------
 tools/perf/util/dwarf-aux.c     |  62 +++++++++++++
 tools/perf/util/dwarf-aux.h     |   8 ++
 3 files changed, 197 insertions(+), 30 deletions(-)

-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] perf dwarf-aux: Add die_collect_global_vars()
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
@ 2024-05-02  6:00 ` Namhyung Kim
  2024-05-02  6:00 ` [PATCH 2/6] perf annotate-data: Collect global variables in advance Namhyung Kim
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users, Masami Hiramatsu

This function is to search all global variables in the CU.  We want to
have the list of global variables at once and match them later.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/dwarf-aux.c | 62 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/dwarf-aux.h |  8 +++++
 2 files changed, 70 insertions(+)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 40cfbdfe2d75..c0a492e65388 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -1630,6 +1630,68 @@ void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types)
 
 	die_find_child(sc_die, __die_collect_vars_cb, (void *)var_types, &die_mem);
 }
+
+static int __die_collect_global_vars_cb(Dwarf_Die *die_mem, void *arg)
+{
+	struct die_var_type **var_types = arg;
+	Dwarf_Die type_die;
+	int tag = dwarf_tag(die_mem);
+	Dwarf_Attribute attr;
+	Dwarf_Addr base, start, end;
+	Dwarf_Op *ops;
+	size_t nops;
+	struct die_var_type *vt;
+
+	if (tag != DW_TAG_variable)
+		return DIE_FIND_CB_SIBLING;
+
+	if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL)
+		return DIE_FIND_CB_SIBLING;
+
+	/* Only collect the location with an absolute address. */
+	if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0)
+		return DIE_FIND_CB_SIBLING;
+
+	if (ops->atom != DW_OP_addr)
+		return DIE_FIND_CB_SIBLING;
+
+	if (!check_allowed_ops(ops, nops))
+		return DIE_FIND_CB_SIBLING;
+
+	if (die_get_real_type(die_mem, &type_die) == NULL)
+		return DIE_FIND_CB_SIBLING;
+
+	vt = malloc(sizeof(*vt));
+	if (vt == NULL)
+		return DIE_FIND_CB_END;
+
+	vt->die_off = dwarf_dieoffset(&type_die);
+	vt->addr = ops->number;
+	vt->reg = -1;
+	vt->offset = 0;
+	vt->next = *var_types;
+	*var_types = vt;
+
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_collect_global_vars - Save all global variables
+ * @cu_die: a CU DIE
+ * @var_types: a pointer to save the resulting list
+ *
+ * Save all global variables in the @cu_die and save them to @var_types.
+ * The @var_types is a singly-linked list containing type and location info.
+ * Actual type can be retrieved using dwarf_offdie() with 'die_off' later.
+ *
+ * Callers should free @var_types.
+ */
+void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types)
+{
+	Dwarf_Die die_mem;
+
+	die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem);
+}
 #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
 
 #ifdef HAVE_DWARF_CFI_SUPPORT
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index b0f25fbf9668..24446412b869 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -171,6 +171,9 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr addr,
 /* Save all variables and parameters in this scope */
 void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types);
 
+/* Save all global variables in this CU */
+void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types);
+
 #else /*  HAVE_DWARF_GETLOCATIONS_SUPPORT */
 
 static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
@@ -203,6 +206,11 @@ static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused,
 {
 }
 
+static inline void die_collect_global_vars(Dwarf_Die *cu_die __maybe_unused,
+					   struct die_var_type **var_types __maybe_unused)
+{
+}
+
 #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
 
 #ifdef HAVE_DWARF_CFI_SUPPORT
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] perf annotate-data: Collect global variables in advance
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
  2024-05-02  6:00 ` [PATCH 1/6] perf dwarf-aux: Add die_collect_global_vars() Namhyung Kim
@ 2024-05-02  6:00 ` Namhyung Kim
  2024-05-02 13:50   ` Arnaldo Carvalho de Melo
  2024-05-02  6:00 ` [PATCH 3/6] perf annotate-data: Handle direct global variable access Namhyung Kim
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

Currently it looks up global variables from the current CU using address
and name.  But it sometimes fails to find a variable as the variable can
come from a different CU - but it's still strange it failed to find a
declaration for some reason.

Anyway, it can collect all global variables from all CU once and then
lookup them later on.  This slightly improves the success rate of my
test data set.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate-data.c | 57 +++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 12d5faff3b7a..4dd0911904f2 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -28,6 +28,8 @@
 /* register number of the stack pointer */
 #define X86_REG_SP 7
 
+static void delete_var_types(struct die_var_type *var_types);
+
 enum type_state_kind {
 	TSR_KIND_INVALID = 0,
 	TSR_KIND_TYPE,
@@ -557,8 +559,8 @@ static bool global_var__add(struct data_loc_info *dloc, u64 addr,
 	if (gvar == NULL)
 		return false;
 
-	gvar->name = strdup(name);
-	if (gvar->name == NULL) {
+	gvar->name = name ? strdup(name) : NULL;
+	if (name && gvar->name == NULL) {
 		free(gvar);
 		return false;
 	}
@@ -612,6 +614,53 @@ static bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
 	return true;
 }
 
+static void global_var__collect(struct data_loc_info *dloc)
+{
+	Dwarf *dwarf = dloc->di->dbg;
+	Dwarf_Off off, next_off;
+	Dwarf_Die cu_die, type_die;
+	size_t header_size;
+
+	/* Iterate all CU and collect global variables that have no location in a register. */
+	off = 0;
+	while (dwarf_nextcu(dwarf, off, &next_off, &header_size,
+			    NULL, NULL, NULL) == 0) {
+		struct die_var_type *var_types = NULL;
+		struct die_var_type *pos;
+
+		if (dwarf_offdie(dwarf, off + header_size, &cu_die) == NULL) {
+			off = next_off;
+			continue;
+		}
+
+		die_collect_global_vars(&cu_die, &var_types);
+
+		for (pos = var_types; pos; pos = pos->next) {
+			const char *var_name = NULL;
+			int var_offset = 0;
+
+			if (pos->reg != -1)
+				continue;
+
+			if (!dwarf_offdie(dwarf, pos->die_off, &type_die))
+				continue;
+
+			if (!get_global_var_info(dloc, pos->addr, &var_name,
+						 &var_offset))
+				continue;
+
+			if (var_offset != 0)
+				continue;
+
+			global_var__add(dloc, pos->addr, var_name, &type_die);
+		}
+
+		delete_var_types(var_types);
+
+		off = next_off;
+	}
+}
+
 static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
 				u64 ip, u64 var_addr, int *var_offset,
 				Dwarf_Die *type_die)
@@ -620,8 +669,12 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
 	int offset;
 	const char *var_name = NULL;
 	struct global_var_entry *gvar;
+	struct dso *dso = map__dso(dloc->ms->map);
 	Dwarf_Die var_die;
 
+	if (RB_EMPTY_ROOT(&dso->global_vars))
+		global_var__collect(dloc);
+
 	gvar = global_var__find(dloc, var_addr);
 	if (gvar) {
 		if (!dwarf_offdie(dloc->di->dbg, gvar->die_offset, type_die))
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/6] perf annotate-data: Handle direct global variable access
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
  2024-05-02  6:00 ` [PATCH 1/6] perf dwarf-aux: Add die_collect_global_vars() Namhyung Kim
  2024-05-02  6:00 ` [PATCH 2/6] perf annotate-data: Collect global variables in advance Namhyung Kim
@ 2024-05-02  6:00 ` Namhyung Kim
  2024-05-02  6:00 ` [PATCH 4/6] perf annotate-data: Check memory access with two registers Namhyung Kim
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

Like per-cpu base offset array, sometimes it accesses the global
variable directly using the offset.  Allow this type of instructions as
long as it finds a global variable for the address.

  movslq  %edi, %rcx
  mov     -0x7dc94ae0(,%rcx,8), %rcx   <<<--- here

As %rcx has a valid type (i.e. array index) from the first instruction,
it will be checked by the first case in check_matching_type().  But as
it's not a pointer type, the match will fail.  But in this case, it
should check if it accesses the kernel global array variable.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate-data.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 4dd0911904f2..f1e52a531563 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1256,14 +1256,19 @@ static int check_matching_type(struct type_state *state,
 	if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_TYPE) {
 		int tag = dwarf_tag(&state->regs[reg].type);
 
-		pr_debug_dtp("\n");
-
 		/*
 		 * Normal registers should hold a pointer (or array) to
 		 * dereference a memory location.
 		 */
-		if (tag != DW_TAG_pointer_type && tag != DW_TAG_array_type)
+		if (tag != DW_TAG_pointer_type && tag != DW_TAG_array_type) {
+			if (dloc->op->offset < 0 && reg != state->stack_reg)
+				goto check_kernel;
+
+			pr_debug_dtp("\n");
 			return -1;
+		}
+
+		pr_debug_dtp("\n");
 
 		/* Remove the pointer and get the target type */
 		if (die_get_real_type(&state->regs[reg].type, type_die) == NULL)
@@ -1376,12 +1381,14 @@ static int check_matching_type(struct type_state *state,
 		return -1;
 	}
 
-	if (map__dso(dloc->ms->map)->kernel && arch__is(dloc->arch, "x86")) {
+check_kernel:
+	if (map__dso(dloc->ms->map)->kernel) {
 		u64 addr;
 		int offset;
 
 		/* Direct this-cpu access like "%gs:0x34740" */
-		if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm) {
+		if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm &&
+		    arch__is(dloc->arch, "x86")) {
 			pr_debug_dtp(" this-cpu var\n");
 
 			addr = dloc->op->offset;
@@ -1394,17 +1401,13 @@ static int check_matching_type(struct type_state *state,
 			return -1;
 		}
 
-		/* Access to per-cpu base like "-0x7dcf0500(,%rdx,8)" */
+		/* Access to global variable like "-0x7dcf0500(,%rdx,8)" */
 		if (dloc->op->offset < 0 && reg != state->stack_reg) {
-			const char *var_name = NULL;
-
 			addr = (s64) dloc->op->offset;
 
-			if (get_global_var_info(dloc, addr, &var_name, &offset) &&
-			    !strcmp(var_name, "__per_cpu_offset") && offset == 0 &&
-			    get_global_var_type(cu_die, dloc, dloc->ip, addr,
+			if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
 						&offset, type_die)) {
-				pr_debug_dtp(" percpu base\n");
+				pr_debug_dtp(" global var\n");
 
 				dloc->type_offset = offset;
 				return 1;
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/6] perf annotate-data: Check memory access with two registers
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
                   ` (2 preceding siblings ...)
  2024-05-02  6:00 ` [PATCH 3/6] perf annotate-data: Handle direct global variable access Namhyung Kim
@ 2024-05-02  6:00 ` Namhyung Kim
  2024-05-02 14:05   ` Arnaldo Carvalho de Melo
  2024-05-02  6:00 ` [PATCH 5/6] perf annotate-data: Handle multi regs in find_data_type_block() Namhyung Kim
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

The following instruction pattern is used to access a global variable.

  mov     $0x231c0, %rax
  movsql  %edi, %rcx
  mov     -0x7dc94ae0(,%rcx,8), %rcx
  cmpl    $0x0, 0xa60(%rcx,%rax,1)     <<<--- here

The first instruction set the address of the per-cpu variable (here, it
is 'runqueus' of struct rq).  The second instruction seems like a cpu
number of the per-cpu base.  The third instruction get the base offset
of per-cpu area for that cpu.  The last instruction compares the value
of the per-cpu variable at the offset of 0xa60.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate-data.c | 44 +++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index f1e52a531563..245e3ef3e2ff 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1031,22 +1031,37 @@ static void update_insn_state_x86(struct type_state *state,
 		else if (has_reg_type(state, sreg) &&
 			 state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
 			u64 ip = dloc->ms->sym->start + dl->al.offset;
+			u64 var_addr = src->offset;
 			int offset;
 
+			if (src->multi_regs) {
+				int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
+
+				if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+				    state->regs[reg2].kind == TSR_KIND_CONST)
+					var_addr += state->regs[reg2].imm_value;
+			}
+
 			/*
 			 * In kernel, %gs points to a per-cpu region for the
 			 * current CPU.  Access with a constant offset should
 			 * be treated as a global variable access.
 			 */
-			if (get_global_var_type(cu_die, dloc, ip, src->offset,
+			if (get_global_var_type(cu_die, dloc, ip, var_addr,
 						&offset, &type_die) &&
 			    die_get_member_type(&type_die, offset, &type_die)) {
 				tsr->type = type_die;
 				tsr->kind = TSR_KIND_TYPE;
 				tsr->ok = true;
 
-				pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
-					     insn_offset, src->offset, sreg, dst->reg1);
+				if (src->multi_regs) {
+					pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
+						     insn_offset, src->offset, src->reg1,
+						     src->reg2, dst->reg1);
+				} else {
+					pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
+						     insn_offset, src->offset, sreg, dst->reg1);
+				}
 				pr_debug_type_name(&tsr->type, tsr->kind);
 			} else {
 				tsr->ok = false;
@@ -1340,6 +1355,17 @@ static int check_matching_type(struct type_state *state,
 
 		pr_debug_dtp(" percpu var\n");
 
+		if (dloc->op->multi_regs) {
+			int reg2 = dloc->op->reg2;
+
+			if (dloc->op->reg2 == reg)
+				reg2 = dloc->op->reg1;
+
+			if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+			    state->regs[reg2].kind == TSR_KIND_CONST)
+				var_addr += state->regs[reg2].imm_value;
+		}
+
 		if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
 					&var_offset, type_die)) {
 			dloc->type_offset = var_offset;
@@ -1527,8 +1553,16 @@ static int find_data_type_block(struct data_loc_info *dloc, int reg,
 		found = find_data_type_insn(dloc, reg, &basic_blocks, var_types,
 					    cu_die, type_die);
 		if (found > 0) {
-			pr_debug_dtp("found by insn track: %#x(reg%d) type-offset=%#x\n",
-				     dloc->op->offset, reg, dloc->type_offset);
+			char buf[64];
+
+			if (dloc->op->multi_regs)
+				snprintf(buf, sizeof(buf), "reg%d, reg%d",
+					 dloc->op->reg1, dloc->op->reg2);
+			else
+				snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
+
+			pr_debug_dtp("found by insn track: %#x(%s) type-offset=%#x\n",
+				     dloc->op->offset, buf, dloc->type_offset);
 			pr_debug_type_name(type_die, TSR_KIND_TYPE);
 			ret = 0;
 			break;
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/6] perf annotate-data: Handle multi regs in find_data_type_block()
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
                   ` (3 preceding siblings ...)
  2024-05-02  6:00 ` [PATCH 4/6] perf annotate-data: Check memory access with two registers Namhyung Kim
@ 2024-05-02  6:00 ` Namhyung Kim
  2024-05-02  6:00 ` [PATCH 6/6] perf annotate-data: Check kind of stack variables Namhyung Kim
  2024-05-02 14:25 ` [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Arnaldo Carvalho de Melo
  6 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

The instruction tracking should be the same for the both registers.
Just do it once and compare the result with multi regs as with the
previous patches.  Then we don't need to call find_data_type_block()
separately for each reg.  Let's remove the 'reg' argument from the
relevant functions.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate-data.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 245e3ef3e2ff..68fe7999f033 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1258,11 +1258,12 @@ static void setup_stack_canary(struct data_loc_info *dloc)
  * are similar to global variables and no additional info is needed.
  */
 static int check_matching_type(struct type_state *state,
-			       struct data_loc_info *dloc, int reg,
+			       struct data_loc_info *dloc,
 			       Dwarf_Die *cu_die, Dwarf_Die *type_die)
 {
 	Dwarf_Word size;
 	u32 insn_offset = dloc->ip - dloc->ms->sym->start;
+	int reg = dloc->op->reg1;
 
 	pr_debug_dtp("chk [%x] reg%d offset=%#x ok=%d kind=%d",
 		     insn_offset, reg, dloc->op->offset,
@@ -1448,7 +1449,7 @@ static int check_matching_type(struct type_state *state,
 }
 
 /* Iterate instructions in basic blocks and update type table */
-static int find_data_type_insn(struct data_loc_info *dloc, int reg,
+static int find_data_type_insn(struct data_loc_info *dloc,
 			       struct list_head *basic_blocks,
 			       struct die_var_type *var_types,
 			       Dwarf_Die *cu_die, Dwarf_Die *type_die)
@@ -1481,7 +1482,7 @@ static int find_data_type_insn(struct data_loc_info *dloc, int reg,
 			update_var_state(&state, dloc, addr, dl->al.offset, var_types);
 
 			if (this_ip == dloc->ip) {
-				ret = check_matching_type(&state, dloc, reg,
+				ret = check_matching_type(&state, dloc,
 							  cu_die, type_die);
 				goto out;
 			}
@@ -1502,7 +1503,7 @@ static int find_data_type_insn(struct data_loc_info *dloc, int reg,
  * Construct a list of basic blocks for each scope with variables and try to find
  * the data type by updating a type state table through instructions.
  */
-static int find_data_type_block(struct data_loc_info *dloc, int reg,
+static int find_data_type_block(struct data_loc_info *dloc,
 				Dwarf_Die *cu_die, Dwarf_Die *scopes,
 				int nr_scopes, Dwarf_Die *type_die)
 {
@@ -1550,7 +1551,7 @@ static int find_data_type_block(struct data_loc_info *dloc, int reg,
 		fixup_var_address(var_types, start);
 
 		/* Find from start of this scope to the target instruction */
-		found = find_data_type_insn(dloc, reg, &basic_blocks, var_types,
+		found = find_data_type_insn(dloc, &basic_blocks, var_types,
 					    cu_die, type_die);
 		if (found > 0) {
 			char buf[64];
@@ -1716,8 +1717,13 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
 		goto out;
 	}
 
+	if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
+		reg = loc->reg2;
+		goto retry;
+	}
+
 	if (reg != DWARF_REG_PC) {
-		ret = find_data_type_block(dloc, reg, &cu_die, scopes,
+		ret = find_data_type_block(dloc, &cu_die, scopes,
 					   nr_scopes, type_die);
 		if (ret == 0) {
 			ann_data_stat.insn_track++;
@@ -1725,11 +1731,6 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
 		}
 	}
 
-	if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
-		reg = loc->reg2;
-		goto retry;
-	}
-
 	if (ret < 0) {
 		pr_debug_dtp("no variable found\n");
 		ann_data_stat.no_var++;
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/6] perf annotate-data: Check kind of stack variables
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
                   ` (4 preceding siblings ...)
  2024-05-02  6:00 ` [PATCH 5/6] perf annotate-data: Handle multi regs in find_data_type_block() Namhyung Kim
@ 2024-05-02  6:00 ` Namhyung Kim
  2024-05-02 14:25 ` [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Arnaldo Carvalho de Melo
  6 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02  6:00 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Kan Liang
  Cc: Jiri Olsa, Adrian Hunter, Peter Zijlstra, Ingo Molnar, LKML,
	linux-perf-users

I sometimes see ("unknown type") in the result and it was because it
didn't check the type of stack variables properly during the instruction
tracking.  The stack can carry constant values (without type info) and
if the target instruction is accessing the stack location, it resulted
in the "unknown type".

Maybe we could pick one of integer types for the constant, but it
doesn't really mean anything useful.  Let's just drop the stack slot if
it doesn't have a valid type info.

Here's an example how it got the unknown type.
Note that 0xffffff48 = -0xb8.
  -----------------------------------------------------------
  find data type for 0xffffff48(reg6) at ...
  CU for ...
  frame base: cfa=0 fbreg=6
  scope: [2/2] (die:11cb97f)
  bb: [37 - 3a]
  var [37] reg15 type='int' size=0x4 (die:0x1180633)
  bb: [40 - 4b]
  mov [40] imm=0x1 -> reg13
  var [45] reg8 type='sigset_t*' size=0x8 (die:0x11a39ee)
  mov [45] imm=0x1 -> reg2                     <---  here reg2 has a constant
  bb: [215 - 237]
  mov [218] reg2 -> -0xb8(stack) constant      <---  and save it to the stack
  mov [225] reg13 -> -0xc4(stack) constant
  call [22f] find_task_by_vgpid
  call [22f] return -> reg0 type='struct task_struct*' size=0x8 (die:0x11881e8)
  bb: [5c8 - 5cf]
  bb: [2fb - 302]
  mov [2fb] -0xc4(stack) -> reg13 constant
  bb: [13b - 14d]
  mov [143] 0xd50(reg3) -> reg5 type='struct task_struct*' size=0x8 (die:0xa31f3c)
  bb: [153 - 153]
  chk [153] reg6 offset=0xffffff48 ok=0 kind=0 fbreg    <--- access here
  found by insn track: 0xffffff48(reg6) type-offset=0
   type='G<EF>^K<F6><AF>U' size=0 (die:0xffffffffffffffff)

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/annotate-data.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 68fe7999f033..2c98813f95cd 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1314,6 +1314,9 @@ static int check_matching_type(struct type_state *state,
 			return -1;
 		}
 
+		if (stack->kind != TSR_KIND_TYPE)
+			return 0;
+
 		*type_die = stack->type;
 		/* Update the type offset from the start of slot */
 		dloc->type_offset -= stack->offset;
@@ -1343,6 +1346,9 @@ static int check_matching_type(struct type_state *state,
 			return -1;
 		}
 
+		if (stack->kind != TSR_KIND_TYPE)
+			return 0;
+
 		*type_die = stack->type;
 		/* Update the type offset from the start of slot */
 		dloc->type_offset -= fboff + stack->offset;
-- 
2.45.0.rc1.225.g2a3ae87e7f-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/6] perf annotate-data: Collect global variables in advance
  2024-05-02  6:00 ` [PATCH 2/6] perf annotate-data: Collect global variables in advance Namhyung Kim
@ 2024-05-02 13:50   ` Arnaldo Carvalho de Melo
  2024-05-02 18:23     ` Namhyung Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2024-05-02 13:50 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Wed, May 01, 2024 at 11:00:07PM -0700, Namhyung Kim wrote:
> Currently it looks up global variables from the current CU using address
> and name.  But it sometimes fails to find a variable as the variable can
> come from a different CU - but it's still strange it failed to find a
> declaration for some reason.
> 
> Anyway, it can collect all global variables from all CU once and then
> lookup them later on.  This slightly improves the success rate of my
> test data set.

It would be interesting you could provide examples from your test data
set, i.e. after this patch these extra global variables were considered
in the test results, with some tool output, etc.

This would help intersested parties to reproduce your results,
validate the end result, etc.

- Arnaldo
 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/annotate-data.c | 57 +++++++++++++++++++++++++++++++--
>  1 file changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
> index 12d5faff3b7a..4dd0911904f2 100644
> --- a/tools/perf/util/annotate-data.c
> +++ b/tools/perf/util/annotate-data.c
> @@ -28,6 +28,8 @@
>  /* register number of the stack pointer */
>  #define X86_REG_SP 7
>  
> +static void delete_var_types(struct die_var_type *var_types);
> +
>  enum type_state_kind {
>  	TSR_KIND_INVALID = 0,
>  	TSR_KIND_TYPE,
> @@ -557,8 +559,8 @@ static bool global_var__add(struct data_loc_info *dloc, u64 addr,
>  	if (gvar == NULL)
>  		return false;
>  
> -	gvar->name = strdup(name);
> -	if (gvar->name == NULL) {
> +	gvar->name = name ? strdup(name) : NULL;
> +	if (name && gvar->name == NULL) {
>  		free(gvar);
>  		return false;
>  	}
> @@ -612,6 +614,53 @@ static bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
>  	return true;
>  }
>  
> +static void global_var__collect(struct data_loc_info *dloc)
> +{
> +	Dwarf *dwarf = dloc->di->dbg;
> +	Dwarf_Off off, next_off;
> +	Dwarf_Die cu_die, type_die;
> +	size_t header_size;
> +
> +	/* Iterate all CU and collect global variables that have no location in a register. */
> +	off = 0;
> +	while (dwarf_nextcu(dwarf, off, &next_off, &header_size,
> +			    NULL, NULL, NULL) == 0) {
> +		struct die_var_type *var_types = NULL;
> +		struct die_var_type *pos;
> +
> +		if (dwarf_offdie(dwarf, off + header_size, &cu_die) == NULL) {
> +			off = next_off;
> +			continue;
> +		}
> +
> +		die_collect_global_vars(&cu_die, &var_types);
> +
> +		for (pos = var_types; pos; pos = pos->next) {
> +			const char *var_name = NULL;
> +			int var_offset = 0;
> +
> +			if (pos->reg != -1)
> +				continue;
> +
> +			if (!dwarf_offdie(dwarf, pos->die_off, &type_die))
> +				continue;
> +
> +			if (!get_global_var_info(dloc, pos->addr, &var_name,
> +						 &var_offset))
> +				continue;
> +
> +			if (var_offset != 0)
> +				continue;
> +
> +			global_var__add(dloc, pos->addr, var_name, &type_die);
> +		}
> +
> +		delete_var_types(var_types);
> +
> +		off = next_off;
> +	}
> +}
> +
>  static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
>  				u64 ip, u64 var_addr, int *var_offset,
>  				Dwarf_Die *type_die)
> @@ -620,8 +669,12 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
>  	int offset;
>  	const char *var_name = NULL;
>  	struct global_var_entry *gvar;
> +	struct dso *dso = map__dso(dloc->ms->map);
>  	Dwarf_Die var_die;
>  
> +	if (RB_EMPTY_ROOT(&dso->global_vars))
> +		global_var__collect(dloc);
> +
>  	gvar = global_var__find(dloc, var_addr);
>  	if (gvar) {
>  		if (!dwarf_offdie(dloc->di->dbg, gvar->die_offset, type_die))
> -- 
> 2.45.0.rc1.225.g2a3ae87e7f-goog

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] perf annotate-data: Check memory access with two registers
  2024-05-02  6:00 ` [PATCH 4/6] perf annotate-data: Check memory access with two registers Namhyung Kim
@ 2024-05-02 14:05   ` Arnaldo Carvalho de Melo
  2024-05-02 18:14     ` Namhyung Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2024-05-02 14:05 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Wed, May 01, 2024 at 11:00:09PM -0700, Namhyung Kim wrote:
> The following instruction pattern is used to access a global variable.
> 
>   mov     $0x231c0, %rax
>   movsql  %edi, %rcx
>   mov     -0x7dc94ae0(,%rcx,8), %rcx
>   cmpl    $0x0, 0xa60(%rcx,%rax,1)     <<<--- here
> 
> The first instruction set the address of the per-cpu variable (here, it
> is 'runqueus' of struct rq).  The second instruction seems like a cpu

You mean 'runqueues', i.e. this one:

kernel/sched/core.c
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

?

But that 0xa60 would be in an alignment hole, at least in:

$ pahole --hex rq | egrep 0xa40 -A12
	struct mm_struct *         prev_mm;              /* 0xa40   0x8 */
	unsigned int               clock_update_flags;   /* 0xa48   0x4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        clock;                /* 0xa50   0x8 */

	/* XXX 40 bytes hole, try to pack */

	/* --- cacheline 42 boundary (2688 bytes) --- */
	u64                        clock_task __attribute__((__aligned__(64))); /* 0xa80   0x8 */
	u64                        clock_pelt;           /* 0xa88   0x8 */
	long unsigned int          lost_idle_time;       /* 0xa90   0x8 */
$ uname -a
Linux toolbox 6.7.11-200.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Mar 27 16:50:39 UTC 2024 x86_64 GNU/Linux
$

The paragraph then reads:

----
The first instruction set the address of the per-cpu variable (here, it
is 'runqueues' of type 'struct rq').  The second instruction seems like
a cpu number of the per-cpu base.  The third instruction get the base
offset of per-cpu area for that cpu.  The last instruction compares the
value of the per-cpu variable at the offset of 0xa60.
----

Ok?

> number of the per-cpu base.  The third instruction get the base offset
> of per-cpu area for that cpu.  The last instruction compares the value
> of the per-cpu variable at the offset of 0xa60.
> 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/perf/util/annotate-data.c | 44 +++++++++++++++++++++++++++++----
>  1 file changed, 39 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
> index f1e52a531563..245e3ef3e2ff 100644
> --- a/tools/perf/util/annotate-data.c
> +++ b/tools/perf/util/annotate-data.c
> @@ -1031,22 +1031,37 @@ static void update_insn_state_x86(struct type_state *state,
>  		else if (has_reg_type(state, sreg) &&
>  			 state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
>  			u64 ip = dloc->ms->sym->start + dl->al.offset;
> +			u64 var_addr = src->offset;
>  			int offset;
>  
> +			if (src->multi_regs) {
> +				int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
> +
> +				if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
> +				    state->regs[reg2].kind == TSR_KIND_CONST)
> +					var_addr += state->regs[reg2].imm_value;
> +			}
> +
>  			/*
>  			 * In kernel, %gs points to a per-cpu region for the
>  			 * current CPU.  Access with a constant offset should
>  			 * be treated as a global variable access.
>  			 */
> -			if (get_global_var_type(cu_die, dloc, ip, src->offset,
> +			if (get_global_var_type(cu_die, dloc, ip, var_addr,
>  						&offset, &type_die) &&
>  			    die_get_member_type(&type_die, offset, &type_die)) {
>  				tsr->type = type_die;
>  				tsr->kind = TSR_KIND_TYPE;
>  				tsr->ok = true;
>  
> -				pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
> -					     insn_offset, src->offset, sreg, dst->reg1);
> +				if (src->multi_regs) {
> +					pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
> +						     insn_offset, src->offset, src->reg1,
> +						     src->reg2, dst->reg1);
> +				} else {
> +					pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
> +						     insn_offset, src->offset, sreg, dst->reg1);
> +				}
>  				pr_debug_type_name(&tsr->type, tsr->kind);
>  			} else {
>  				tsr->ok = false;
> @@ -1340,6 +1355,17 @@ static int check_matching_type(struct type_state *state,
>  
>  		pr_debug_dtp(" percpu var\n");
>  
> +		if (dloc->op->multi_regs) {
> +			int reg2 = dloc->op->reg2;
> +
> +			if (dloc->op->reg2 == reg)
> +				reg2 = dloc->op->reg1;
> +
> +			if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
> +			    state->regs[reg2].kind == TSR_KIND_CONST)
> +				var_addr += state->regs[reg2].imm_value;
> +		}
> +
>  		if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
>  					&var_offset, type_die)) {
>  			dloc->type_offset = var_offset;
> @@ -1527,8 +1553,16 @@ static int find_data_type_block(struct data_loc_info *dloc, int reg,
>  		found = find_data_type_insn(dloc, reg, &basic_blocks, var_types,
>  					    cu_die, type_die);
>  		if (found > 0) {
> -			pr_debug_dtp("found by insn track: %#x(reg%d) type-offset=%#x\n",
> -				     dloc->op->offset, reg, dloc->type_offset);
> +			char buf[64];
> +
> +			if (dloc->op->multi_regs)
> +				snprintf(buf, sizeof(buf), "reg%d, reg%d",
> +					 dloc->op->reg1, dloc->op->reg2);
> +			else
> +				snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
> +
> +			pr_debug_dtp("found by insn track: %#x(%s) type-offset=%#x\n",
> +				     dloc->op->offset, buf, dloc->type_offset);
>  			pr_debug_type_name(type_die, TSR_KIND_TYPE);
>  			ret = 0;
>  			break;
> -- 
> 2.45.0.rc1.225.g2a3ae87e7f-goog
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1)
  2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
                   ` (5 preceding siblings ...)
  2024-05-02  6:00 ` [PATCH 6/6] perf annotate-data: Check kind of stack variables Namhyung Kim
@ 2024-05-02 14:25 ` Arnaldo Carvalho de Melo
  6 siblings, 0 replies; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2024-05-02 14:25 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Wed, May 01, 2024 at 11:00:05PM -0700, Namhyung Kim wrote:
> Hello,
> 
> This is a mix of bug fixes and optimization in the data type profiling.
> 
> Firstly it now caches global variables and looks them up by address later.
> This will be good for performance as well as improves the success rates
> because some variables are defined in a separate file (compile unit) and
> has no info in the call site for some reason.
> 
> Also it properly checks instructions that use more than one register for
> a memory access like x86 SIB addressing.  And check the type of stack
> variables correctly and discard constant values (without type info).

Applied locally, doing build tests.

- Arnaldo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] perf annotate-data: Check memory access with two registers
  2024-05-02 14:05   ` Arnaldo Carvalho de Melo
@ 2024-05-02 18:14     ` Namhyung Kim
  2024-05-04 18:26       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02 18:14 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Thu, May 2, 2024 at 7:05 AM Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>
> On Wed, May 01, 2024 at 11:00:09PM -0700, Namhyung Kim wrote:
> > The following instruction pattern is used to access a global variable.
> >
> >   mov     $0x231c0, %rax
> >   movsql  %edi, %rcx
> >   mov     -0x7dc94ae0(,%rcx,8), %rcx
> >   cmpl    $0x0, 0xa60(%rcx,%rax,1)     <<<--- here
> >
> > The first instruction set the address of the per-cpu variable (here, it
> > is 'runqueus' of struct rq).  The second instruction seems like a cpu
>
> You mean 'runqueues', i.e. this one:
>
> kernel/sched/core.c
> DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
>
> ?

Right, sorry for the typo.

>
> But that 0xa60 would be in an alignment hole, at least in:
>
> $ pahole --hex rq | egrep 0xa40 -A12
>         struct mm_struct *         prev_mm;              /* 0xa40   0x8 */
>         unsigned int               clock_update_flags;   /* 0xa48   0x4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         u64                        clock;                /* 0xa50   0x8 */
>
>         /* XXX 40 bytes hole, try to pack */
>
>         /* --- cacheline 42 boundary (2688 bytes) --- */
>         u64                        clock_task __attribute__((__aligned__(64))); /* 0xa80   0x8 */
>         u64                        clock_pelt;           /* 0xa88   0x8 */
>         long unsigned int          lost_idle_time;       /* 0xa90   0x8 */
> $ uname -a
> Linux toolbox 6.7.11-200.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Mar 27 16:50:39 UTC 2024 x86_64 GNU/Linux
> $

This would be different on kernel version, config and
other changes like backports or local modifications.

On my system, it was cpu_stop_work.arg.

$ pahole --hex rq | grep 0xa40 -C1
    /* --- cacheline 41 boundary (2624 bytes) --- */
    struct cpu_stop_work       active_balance_work;  /* 0xa40  0x30 */
    int                        cpu;                  /* 0xa70   0x4 */

$ pahole --hex cpu_stop_work
struct cpu_stop_work {
    struct list_head           list;                 /*     0  0x10 */
    cpu_stop_fn_t              fn;                   /*  0x10   0x8 */
    long unsigned int          caller;               /*  0x18   0x8 */
    void *                     arg;                  /*  0x20   0x8 */
    struct cpu_stop_done *     done;                 /*  0x28   0x8 */

    /* size: 48, cachelines: 1, members: 5 */
    /* last cacheline: 48 bytes */
};


>
> The paragraph then reads:
>
> ----
> The first instruction set the address of the per-cpu variable (here, it
> is 'runqueues' of type 'struct rq').  The second instruction seems like
> a cpu number of the per-cpu base.  The third instruction get the base
> offset of per-cpu area for that cpu.  The last instruction compares the
> value of the per-cpu variable at the offset of 0xa60.
> ----
>
> Ok?

Yep, looks good.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/6] perf annotate-data: Collect global variables in advance
  2024-05-02 13:50   ` Arnaldo Carvalho de Melo
@ 2024-05-02 18:23     ` Namhyung Kim
  2024-05-02 23:28       ` Namhyung Kim
  0 siblings, 1 reply; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02 18:23 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Thu, May 2, 2024 at 6:50 AM Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
>
> On Wed, May 01, 2024 at 11:00:07PM -0700, Namhyung Kim wrote:
> > Currently it looks up global variables from the current CU using address
> > and name.  But it sometimes fails to find a variable as the variable can
> > come from a different CU - but it's still strange it failed to find a
> > declaration for some reason.
> >
> > Anyway, it can collect all global variables from all CU once and then
> > lookup them later on.  This slightly improves the success rate of my
> > test data set.
>
> It would be interesting you could provide examples from your test data
> set, i.e. after this patch these extra global variables were considered
> in the test results, with some tool output, etc.
>
> This would help intersested parties to reproduce your results,
> validate the end result, etc.

Hmm.. ok.  I'll try to find public data that I can share.
But it's just a perf data file recorded with perf mem.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/6] perf annotate-data: Collect global variables in advance
  2024-05-02 18:23     ` Namhyung Kim
@ 2024-05-02 23:28       ` Namhyung Kim
  0 siblings, 0 replies; 14+ messages in thread
From: Namhyung Kim @ 2024-05-02 23:28 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Thu, May 2, 2024 at 11:23 AM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Thu, May 2, 2024 at 6:50 AM Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> >
> > On Wed, May 01, 2024 at 11:00:07PM -0700, Namhyung Kim wrote:
> > > Currently it looks up global variables from the current CU using address
> > > and name.  But it sometimes fails to find a variable as the variable can
> > > come from a different CU - but it's still strange it failed to find a
> > > declaration for some reason.
> > >
> > > Anyway, it can collect all global variables from all CU once and then
> > > lookup them later on.  This slightly improves the success rate of my
> > > test data set.
> >
> > It would be interesting you could provide examples from your test data
> > set, i.e. after this patch these extra global variables were considered
> > in the test results, with some tool output, etc.

Here's the example:

Before
-----------------------------------------------------------
find data type for 0x6014(PC) at entry_SYSCALL_64+0x7
CU for arch/x86/entry/entry_64.S (die:0x85e1d)
no variable found

After
-----------------------------------------------------------
find data type for 0x6014(PC) at entry_SYSCALL_64+0x7
CU for arch/x86/entry/entry_64.S (die:0x85e1d)
found by addr=0x6014 type_offset=0x14
 type='struct tss_struct' size=0x5000 (die:0x74dbe1)

It was asm code so it doesn't have the type info in the CU.
With this change it can see the type (from a different CU).
It seems we have a per-cpu variable called cpu_tss_rw at
address 0x6000.

  $ nm vmlinux | grep 6000 | grep tss
  0000000000006000 D cpu_tss_rw

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/6] perf annotate-data: Check memory access with two registers
  2024-05-02 18:14     ` Namhyung Kim
@ 2024-05-04 18:26       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 14+ messages in thread
From: Arnaldo Carvalho de Melo @ 2024-05-04 18:26 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ian Rogers, Kan Liang, Jiri Olsa, Adrian Hunter, Peter Zijlstra,
	Ingo Molnar, LKML, linux-perf-users

On Thu, May 02, 2024 at 11:14:50AM -0700, Namhyung Kim wrote:
> On Thu, May 2, 2024 at 7:05 AM Arnaldo Carvalho de Melo <acme@kernel.org> wrote:
> >
> > On Wed, May 01, 2024 at 11:00:09PM -0700, Namhyung Kim wrote:
> > > The following instruction pattern is used to access a global variable.
> > >
> > >   mov     $0x231c0, %rax
> > >   movsql  %edi, %rcx
> > >   mov     -0x7dc94ae0(,%rcx,8), %rcx
> > >   cmpl    $0x0, 0xa60(%rcx,%rax,1)     <<<--- here
> > >
> > > The first instruction set the address of the per-cpu variable (here, it
> > > is 'runqueus' of struct rq).  The second instruction seems like a cpu
> >
> > You mean 'runqueues', i.e. this one:
> >
> > kernel/sched/core.c
> > DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
> >
> > ?
> 
> Right, sorry for the typo.
> 
> >
> > But that 0xa60 would be in an alignment hole, at least in:
> >
> > $ pahole --hex rq | egrep 0xa40 -A12
> >         struct mm_struct *         prev_mm;              /* 0xa40   0x8 */
> >         unsigned int               clock_update_flags;   /* 0xa48   0x4 */
> >
> >         /* XXX 4 bytes hole, try to pack */
> >
> >         u64                        clock;                /* 0xa50   0x8 */
> >
> >         /* XXX 40 bytes hole, try to pack */
> >
> >         /* --- cacheline 42 boundary (2688 bytes) --- */
> >         u64                        clock_task __attribute__((__aligned__(64))); /* 0xa80   0x8 */
> >         u64                        clock_pelt;           /* 0xa88   0x8 */
> >         long unsigned int          lost_idle_time;       /* 0xa90   0x8 */
> > $ uname -a
> > Linux toolbox 6.7.11-200.fc39.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Mar 27 16:50:39 UTC 2024 x86_64 GNU/Linux
> > $
> 
> This would be different on kernel version, config and
> other changes like backports or local modifications.
> 
> On my system, it was cpu_stop_work.arg.

Sure, so please include the pahole output for the data that lead you to
the conclusions in the explanation for the results obtained, so that we
can have a better mental map of all the pieces and thus get convinced of
the results and have a way to try to reproduce it in our systems.

In the future we will be grateful to this effort when looking back at
these patches :-)

Thanks for all your work in these features!

- Arnaldo
 
> $ pahole --hex rq | grep 0xa40 -C1
>     /* --- cacheline 41 boundary (2624 bytes) --- */
>     struct cpu_stop_work       active_balance_work;  /* 0xa40  0x30 */
>     int                        cpu;                  /* 0xa70   0x4 */
> 
> $ pahole --hex cpu_stop_work
> struct cpu_stop_work {
>     struct list_head           list;                 /*     0  0x10 */
>     cpu_stop_fn_t              fn;                   /*  0x10   0x8 */
>     long unsigned int          caller;               /*  0x18   0x8 */
>     void *                     arg;                  /*  0x20   0x8 */
>     struct cpu_stop_done *     done;                 /*  0x28   0x8 */
> 
>     /* size: 48, cachelines: 1, members: 5 */
>     /* last cacheline: 48 bytes */
> };
> 
> 
> >
> > The paragraph then reads:
> >
> > ----
> > The first instruction set the address of the per-cpu variable (here, it
> > is 'runqueues' of type 'struct rq').  The second instruction seems like
> > a cpu number of the per-cpu base.  The third instruction get the base
> > offset of per-cpu area for that cpu.  The last instruction compares the
> > value of the per-cpu variable at the offset of 0xa60.
> > ----
> >
> > Ok?
> 
> Yep, looks good.
> 
> Thanks,
> Namhyung

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-05-04 18:27 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-02  6:00 [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Namhyung Kim
2024-05-02  6:00 ` [PATCH 1/6] perf dwarf-aux: Add die_collect_global_vars() Namhyung Kim
2024-05-02  6:00 ` [PATCH 2/6] perf annotate-data: Collect global variables in advance Namhyung Kim
2024-05-02 13:50   ` Arnaldo Carvalho de Melo
2024-05-02 18:23     ` Namhyung Kim
2024-05-02 23:28       ` Namhyung Kim
2024-05-02  6:00 ` [PATCH 3/6] perf annotate-data: Handle direct global variable access Namhyung Kim
2024-05-02  6:00 ` [PATCH 4/6] perf annotate-data: Check memory access with two registers Namhyung Kim
2024-05-02 14:05   ` Arnaldo Carvalho de Melo
2024-05-02 18:14     ` Namhyung Kim
2024-05-04 18:26       ` Arnaldo Carvalho de Melo
2024-05-02  6:00 ` [PATCH 5/6] perf annotate-data: Handle multi regs in find_data_type_block() Namhyung Kim
2024-05-02  6:00 ` [PATCH 6/6] perf annotate-data: Check kind of stack variables Namhyung Kim
2024-05-02 14:25 ` [PATCHSET 0/6] perf annotate-data: Small updates in the data type profiling (v1) Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.