Staging
v0.8.1
https://github.com/torvalds/linux
Raw File
Tip revision: c8d2bc9bc39ebea8437fd974fdbc21847bb897a3 authored by Linus Torvalds on 02 October 2016, 23:24:33 UTC
Linux 4.8
Tip revision: c8d2bc9
mpx-dig.c
/*
 * Written by Dave Hansen <dave.hansen@intel.com>
 */

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
#include "mpx-debug.h"
#include "mpx-mm.h"
#include "mpx-hw.h"

/*
 * Address of the bounds directory being inspected.  It is parsed from the
 * command line in main() (MPX_DIG_REMOTE builds) or taken from the pointer
 * passed to inspect_me(), and serves as the base to which bounds-directory
 * byte offsets are added elsewhere in this file.
 */
unsigned long bounds_dir_global;

/*
 * Announce where the digger gave up and terminate the process.  The same
 * message goes to both stderr and stdout before abort() is called.
 *
 * @file: source file name of the call site
 * @func: function name of the call site
 * @line: line number of the call site
 *
 * Call this through the mpx_dig_abort() wrapper below, which fills in the
 * three arguments from the call site automatically.
 */
static inline void __mpx_dig_abort(const char *file, const char *func, int line)
{
	fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
	printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
	abort();
}
#define mpx_dig_abort()	__mpx_dig_abort(__FILE__, __func__, __LINE__)

/*
 * Run like this (BDIR finds the probable bounds directory):
 *
 *	BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
 *		| head -1 | awk -F- '{print $1}')";
 *	./mpx-dig $pid 0x$BDIR
 *
 * NOTE:
 *	This assumes that the only 2097152-kB VMA is the bounds directory.
 */

/*
 * Count the pages of [ptr, ptr + size_bytes) that mincore() reports as
 * resident.
 *
 * @ptr:        start of the region handed to mincore()
 * @size_bytes: length of the region in bytes
 *
 * Returns the number of resident pages.  Aborts via mpx_dig_abort() if the
 * result vector cannot be allocated or mincore() fails.
 */
long nr_incore(void *ptr, unsigned long size_bytes)
{
	/*
	 * mincore() writes one byte per page and rounds the length *up* to a
	 * whole number of pages, so the vector must be rounded up as well.
	 */
	unsigned long vec_len = (size_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned char *vec;
	unsigned long i;
	long ret = 0;
	int incore_ret;

	/* malloc(0) may legitimately return NULL; do not mistake it for OOM */
	vec = malloc(vec_len ? vec_len : 1);
	if (!vec)
		mpx_dig_abort();

	incore_ret = mincore(ptr, size_bytes, vec);
	if (incore_ret) {
		printf("mincore ret: %d\n", incore_ret);
		perror("mincore");
		mpx_dig_abort();
	}
	/* only the least significant bit of each byte is defined */
	for (i = 0; i < vec_len; i++)
		ret += vec[i] & 1;
	free(vec);
	return ret;
}

/*
 * Open /proc/<pid>/<file> read-only.
 *
 * @pid:  process id used to build the path
 * @file: name of the entry below /proc/<pid>/
 *
 * Returns the open file descriptor, or a negative value on failure.  When
 * the path does not fit in the local buffer nothing is opened, a message
 * is printed and errno is set to ENAMETOOLONG; when open() fails the
 * failing path is reported with perror().
 */
int open_proc(int pid, char *file)
{
	char path[100];
	int len;
	int fd;

	len = snprintf(path, sizeof(path), "/proc/%d/%s", pid, file);
	/* refuse to open a silently truncated (and therefore wrong) path */
	if (len < 0 || (size_t)len >= sizeof(path)) {
		fprintf(stderr, "open_proc: path for pid %d does not fit in %zu bytes\n",
			pid, sizeof(path));
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		perror(path);

	return fd;
}

/* One virtual address range; lookups treat it as [start, end). */
struct vaddr_range {
	unsigned long start;	/* lowest address inside the range */
	unsigned long end;	/* first address past the range */
};
/* Array of ranges filled from /proc/<pid>/maps by __pid_load_vaddrs(). */
struct vaddr_range *ranges;
/* Number of elements that ranges[] currently has room for. */
int nr_ranges_allocated;
/* Number of elements of ranges[] that hold parsed data. */
int nr_ranges_populated;
/* Index of the range that matched the most recent lookup, -1 if none. */
int last_range = -1;

/*
 * Parse /proc/<pid>/maps and store the start/end of every line in ranges[].
 *
 * @pid: process whose maps file is read
 *
 * Returns 0 when every line was stored, or -E2BIG when ranges[] (sized by
 * nr_ranges_allocated) is too small.  nr_ranges_populated is reset and
 * recounted on every call and the last_range lookup cache is invalidated.
 * Aborts via mpx_dig_abort() if the file cannot be opened, read or parsed.
 */
int __pid_load_vaddrs(int pid)
{
	int ret = 0;
	int proc_maps_fd = open_proc(pid, "maps");
	char linebuf[10000];
	unsigned long start;
	unsigned long end;
	char rest[1000];
	FILE *f;

	if (proc_maps_fd < 0)
		mpx_dig_abort();
	f = fdopen(proc_maps_fd, "r");
	if (!f)
		mpx_dig_abort();

	nr_ranges_populated = 0;
	while (fgets(linebuf, sizeof(linebuf), f)) {
		int parsed;

		/* the field width keeps the trailing token inside rest[] */
		parsed = sscanf(linebuf, "%lx-%lx%999s", &start, &end, rest);
		if (parsed != 3)
			mpx_dig_abort();

		dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
		if (nr_ranges_populated >= nr_ranges_allocated) {
			ret = -E2BIG;
			break;
		}
		ranges[nr_ranges_populated].start = start;
		ranges[nr_ranges_populated].end = end;
		nr_ranges_populated++;
	}
	/* fgets() stopping for any reason other than end-of-file is fatal */
	if (ferror(f))
		mpx_dig_abort();

	last_range = -1;
	/*
	 * fclose() also closes the descriptor that fdopen() wrapped; closing
	 * proc_maps_fd again here would be a double close.
	 */
	fclose(f);
	return ret;
}

/*
 * Load the address ranges of @pid into ranges[], growing the array until
 * everything fits.
 *
 * The array starts with room for 4 ranges on first use and is doubled each
 * time __pid_load_vaddrs() reports -E2BIG, after which the load is retried
 * from scratch.  Returns the final result of __pid_load_vaddrs(), which is
 * 0 when the loop ends.
 */
int pid_load_vaddrs(int pid)
{
	int ret;

	dprintf2("%s(%d)\n", __func__, pid);

	/* first call: set up the initial, small array */
	if (ranges == NULL) {
		nr_ranges_allocated = 4;
		ranges = malloc(nr_ranges_allocated * sizeof(*ranges));
		dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
			 nr_ranges_allocated, ranges);
		assert(ranges != NULL);
	}

	while ((ret = __pid_load_vaddrs(pid)) != 0) {
		/* anything other than "array too small" is simply retried */
		if (ret != -E2BIG)
			continue;

		dprintf2("%s(%d) need to realloc\n", __func__, pid);
		nr_ranges_allocated *= 2;
		ranges = realloc(ranges, nr_ranges_allocated * sizeof(*ranges));
		dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
				pid, nr_ranges_allocated, ranges);
		assert(ranges != NULL);
		dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
	}

	dprintf2("%s(%d) done\n", __func__, pid);

	return ret;
}

/*
 * Return 1 when @vaddr lies inside the half-open interval
 * [r->start, r->end), 0 otherwise.
 */
static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
{
	return r->start <= vaddr && vaddr < r->end;
}

/*
 * Return 1 when @vaddr falls inside one of the populated entries of
 * ranges[], 0 otherwise.
 *
 * The index of the last matching range is cached in last_range and tried
 * first, so repeated lookups in the same range avoid the linear scan.
 */
static inline int vaddr_mapped_by_range(unsigned long vaddr)
{
	int i;

	/* index 0 is a valid cached hit; only -1 means "nothing cached" */
	if (last_range >= 0 && vaddr_in_range(vaddr, &ranges[last_range]))
		return 1;

	for (i = 0; i < nr_ranges_populated; i++) {
		struct vaddr_range *r = &ranges[i];

		/* skip ranges that do NOT contain the address */
		if (!vaddr_in_range(vaddr, r))
			continue;
		last_range = i;
		return 1;
	}
	return 0;
}

/* Size in bytes of one bounds-table entry: four unsigned longs. */
const int bt_entry_size_bytes = sizeof(unsigned long) * 4;

/*
 * Return a pointer through which the bounds table at @table_vaddr can be
 * read.
 *
 * In the default build the table is addressed directly, so @table_vaddr is
 * simply returned as a pointer.  When MPX_DIG_STANDALONE is defined the
 * table is instead copied out of proc_pid_mem_fd into a static buffer
 * (overwritten on every call) and that buffer is returned; a failed seek or
 * a short read aborts via mpx_dig_abort().
 */
void *read_bounds_table_into_buf(unsigned long table_vaddr)
{
#ifdef MPX_DIG_STANDALONE
	/* the descriptor is defined further down in this file */
	extern int proc_pid_mem_fd;
	static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
	off_t seek_ret;
	ssize_t read_ret;

	seek_ret = lseek(proc_pid_mem_fd, table_vaddr, SEEK_SET);
	if ((unsigned long)seek_ret != table_vaddr)
		mpx_dig_abort();

	read_ret = read(proc_pid_mem_fd, bt_buf, sizeof(bt_buf));
	if (read_ret != (ssize_t)sizeof(bt_buf))
		mpx_dig_abort();
	return bt_buf;
#else
	return (void *)table_vaddr;
#endif
}

/*
 * Walk one bounds table and count its non-empty entries.
 *
 * @table_vaddr:           address of the bounds table
 * @base_controlled_vaddr: address that entry 0 of this table corresponds
 *                         to; entry N corresponds to
 *                         base + N * sizeof(void *)
 * @bde_vaddr:             address of the bounds-directory entry that led
 *                         here (used in diagnostics only)
 *
 * An entry is considered empty when all four of its words are zero.  For
 * every non-empty entry the corresponding address must be covered by
 * ranges[]; otherwise diagnostics are printed and the process aborts.
 * At DEBUG_LEVEL >= 4 the raw words of each non-empty entry are printed.
 *
 * Returns the number of non-empty entries.
 */
int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
		unsigned long bde_vaddr)
{
	const unsigned long words_per_entry =
		bt_entry_size_bytes / sizeof(unsigned long);
	unsigned long offset_inside_bt;
	int nr_entries = 0;
	char *bt_buf;

	dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
			__func__, base_controlled_vaddr, bde_vaddr);

	bt_buf = read_bounds_table_into_buf(table_vaddr);

	dprintf4("%s() read done\n", __func__);

	/* validate the buffer once, before it is indexed */
	if (!bt_buf) {
		printf("null bt_buf\n");
		mpx_dig_abort();
	}

	for (offset_inside_bt = 0;
	     offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
	     offset_inside_bt += bt_entry_size_bytes) {
		unsigned long bt_entry_index;
		unsigned long bt_entry_controls;
		unsigned long this_bt_entry_for_vaddr;
		unsigned long *bt_entry_buf;
		unsigned long word;

		dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
			offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
		bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
		dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
				bt_entry_buf);
		/* an all-zero entry is unused */
		if (!bt_entry_buf[0] &&
		    !bt_entry_buf[1] &&
		    !bt_entry_buf[2] &&
		    !bt_entry_buf[3])
			continue;

		nr_entries++;

		bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
		bt_entry_controls = sizeof(void *);
		this_bt_entry_for_vaddr =
			base_controlled_vaddr + bt_entry_index*bt_entry_controls;
		/*
		 * We sign extend vaddr bits 48->63 which effectively
		 * creates a hole in the virtual address space.
		 * This calculation corrects for the hole.
		 */
		if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
			this_bt_entry_for_vaddr |= 0xffff800000000000;

		if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
			printf("bt_entry_buf: %p\n", bt_entry_buf);
			printf("there is a bte for %lx but no mapping\n",
					this_bt_entry_for_vaddr);
			printf("	  bde   vaddr: %016lx\n", bde_vaddr);
			printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
			printf("	  table_vaddr: %016lx\n", table_vaddr);
			printf("	  entry vaddr: %016lx @ offset %lx\n",
				table_vaddr + offset_inside_bt, offset_inside_bt);
			mpx_dig_abort();
		}
		if (DEBUG_LEVEL < 4)
			continue;

		/* index by word, not by byte offset, to stay inside the entry */
		printf("table entry[%lx]: ", offset_inside_bt);
		for (word = 0; word < words_per_entry; word++)
			printf("0x%016lx ", bt_entry_buf[word]);
		printf("\n");
	}
	dprintf4("%s() done\n",  __func__);
	return nr_entries;
}

/*
 * Scan a chunk of the bounds directory and dump every bounds table that a
 * non-zero entry points to.
 *
 * @buf:               start of the chunk to scan
 * @len_bytes:         length of the chunk in bytes
 * @bd_offset_bytes:   byte offset of @buf from the start of the bounds
 *                     directory
 * @nr_populated_bdes: counter incremented once per non-zero directory entry
 *
 * For each non-zero entry the low (enable) bit is cleared to obtain the
 * bounds-table address, both the first and last byte of that table must be
 * covered by ranges[] (otherwise the process aborts), and the table is
 * passed to dump_table().
 *
 * Returns the sum of the dump_table() results for this chunk.
 */
int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
		int *nr_populated_bdes)
{
	/* amount of address space attributed to one directory entry */
	const unsigned long bde_span_bytes = 1UL<<20;
	unsigned long pos;
	int entries_seen = 0;

	dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
			len_bytes, bd_offset_bytes, buf + len_bytes);

	for (pos = 0; pos < len_bytes; pos += sizeof(unsigned long)) {
		unsigned long *bde_ptr = (unsigned long *)(buf + pos);
		unsigned long bd_index = (bd_offset_bytes + pos) / sizeof(unsigned long);
		unsigned long bde;
		unsigned long controlled_base;
		unsigned long table_first;
		unsigned long table_last;
		int table_entries;

		dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, pos,
				bde_ptr);

		bde = *bde_ptr;
		if (bde == 0) {
			dprintf4("no bounds dir at index 0x%lx / 0x%lx "
				 "start at offset:%lx %lx\n", bd_index, bd_index,
					bd_offset_bytes, pos);
			continue;
		}
		dprintf3("found bounds_dir_entry: 0x%lx @ "
			 "index 0x%lx buf ptr: %p\n", bde, pos,
					buf + pos);

		/* strip the enable bit to get the table address */
		bde &= ~0x1;
		*nr_populated_bdes += 1;
		dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
		dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);

		/* both ends of the table have to live in a known mapping */
		table_first = bde;
		table_last = bde + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
		if (!vaddr_mapped_by_range(table_first)) {
			printf("bounds directory 0x%lx points to nowhere\n",
					bde);
			mpx_dig_abort();
		}
		if (!vaddr_mapped_by_range(table_last)) {
			printf("bounds directory end 0x%lx points to nowhere\n",
					table_last);
			mpx_dig_abort();
		}

		/*
		 * Start of the region of the process address space that this
		 * directory entry is responsible for.
		 */
		controlled_base = bd_index * bde_span_bytes;

		table_entries = dump_table(bde, controlled_base,
				bounds_dir_global+bd_offset_bytes+pos);
		entries_seen += table_entries;
		dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
			 "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
				bd_index, buf+pos,
				bde, table_entries, entries_seen,
				controlled_base, controlled_base + bde_span_bytes);
	}
	dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
			bd_offset_bytes);
	return entries_seen;
}

/*
 * Descriptor used to read another process's memory; stays -1 until main()
 * (MPX_DIG_REMOTE builds) stores the result of open_proc(pid, "mem") here.
 */
int proc_pid_mem_fd = -1;

/*
 * Copy a chunk of another process's bounds directory into @buffer.
 *
 * @byte_offset_inside_bounds_dir: offset of the chunk, added to
 *                                 bounds_dir_global to form the file offset
 * @buffer_size_bytes:             number of bytes to read
 * @buffer:                        destination for the data
 *
 * Reads through proc_pid_mem_fd.  A failed seek or a read that returns
 * anything other than the full size aborts via mpx_dig_abort().
 * Returns @buffer.
 */
void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
			   long buffer_size_bytes, void *buffer)
{
	unsigned long target = bounds_dir_global + byte_offset_inside_bounds_dir;
	int nread;

	if (lseek(proc_pid_mem_fd, target, SEEK_SET) != target)
		mpx_dig_abort();

	/* there shouldn't practically be short reads of /proc/$pid/mem */
	nread = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
	if (nread != buffer_size_bytes)
		mpx_dig_abort();

	return buffer;
}
/*
 * Locate a chunk of this process's own bounds directory and decide whether
 * it is worth scanning.
 *
 * @byte_offset_inside_bounds_dir: offset of the chunk, added to
 *                                 bounds_dir_global to form its address
 * @buffer_size_bytes:             size of the chunk in bytes
 * @buffer:                        unused here; nothing is copied
 *
 * Returns a pointer straight into the bounds directory when mincore()
 * reports at least one resident page in the chunk, or NULL when no page is
 * resident.  Aborts via mpx_dig_abort() if mincore() fails.
 */
void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
			   long buffer_size_bytes, void *buffer)

{
	/*
	 * mincore() writes one byte per page with the length rounded *up* to
	 * whole pages; size the vector the same way and never let it be empty.
	 */
	unsigned long nr_pages = (buffer_size_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned char vec[nr_pages ? nr_pages : 1];
	char *dig_bounds_dir_ptr =
		(void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
	/*
	 * use mincore() to quickly find the areas of the bounds directory
	 * that have memory and thus will be worth scanning.
	 */
	int incore_ret;
	int incore = 0;
	unsigned long i;

	(void)buffer;

	dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);

	incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
	if (incore_ret) {
		printf("mincore ret: %d\n", incore_ret);
		perror("mincore");
		mpx_dig_abort();
	}
	/* only the least significant bit of each byte is defined */
	for (i = 0; i < nr_pages; i++)
		incore += vec[i] & 1;
	dprintf4("%s() total incore: %d\n", __func__, incore);
	if (!incore)
		return NULL;
	dprintf3("%s() total incore: %d\n", __func__, incore);
	return dig_bounds_dir_ptr;
}

/*
 * Walk the bounds directory chunk by chunk and dump every bounds table it
 * references.
 *
 * @pid: process to inspect; anything other than the calling process
 *       currently aborts via mpx_dig_abort()
 *
 * Chunks for which the fill helper returns NULL are skipped.  A summary
 * line is printed at the end.  Returns the number of bounds-table entries
 * found plus the number of populated bounds-directory entries.
 *
 * NOTE(review): the loop is bounded by MPX_BOUNDS_TABLE_SIZE_BYTES although
 * the offsets it produces index the bounds *directory* -- confirm that this
 * limit is intended.
 */
int inspect_pid(int pid)
{
	static int dig_nr;
	char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
	int inspect_self = (getpid() == pid);
	long offset_inside_bounds_dir;
	char *dig_bounds_dir_ptr;
	int nr_populated_bdes = 0;
	int total_entries = 0;

	if (inspect_self) {
		dprintf4("inspecting self\n");
	} else {
		dprintf4("inspecting pid %d\n", pid);
		mpx_dig_abort();
	}

	for (offset_inside_bounds_dir = 0;
	     offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
	     offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
		static int bufs_skipped;

		/* pick the accessor that matches whose memory is examined */
		dig_bounds_dir_ptr = inspect_self ?
			fill_bounds_dir_buf_self(offset_inside_bounds_dir,
						 sizeof(bounds_dir_buf),
						 &bounds_dir_buf[0]) :
			fill_bounds_dir_buf_other(offset_inside_bounds_dir,
						  sizeof(bounds_dir_buf),
						  &bounds_dir_buf[0]);

		/* nothing to look at in this chunk */
		if (dig_bounds_dir_ptr == NULL) {
			bufs_skipped++;
			continue;
		}

		total_entries += search_bd_buf(dig_bounds_dir_ptr,
					sizeof(bounds_dir_buf),
					offset_inside_bounds_dir,
					&nr_populated_bdes);
	}
	printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
			total_entries, nr_populated_bdes);
	return total_entries + nr_populated_bdes;
}

#ifdef MPX_DIG_REMOTE
/*
 * Stand-alone entry point, built only when MPX_DIG_REMOTE is defined.
 *
 * Usage: mpx-dig <pid> <bounds-directory-address-in-hex>
 *
 * Parses both arguments, opens /proc/<pid>/mem and runs inspect_pid().
 * Returns 0 on success and 1 when arguments are missing; a malformed
 * argument or a failed open aborts via mpx_dig_abort().
 */
int main(int argc, char **argv)
{
	int err;
	int pid;

	printf("mpx-dig starting...\n");
	/* argv[1] and argv[2] are dereferenced below, so both must exist */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <pid> <bounds-dir-vaddr-hex>\n",
			argc > 0 ? argv[0] : "mpx-dig");
		return 1;
	}

	err = sscanf(argv[1], "%d", &pid);
	printf("parsing: '%s', err: %d\n", argv[1], err);
	if (err != 1)
		mpx_dig_abort();

	err = sscanf(argv[2], "%lx", &bounds_dir_global);
	printf("parsing: '%s': %d\n", argv[2], err);
	if (err != 1)
		mpx_dig_abort();

	proc_pid_mem_fd = open_proc(pid, "mem");
	if (proc_pid_mem_fd < 0)
		mpx_dig_abort();

	inspect_pid(pid);
	return 0;
}
#endif

/*
 * Inspect the calling process's own MPX structures.
 *
 * @bounds_dir: this process's bounds directory; its address is recorded in
 *              bounds_dir_global for the rest of the digger
 *
 * Refreshes ranges[] for the current pid and then runs inspect_pid() on
 * it.  Returns whatever inspect_pid() returns.
 */
long inspect_me(struct mpx_bounds_dir *bounds_dir)
{
	const int self = getpid();

	pid_load_vaddrs(self);
	bounds_dir_global = (unsigned long)bounds_dir;
	dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);

	return inspect_pid(self);
}
back to top