--- include/linux/topology.h	Mon Apr  9 12:43:37 2001
+++ include/linux/topology.h	Mon Apr  9 11:30:39 2001
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2001 Silicon Graphics, Inc.
+ * Copyright (C) 2001 Kanoj Sarcar (kanoj@sgi.com)
+ *
+ * This file deals with exposing as much of the machine structure
+ * as sophisticated users might need to optimize application performance.
+ */
+
+#ifndef _LINUX_TOPOLOGY_H
+#define _LINUX_TOPOLOGY_H
+
+#define MACHSTRING	"machine"	/* root directory of the graph in /proc */
+#define VERSTRING	"version"	/* prefix of the graph version vertex */
+#define NODESTRING	"node"	/* prefix of per-node vertices */
+#define CPUSTRING	"cpu"	/* prefix of per-cpu vertices */
+#define MEMSTRING	"memory"	/* prefix of per-node memory vertices */
+#define MSIZESTRING	"memsize"	/* file reporting a node's memory size */
+#define SHCACHSTRING	"shcach"	/* prefix of shared-cache vertices */
+#define DISTSTRING	"distances"	/* file listing inter-node distances */
+
+#define MAXSTRINGLEN	20	/* buffer size for generated vertex names */
+#define MAXNUMBERLEN	5	/* digits in a name's number suffix, plus NUL */
+#define MAXNUMBER	9999	/* largest value MAXNUMBERLEN - 1 digits hold */
+
+/*
+ * Consider this the user level manpage for the topology graph:
+ *
+ * The graph _must_ always be searched recursively. Additional links
+ * (whether generic, or platform specific) might be added or deleted 
+ * without notice. Portable user programs that look at this graph must 
+ * look for certain known names in hierarchical order.
+ *
+ * The topology graph is rooted at /proc/machine. Platforms can link
+ * this point into their own topology graph implementations, if they
+ * follow the rules below. Else, they risk being incompatible with
+ * all other Linux platforms.
+ *
+ * Programs should read the "version" vertex under the root to 
+ * determine what version of the graph they are looking at. The
+ * following describes version 0 of the graph. Future platforms
+ * might lead to changes in the graph description, and those will
+ * be marked by newer versions of the graph.
+ *
+ * Some levels under the root, there will be, possibly multiple,
+ * "nodeABCD" vertices. ABCD is the logical node number. This means 
+ * the machine is composed of these nodes.
+ *
+ * Some levels under "nodeABCD" vertex, there may be, possibly multiple,
+ * "cpuPQRS" vertices. PQRS is the logical cpu number. The node has 
+ * all these cpus.
+ *
+ * Some levels under "nodeABCD" vertex, there may be a "memoryEFGH"
+ * vertex. If there is, some levels under "memoryEFGH" vertex, there 
+ * will be a "memsize" vertex. The number obtained by cat'ing this 
+ * vertex represents the total memory on the node.
+ *
+ * Some levels under "nodeABCD" vertex, there will be a vertex
+ * named "distances". Cat'ing this file will give distances from
+ * this node to all others. These distances loosely indicate how far 
+ * from each other the nodes are, and how fat the links between the
+ * nodes are.
+ *
+ * Some levels under "cpuPQRS" and "cpuWXYZ" vertices, possible
+ * cache sharing is indicated by the presence of the same vertex
+ * name "shcachKLMN".
+ *
+ * At any point in the graph, if there is a vertex with a name that is
+ * not mentioned above, it is a platform specific component. Platform
+ * vendors define these names and the tools/drivers to utilize them.
+ *
+ * Whenever a vertex needs to be cat'ed to obtain information, the
+ * kernel sends back information via streams of ascii characters. This
+ * stream should be parsed with the knowledge that string separators
+ * (like \n, \t etc) can be replaced with others for better 
+ * presentability.
+ *
+ * No other rules should be assumed by users looking at the graph.
+ * If any other rules are assumed, program portability across platforms
+ * and compatibility across releases is not guaranteed.
+ *
+ * Example: find /proc/machine -name "*" -print may yield the line
+ * /proc/machine/......./node0003/......./cpu0007. This indicates
+ * node 3 contains cpu 7. If it also yields the output
+ * /proc/machine/..../node0002/.../memory0002/memsize, it indicates
+ * node 2 has some memory, whose size can be obtained by cat'ing the
+ * "memsize" vertex. If it also yields the line
+ * /proc/machine/.../node0001/.../platreg, then "platreg" represents
+ * a platform specific component on node 1 that the platform wants to
+ * expose to users for some reason.
+ */
+
+/*
+ * Kernel interfaces for arch/platform code. Except for topo_init(), the
+ * interfaces return 0 on success and 1 on failure.
+ */
+
+typedef struct proc_dir_entry * topo_vertex_t;
+
+/*
+ * Returns the root of the topology graph, or 0 if it could not be created.
+ * The version input selects which version of the graph the platform uses.
+ */
+extern topo_vertex_t topo_init(int version);
+
+/*
+ * Add the node described by ndat to the topology graph at the add point
+ * addpoint. If addpoint is 0, the node is added directly under the
+ * topology graph root.
+ */
+extern int topo_node_add(node_data_t *ndat, topo_vertex_t addpoint);
+
+/*
+ * Add cpu cpunum at the add point addpoint; if addpoint is 0, the cpu is
+ * added directly under the topology vertex for the node ndat.
+ */
+extern int topo_cpu_add(node_data_t *ndat, unsigned int cpunum,
+						topo_vertex_t addpoint);
+
+/*
+ * Add the memory vertices for the node described by ndat at the add point
+ * addpoint. If addpoint is 0, the memory hierarchy is added directly under
+ * the topology vertex for the node.
+ */
+extern int topo_mem_add(node_data_t *ndat, topo_vertex_t addpoint);
+
+/*
+ * Indicates that the first and second components share a resource,
+ * which is currently only a level of cache.
+ */
+extern int topo_shared_cache_add(topo_vertex_t first, topo_vertex_t second);
+
+#endif /* _LINUX_TOPOLOGY_H */
--- kernel/topology.c	Mon Apr  9 12:43:37 2001
+++ kernel/topology.c	Mon Apr  9 12:01:04 2001
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2001 Silicon Graphics, Inc.
+ * Copyright (C) 2001 Kanoj Sarcar (kanoj@sgi.com)
+ *
+ * This file deals with exposing as much of the machine structure
+ * as sophisticated users might need to optimize application performance.
+ *
+ * Implementation currently uses procfs, but could be converted to 
+ * use devfs, or some other simple fs. Proc fs suffers from not having
+ * file links, which are needed for shared resources (caches, routers etc).
+ */
+
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+#include <linux/mmzone.h>
+#include <linux/topology.h>
+#include <linux/stat.h>
+
+extern void free_proc_entry(struct proc_dir_entry *);
+
+static topo_vertex_t mach_root;	/* graph root; assigned by topo_init() */
+
+/*
+ * Write number into str as MAXNUMBERLEN - 1 zero-padded decimal digits plus
+ * a terminating NUL. Numbers above MAXNUMBER are warned about and truncated
+ * to their low digits instead of being written in front of the buffer.
+ */
+static void num_to_string(unsigned int number, char *str)
+{
+	int index = MAXNUMBERLEN - 2;
+
+	if (number > MAXNUMBER)
+		printk("WARNING: node/cpu number exceeded ... %u\n", number);
+	str[index + 1] = 0;
+	for (; index >= 0; index--, number /= 10)
+		str[index] = '0' + (number % 10);
+}
+
+static void fillstring(char *target, char *string, int number)
+{
+	char digits[MAXNUMBERLEN];	/* zero-padded decimal form of number */
+
+	/* target becomes string immediately followed by the padded number */
+	num_to_string(number, digits);
+	strcat(strcpy(target, string), digits);
+}
+
+/*
+ * This must be called before any other functions in this file 
+ * are invoked.
+ */
+topo_vertex_t topo_init(int version)
+{
+	char versname[MAXSTRINGLEN];
+
+	if ((mach_root = proc_mkdir(MACHSTRING, 0)) == (topo_vertex_t)0)
+		return mach_root;
+	/* Publish the version as "versionNNNN"; the result is not checked. */
+	fillstring(versname, VERSTRING, version);
+	proc_mkdir(versname, mach_root);
+	return mach_root;
+}
+
+/* read_proc handler: emit one "Node <n>: <distance>" line per node. */
+static int read_distances(char *page, char **start, off_t off, int count,
+						int *eof, void *data)
+{
+	int i, n, len = 0;	/* n: bytes sprintf() wrote for this line */
+	node_data_t *ndat = (node_data_t *)data;
+	for (i = 0; i < numnodes; i++, page += n, len += n)
+		n = sprintf(page, "Node %3d: %2d\n", i, ndat->distances[i]);
+	return len;
+}
+
+/* Add "nodeNNNN" plus its "distances" file under addpoint: 0 ok, 1 fail. */
+int topo_node_add(node_data_t *ndat, topo_vertex_t addpoint)
+{
+	char nodename[MAXSTRINGLEN];
+	struct proc_dir_entry *entry;
+
+	fillstring(nodename, NODESTRING, ndat->node_id);
+	if (addpoint == (topo_vertex_t)0)
+		addpoint = mach_root;
+	if (!(ndat->nodeinfo = (void *)proc_mkdir(nodename, addpoint)))
+		return 1;
+	entry = create_proc_entry(DISTSTRING, S_IRUGO, ndat->nodeinfo);
+	if (!entry) {
+		remove_proc_entry(nodename, addpoint);	/* unlink it again */
+		ndat->nodeinfo = 0;
+		return 1;
+	}
+	entry->nlink = 1;
+	entry->data = (void *)ndat;
+	entry->read_proc = read_distances;
+	return 0;
+}
+
+/*
+ * Add "cpuNNNN" under addpoint (the node's own vertex when addpoint is 0)
+ * and record it in the node's first free pinfo slot. Returns 0 on success,
+ * 1 on failure. This module does not manage cpu numbers.
+ */
+int topo_cpu_add(node_data_t *ndat, unsigned int cpunum, topo_vertex_t addpoint)
+{
+	int i;
+	char cpuname[MAXSTRINGLEN];
+
+	/* Find the slot first so a full table fails before anything is made. */
+	for (i = 0; i < MAX_CPUS_PER_NODE; i++)
+		if (ndat->pinfo[i] == (void *)0)
+			break;
+	if (i == MAX_CPUS_PER_NODE)
+		return 1;
+	fillstring(cpuname, CPUSTRING, cpunum);
+	if (addpoint == (topo_vertex_t)0)
+		addpoint = (topo_vertex_t)(ndat->nodeinfo);
+	ndat->pinfo[i] = (void *)proc_mkdir(cpuname, addpoint);
+	return ndat->pinfo[i] ? 0 : 1;
+}
+
+/*
+ * read_proc handler for "memsize": the node's memory in kB. With multiple
+ * memory extents per node this will return their sum (or we could expose
+ * per-extent information too). Fix: node_size does not represent the
+ * true size for platforms that handle their own holes.
+ */
+static int read_memsize(char *page, char **start, off_t off, int count,
+						int *eof, void *data)
+{
+	node_data_t *ndat = (node_data_t *)data;
+
+	if (count < 8)
+		return -EINVAL;
+	return sprintf(page, "%8lu kB\n",	/* node_size: unsigned long */
+		ndat->node_pgdat->node_size << (PAGE_SHIFT - 10));
+}
+
+/*
+ * Add a "memoryNNNN" directory holding a "memsize" file for the node;
+ * returns 0 on success, 1 on failure. This module hands out memory
+ * numbers, at least for now (memory numbers and node numbers might differ
+ * if there are nodes with no memory). When we support multiple memory
+ * extents in a node, this will take an input to describe the extent.
+ */
+int topo_mem_add(node_data_t *ndat, topo_vertex_t addpoint)
+{
+	static int mem_id = 0;
+	char memname[MAXSTRINGLEN];
+	struct proc_dir_entry *entry;
+
+	fillstring(memname, MEMSTRING, mem_id++);
+	if (addpoint == (topo_vertex_t)0)
+		addpoint = (topo_vertex_t)(ndat->nodeinfo);
+	if (!(ndat->minfo = proc_mkdir(memname, addpoint)))
+		return 1;
+	entry = create_proc_entry(MSIZESTRING, S_IRUGO, ndat->minfo);
+	if (!entry) {
+		remove_proc_entry(memname, addpoint);	/* unlink it again */
+		ndat->minfo = 0;
+		return 1;
+	}
+	entry->nlink = 1;
+	entry->data = (void *)ndat;
+	entry->read_proc = read_memsize;
+	return 0;
+}
+
+/* Give both vertices a same-named "shcachNNNN" child; 0 ok, 1 failure. */
+int topo_shared_cache_add(topo_vertex_t first, topo_vertex_t second)
+{
+	static int cache_id = 0;
+	char cachename[MAXSTRINGLEN];
+
+	fillstring(cachename, SHCACHSTRING, cache_id++);
+	if (proc_mkdir(cachename, first) == 0)
+		return 1;
+	if (proc_mkdir(cachename, second) == 0) {
+		/* Take the first child back out so no one-sided marker stays. */
+		remove_proc_entry(cachename, first);
+		return 1;
+	}
+
+	return 0;
+}
+
--- kernel/Makefile	Wed Jan 10 09:18:09 2001
+++ kernel/Makefile	Wed Apr  4 21:31:10 2001
@@ -14,7 +14,7 @@
 obj-y     = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
 	    module.o exit.o itimer.o info.o time.o softirq.o resource.o \
 	    sysctl.o acct.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o context.o
+	    signal.o sys.o kmod.o context.o topology.o
 
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += ksyms.o
--- include/linux/mmzone.h	Wed Nov 22 18:00:56 2000
+++ include/linux/mmzone.h	Mon Apr  9 12:38:37 2001
@@ -7,6 +7,7 @@
 #include <linux/config.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/threads.h>
 
 /*
  * Free memory management - zoned buddy allocator.
@@ -76,6 +77,7 @@
 #define NR_GFPINDEX		0x100
 
 struct bootmem_data;
+struct node_data;
 typedef struct pglist_data {
 	zone_t node_zones[MAX_NR_ZONES];
 	zonelist_t node_zonelists[NR_GFPINDEX];
@@ -87,6 +89,7 @@
 	unsigned long node_size;
 	int node_id;
 	struct pglist_data *node_next;
+	struct node_data *node_info;
 } pg_data_t;
 
 extern int numnodes;
@@ -108,6 +111,14 @@
 
 extern pg_data_t contig_page_data;
 
+#ifndef CONFIG_NUMA
+
+#define MAX_CPUS_PER_NODE	NR_CPUS	/* one node, so it may hold every cpu */
+#define numa_node_id()		0	/* always node 0 without CONFIG_NUMA */
+#define MAXNODES		1	/* sizes node_data.distances[] */
+
+#endif /* !CONFIG_NUMA */
+
 #ifndef CONFIG_DISCONTIGMEM
 
 #define NODE_DATA(nid)		(&contig_page_data)
@@ -121,6 +132,21 @@
 
 #define MAP_ALIGN(x)	((((x) % sizeof(mem_map_t)) == 0) ? (x) : ((x) + \
 		sizeof(mem_map_t) - ((x) % sizeof(mem_map_t))))
+
+/*
+ * This structure maintains information about a "node", which has
+ * some combination of cpus, memory and devices. A "node" is defined
+ * as the biggest collection of components that have uniform access 
+ * to all components on other "node"s.
+ */
+typedef struct node_data {
+	int node_id;			/* number used in the "nodeNNNN" name */
+	pg_data_t *node_pgdat;		/* its node_size backs "memsize" */
+	void *nodeinfo;			/* topology vertex of the node itself */
+	void *pinfo[MAX_CPUS_PER_NODE];	/* topology vertices of added cpus */
+	void *minfo;			/* topology vertex of the node's memory */
+	int distances[MAXNODES];	/* printed per node by "distances" */
+} node_data_t;
 
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
