9,0,1,7,4,3,1,3,1,9, where all keys are known to be from 0..9
with the keyIndex sorting implementation below.
How does that compare to sorting the following keys with the same approach?
9,0,1,7,4,3,1,3,1,999999, where all keys are known to be from 0..999999
//Sort array of size n with keys between 0 and m-1 void keyIndexed(int items[], int n, int m) { int i, index; int *count = malloc((m+1)* sizeof(int)); int *tmp = malloc((n) *sizeof(int)); //initialise array for (i = 0; i <= m; i++){ count[i] = 0; } //Count how many of each key for (i = 0; i < n; i++){ count[items[i] + 1]++; } //cumulative total for (i = 1; i <= m; i++){ count[i] += count[i - 1]; } //use cumulative totals as indexes for (i = 0; i < n; i++){ index = count[items[i]]++; tmp[index] = items[i]; } //copy back sorted data for (i = 0; i < n; i++){ items[i] = tmp[i]; } free(tmp); free(count); }Key indexed-counting sort has a time comlexity of O(N+M) where N is the size of the data set we are sorting and M is the size of the range of possible keys. In both data sets we are sorting in this question, N is 10. In the first data set M is also only 10 but in the second M is 10000000. This means we would have to use additional space to store the counts for the 10000000 possibilities and then traverse through these arrays, making the second data set much more inefficient to sort with this algorithm.
Use radix sort LSD to sort the strings.
art rub rat mat are bet fab wet tubRepeat using an MSD radix sort, where MSD is used for the initial pass and then LSD radix sort is used to sort the subfiles. Do a stable sort on the right most character, then the middle character, then the first character.
rub fab tub are art rat mat bet wet fab rat mat bet wet are art rub tub are art bet fab mat rat rub tub wetRepeat using an MSD radix sort, where MSD is used for the initial pass and then LSD radix sort is used to sort the subfiles.
Sort on the first letter.
art
are
bet
fab
mat
rub
rat
tub
wet
Then sort each subfile. In this case only the words starting with a and r need to be sorted. We start with the LSD so the right-most character.
are
art
bet
fab
mat
rub
rat
tub
wet
Now the middle character.
are
art
bet
fab
mat
rat
rub
tub
wet
In what order would you visit the nodes if you started at vertex 0
and used the cheapest least visited approach from assn2 and you had
stamina of 100000? (Show the first 10 vertices you would visit in the
order you would travel to them).
0 2 0 7 1 7 6 4 3 5 3
What about if you had a stamina level of 50?
0 2 2 0 0 7 7 1 7 7 6
Graph Implementations
In lectures we have looked at two different implemtations of graphs. Adjacency matrices and adjacency lists. See representations below:
//Graph.h definitions // vertices denoted by integers 0..N-1 typedef struct graphRep * Graph; typedef int Vertex; typedef struct edge Edge; // edges are pairs of vertices (end-points) struct edge { Vertex v; Vertex w; } ;
//Adjacency matrix unweighted graph representation struct graphRep { int nV; // #vertices int nE; // #edges int **adj; // matrix of booleans (0 or 1) };
//Adjacency list graph representation typedef struct vNode *VList; struct vNode { Vertex v; VList next; }; struct graphRep { int nV; // #vertices int nE; // #edges VList VList *adj; // array of lists };
int isEdgeInGraph (Graph G, Edge e);that tests whether a given graph edge is present in the graph. The function should return 1 if the edge exists in the graph and 0 otherwise.
Implement the function for both the adjacency-matrix and the adjacency-list representations.
//adjacency matrix version int GRAPHedge(Graph g, Edge e){ assert(g != NULL); return g->adj[e.v][e.w]; } //adjacency list version int GRAPHedge(Graph g, Edge e){ assert(g != NULL); int isEdge = 0; VList tmp; for(tmp = g->adj[e.v]; tmp != NULL && !isEdge;tmp = tmp->next){ if(tmp->v == e.w) { isEdge = 1; } } return isEdge; }
How could you change the implementations to represent a weighted graph with weights of type int?
Would your implementations of isEdgeInGraph need to change?
//Adjacency Matrix
#define NO_EDGE INT_MAX
struct graphRep {
int nV; // #vertices
int nE; // #edges
int **adj; //No need to change this - it will be filled with
//int values representing the weights and INT_MAX if there is no edge.
}
//For adjacency list
//We just need to change the node struct
struct vNode{
Vertex v;
VList next;
int weight;
};
Would your implementations of isEdgeInGraph need to change?
The adjacency list version would still work. The adjacency matrix
version would need to change slightly.
//adjacency matrix version
int GRAPHedge(Graph g, Edge e){
assert(g != NULL);
return (g->adj[e.v][e.w] != NO_EDGE);
}
int GRAPHdegree(Graph g, Vertex v);
int GRAPHdegree(Graph g, Vertex v){ assert(g != NULL); assert(v >= 0 &&v < g->V); int degree = 0; vlink curr = g->adj[v]; while(curr != NULL){ degree++; curr = curr->next; } return degree; }
How could you store a label for each vertex in the graph, that you could
access easily via its vertex id?
//matrix version
struct graphRep {
int nV; // #vertices
int nE; // #edges
int **adj; // matrix of booleans (0 or 1)
char ** labels; //array of strings
};
//adjacency list version
struct graphRep {
int nV; // #vertices
int nE; // #edges VList
VList *adj; // array of lists
char ** labels; //array of strings
};
Edge *edges(Graph g, int *nE) { ... }
// which would be used as ...
Edge *es; int n; es = edges(g, &n);
Implement this edges() function for each of the two Graph representations. It should return the edges in normalised/canonical form (e.v < e.w), so that each edge appears exactly once in the result array.
// for adjacency matrix representation Edge *edges(Graph g, int *nE) { Edge *new = malloc(g->nE*sizeof(Edge)); *nE = g->nE; int v, w; for (v = 0; v < g->nV; v++) { for (w = v+1; w < g->nV; w++) { if (!g->adj[v][w]) continue; new[i++] = mkEdge(v,w); } } } // for adjacency list representation Edge *edges(Graph g, int *nE) { Edge *new = malloc(g->nE*sizeof(Edge)); *nE = g->nE; int n = 0, v; for (v = 0; v < g->nV; v++) { VList c; for (c = g->adj[v]; c != NULL; c = c->next) { w = c->v; if (w < v) continue; new[n++] = mkEdge(v,w); } } return new; }
The standard adjacency matrix representation for a graph uses a full V×V matrix and stores each edge twice (at [v,w] and [w,v]). This consumes a lot of space, and wastes a lot of space when the graph is sparse. One simple way to improve the space usage would be to define the matrix elements as bool rather than int, e.g.
int **edges; // matrix of booleans (0 or 1) ... becomes ... bool **edges; // matrix of booleans (0 or 1) ... where bool is defined as ... typedef unsigned char bool;
We could use even less space by storing just the upper (or lower) triangular part of the matrix, as shown in the diagram below:
The V×V matrix has been replaced by a single array containing just the "useful" parts of the matrix. This gives a new Graph representation:
// Upper-triangluar adjacency matrix graph representation struct graphRep { int nV; // #vertices int nE; // #edges bool *edges; // array of booleans (0 or 1) };
Accessing the elements is no longer as simple as edges[v][w]. Write a function that takes two vertices from an edge and determines the corresponding index in the edges array which holds the boolean value for this edge. Use the following function template:
int indexOf(Vertex v, Vertex w)
{
assert(v != w); // no self-edges
if (v > w) { Vertex tmp = v; v = w; v = tmp; }
...
}
int indexOf(Vertex v, Vertex w) { assert(v != w); // no self-edges if (v > w) { Vertex tmp = v; v = w; v = tmp; } assert(w > v); // redundant int i; // counts the number of iterations int j; // gives the increment for eah iteration int k; // accumulates the index value for (i = 0, j = nV-1, k = 0; i < v; i++, j--) { k += j; } k += w-1; return k; }
Maybe the use of comma in for loops hasn't been introduced. Re-write the function without using it if the above is not clear:
int indexOf(Vertex v, Vertex w) { assert(v != w); // no self-edges if (v > w) { Vertex tmp = v; v = w; v = tmp; } assert(w > v); // redundant int i; // counts the number of iterations int j = nV-1; // gives the increment for eah iteration int k = 0; // accumulates the index value for (i = 0; i < v; i++) { k += j; j--; } k += w-1; return k; }
The standard implementation of the adjacency list representation for graphs stores each edge twice. The edge (v,w) appears as a w stored in the adjacency list for v and as a v stored in the adjacency list for w. A more storage efficient representation (analgous to storing just the top-right half of the adjacency matrix), would be store information for each edge just once.
Re-implement the following functions from lectures to use this each-edge-stored-once variation of adjacency lists:
void insertE(Graph g, Edge e); void removeE(Graph g, Edge e); int neighbours(Graph g, Vertex x, Vertex y);
You should not assume that supplied Edge values will necessarily satisfy (e.v < e.w).
The following functions support the each-edge-stored-once variation of adjacency lists:
Edge normalise(Edge e) { if (e.v > e.w) { Vertex tmp = e.v; e.v = e.w; e.w = tmp; } return e; } void insertE(Graph g, Edge e) { assert(g != NULL); assert(validV(g,e.v) && validV(g,e.w)); e = normalise(e); int orig = length(g->edges[e.v]); g->edges[e.v] = insertVList(g->edges[e.v], e.w); if (length(g->edges[e.v]) > orig) g->nE++; } void removeE(Graph g, Edge e) { assert(g != NULL); assert(validV(g,e.v) && validV(g,e.w)); e = normalise(e); int orig = length(g->edges[e.v]); g->edges[e.v] = deleteVList(g->edges[e.v], e.w); if (length(g->edges[e.v]) < orig) g->nE--; } int neighbours(Graph e, Vertex v, Vertex w) { if (v > w) { Vertex t = v; v = w; w = t; } return searchVList(g->edges[v], w); }