template<class K, class E> class dictionary — public virtual interface

// Slide 6 — abstract dictionary interface.
// A dictionary holds (key, element) pairs of type pair<const K, E>.
// Every operation is pure virtual, so concrete dictionaries must derive
// from this class and implement all of them.
template<class K, class E>
class dictionary
{
public:
    virtual ~dictionary() {}

    // Return true iff the dictionary holds no pairs.
    virtual bool empty() const = 0;

    // Return the number of pairs in the dictionary.
    virtual int size() const = 0;

    // Return a pointer to the pair with the given key.
    virtual std::pair<const K, E>* find(const K&) const = 0;

    // Remove the pair with the given key.
    virtual void erase(const K&) = 0;

    // Insert the given pair into the dictionary.
    virtual void insert(const std::pair<const K, E>&) = 0;
};

// Slide 9 — node type used by the sorted chain.
// Chain layout shown on the slide:
//   first -> e1 -> e2 -> ... -> e(i-1) -> ei -> e(i+1) -> ... -> en -> 0
template <class K, class E>
struct pairNode
{
    typedef std::pair<const K, E> pairType;

    pairType element;        // the (key, element) pair stored in this node
    pairNode<K, E>* next;    // following node in the chain

    // Build a node with no successor. `next` is set to null explicitly so
    // that a node built this way can never carry an indeterminate link.
    pairNode(const pairType& thePair)
        : element(thePair), next(nullptr) {}

    // Build a node holding thePair whose successor is theNext.
    pairNode(const pairType& thePair, pairNode<K, E>* theNext)
        : element(thePair), next(theNext) {}
};

Class Sorted. Chain template<class K, class E> class sorted. Chain : public dictionary<K, E> { public: sorted. Chain() {first. Node = NULL; d. Size = 0; } ~sorted. Chain(); bool empty() const {return d. Size == 0; } int size() const {return d. Size; } pair<const K, E>* find(const K&) const; void erase(const K&); void insert(const pair<const K, E>&); void output(ostream& out) const; protected: pair. Node<K, E>* first. Node; // pointer to first node in chain int d. Size; // number of elements in dictionary }; 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 10

操作 ‘find’ template<class E, class K> pair<const K, E>* sorted. Chain<E, K>: : find(const K& the. Key) const {// 搜索与k匹配的元素， //如果没有匹配的元素，则返回NULL pair. Node<K, E>*current. Node = first. Node; while (current. Node !=NULL && current. Node->element. first < the. Key) current. Node = current. Node->next; if (current. Node != NULL && current->element. first == the. Key) return &current->element; // 与the. Key相匹配 return NULL; // 不存在相匹配的元素 } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 11

template<class E, class K> void sorted. Chain<E, K>: : insert(const pair<K, E>& the. Pair) {// 如果表中不存在关键值与e相同的元素，则插入e, 否� 替� pair. Node<E, K> *p = first. Node, *tp = NULL; // p指向节点的前驱 while (p!=NULL && p->element. first < the. Pair. first) { tp = p, p = p->next; } if (p != NULL && p->element. first = = the. Pair) {p->element. second = the. Pair. second; // 替换旧值 return； } // 若没有出现重复关键值, 则产生一个关键值为e的新节点 pair. Node<E, K> *new. Node = new pair. Node<E, K>(the. Pair, p); if (tp ==NULL) first. Node = new. Node; // 新节点插入表头 else tp->next = new. Node; d. Size ++; return; } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 13

操作 ‘erase’
删除的不是链首节点（tp ≠ 0）：tp->link = p->link
first → e1 → e2 → … → ei-1 (tp) → ei (p) → ei+1 → … → en → 0
删除的是链首节点（tp = 0）：first = p->link
first → e1 (p) → e2 → … → ei-1 → ei → ei+1 → … → en → 0
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 14

template<class E, class K> void sorted. Chain<E, K>: : erase(const K& the. Key) {// 删除与the. Key相匹配的元素， pair. Node<K, E> *p = first. Node, tp = NULL； //p 指向匹配的节点，tp 指向p 前面的节点。 while (p != NULL && p->element. first < the. Key) { tp = p, p = p->next; }// 搜索与k相匹配的元素 // 验证是否与k匹配 if (p !=NULL) && p->element. first = = the. Key) {// 找到一个相匹配的元素 if（tp ==NULL）first. Node = p->next; else tp->next = p->next; 是链首节点 delete p; d. Size--; } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 15

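7.3.1 Ideal case
Sorted chain: 20 24 30 40 60 75 80
Search: 最坏情况下的比较次数 f(n) = n
Sorted chain with an extra pointer to the middle node: 20 24 30 40 60
Search: f(n) = n/2 + 1 if we keep a pointer in the middle.
Chains at levels 2, 1, 0 over the elements 20 24 30 40 60 75 80
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 18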

7.3.3 Level assignment
- rand() returns a number in the range 0 … RAND_MAX. Let CutOff = p * RAND_MAX; then the probability that the next random number is <= CutOff is p.
- If the next random number is <= CutOff, then the new element should be in level 1, and we check the next random number.
- Generally, the final level assigned to the new element is:
  int lev = 0;
  while (rand() <= CutOff) lev++;
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 26

// Slide 29 — 7.3.4 struct skipNode.
// Skip-list node: one (key, element) pair plus an array of forward
// pointers, next[0] .. next[size-1], one per level the node takes part in.
template<class K, class E>
struct skipNode
{
    typedef std::pair<const K, E> pairType;

    pairType element;        // the pair stored in this node
    skipNode<K, E>** next;   // 1D array of forward pointers, one per level

    // Build a node holding thePair with `size` forward pointers.
    // The trailing () value-initialises the array, so every pointer starts
    // out null instead of indeterminate.
    skipNode(const pairType& thePair, int size) : element(thePair)
    {
        next = new skipNode<K, E>*[size]();
    }

    // Release the pointer array allocated in the constructor. Only the
    // array is freed; the nodes it points to are not touched.
    ~skipNode() {delete [] next;}

private:
    // The node owns `next`, so copying would free the array twice.
    // Declared but not defined to forbid copies.
    skipNode(const skipNode<K, E>&);
    skipNode<K, E>& operator=(const skipNode<K, E>&);
};

#ifndef skip. List_ #define skip. List_ #include <iostream> #include <math. h> #include <sstream> #include <string> #include "dictionary. h" #include "skip. Node. h" #include "my. Exceptions. h" using namespace std; template<class K, class E> class skip. List : public dictionary<K, E> { public: skip. List(K, int max. Pairs = 10000, float prob = 0. 5); ~skip. List(); 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 30

bool empty() const {return d. Size == 0; } int size() const {return d. Size; } pair<const K, E>* find(const K&) const; void erase(const K&); void insert(const pair<const K, E>&); void output(ostream& out) const; }; protected: float cut. Off; // used to decide level number int level() const; // generate a random level number int levels; // max current nonempty chain int d. Size; // number of pairs in dictionary int max. Level; // max permissible chain level K tail. Key; // a large key skip. Node<K, E>* search(const K&) const; // search saving last nodes seen skip. Node<K, E>* header. Node; // header node pointer skip. Node<K, E>* tail. Node; // tail node pointer skip. Node<K, E>** last; // last[i] = last node seen on level i 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 31

template<class K, class E> skip. List<K, E>: : skip. List(K large. Key, int max. Pairs, float prob) {// Constructor for skip lists with keys smaller than large. Key and // size at most max. Pairs. 0 < prob < 1. cut. Off = prob * RAND_MAX; max. Level = (int) ceil(logf((float) max. Pairs) / logf(1/prob)) - 1; levels = 0; // initial number of levels d. Size = 0; tail. Key = large. Key; // create header & tail nodes and last array pair<K, E> tail. Pair; //申请变量 tail. Pair. first = tail. Key; //赋值 header. Node = new skip. Node<K, E> (tail. Pair, max. Level + 1); tail. Node = new skip. Node<K, E> (tail. Pair, 0); //建立尾结点 last = new skip. Node<K, E> *[max. Level+1]; //用于记录指针的数组 } // header points to tail at all levels as lists are empty for (int i = 0; i <= max. Level; i++) header. Node->next[i] = tail. Node; //建立首尾两个节点的链接山东大学计算机科学与技术学院数据结构第 7章跳表和散列 32

template<class K, class E> skip. List<K, E>: : ~skip. List() {// Delete all nodes and array last. skip. Node<K, E> *next. Node; // delete all nodes by following level 0 chain while (header. Node != tail. Node) { next. Node = header. Node->next[0]; //沿着0层删除 delete header. Node; header. Node = next. Node; } delete tail. Node; } delete [] last; 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 33

template<class K, class E> pair<const K, E>* skip. List<K, E>: : find(const K& the. Key) const {// fa // Return NULL if no matching pair. if (the. Key >= tail. Key) return NULL; // 超出合理关键词的取值范围了 // position before. Node just before possible node with the. Key skip. Node<K, E>* before. Node = header. Node; //要找的节点前驱节点 for (int i = levels; i >= 0; i--) // 从上到下逐层寻找 // follow level i pointers while (before. Node->next[i]->element. first < the. Key) before. Node = before. Node->next[i]; // check if next node has the. Key if (before. Node->next[0]->element. first == the. Key) return &before. Node->next[0]->element; } return NULL; // no matching pair 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 34

template<class K, class E> int skip. List<K, E>: : level() const {// Return a random level number <= max. Level. int lev = 0; while (rand() <= cut. Off) lev++; return (lev <= max. Level) ? lev : max. Level; } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 35

template<class K, class E> skip. Node<K, E>* skip. List<K, E>: : search(const K& the. Key) const {// Search 作为一个辅助函数，找到含有the. Key的节点，并且 // 为记录level 的数组last赋值 skip. Node<K, E>* before. Node = header. Node; for (int i = levels; i >= 0; i--) { while (before. Node->next[i]->element. first < the. Key) before. Node = before. Node->next[i]; last[i] = before. Node; // last 记录了该插入或删除节点每层的前驱节点 } return before. Node->next[0]; //返回指向要找节点的指针 } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 36

template<class K, class E> void skip. List<K, E>: : insert(const pair<const K, E>& the. Pair) {// 基于search的查找，插入新节点，或修改已有节点 if (the. Pair. first >= tail. Key) // key too large {ostringstream s; s << "Key = " << the. Pair. first << " Must be < " << tail. Key; throw illegal. Parameter. Value(s. str()); } // see if pair with the. Key already present skip. Node<K, E>* the. Node = search(the. Pair. first); if (the. Node->element. first == the. Pair. first)//已经存在只改值 {// update the. Node->element. second = the. Pair. second; return; } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 37

// 增加新节点，首先计算该节点指针数组大大小，及所在层次 int the. Level = level(); // level of new node // fix the. Level to be <= levels + 1 if (the. Level > levels) { the. Level = ++levels; last[the. Level] = header. Node; //因为新增加了一层，必须从头开始 } // get and insert new node just after the. Node skip. Node<K, E>* new. Node = new skip. Node<K, E>(the. Pair, the. Level + 1); for (int i = 0; i <= the. Level; i++) {// insert into level i chain new. Node->next[i] = last[i]->next[i]; last[i]->next[i] = new. Node; } d. Size++; return; } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 38

emplate<class K, class E> void skip. List<K, E>: : erase(const K& the. Key) {// Delete the pair, if any, whose key equals the. Key. if (the. Key >= tail. Key) // too large return; // see if matching pair present skip. Node<K, E>* the. Node = search(the. Key); if (the. Node->element. first != the. Key) // not present return; // delete node from skip list for (int i = 0; i <= levels && last[i]->next[i] == the. Node; i++) last[i]->next[i] = the. Node->next[i]; 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 39

// update levels while (levels > 0 && header. Node->next[levels] == tail. Node) levels--; } delete the. Node; d. Size--; 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 40

template<class K, class E> void skip. List<K, E>: : output(ostream& out) const {// Insert the dictionary pairs into the stream out. // follow level 0 chain for (skip. Node<K, E>* current. Node = header. Node->next[0]; current. Node != tail. Node; current. Node = current. Node->next[0]) out << current. Node->element. first << " " << current. Node->element. second << " "; } // overload << template <class K, class E> ostream& operator<<(ostream& out, const skip. List<K, E>& x) {x. output(out); return out; } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 41

操作 ‘Insert’
插入前：Head（层 2、1、0）→ 20 24 30 40 60 75 80 → Tail；p 为搜索指针，last 记录各层最后访问的节点。
插入后：Head（层 2、1、0）→ 20 24 30 40 60 75 77 80 → Tail（Level 0、1、2）。
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 44

// Slide 48 — Program 10.13: convert a string to an integer.
// Packs the first three characters of s into one integer, 8 bits per
// character, with s[0] in the most significant position.
// The string is assumed to have length 3; s.at() throws std::out_of_range
// if it is shorter.
int stringToLong(std::string s)
{
    long answer = s.at(0);
    answer = (answer << 8) + s.at(1);
    return (answer << 8) + s.at(2);
}

hash<string> (C++ STL): template<> class hash<string> { public: size_t operator()(const string theKey) … }

hash(<string> C++ STL Template<> class hash<string> { public: size_t operator() (const string the. Key) const {//把关�� the. Key�� 非� 的整数 unsisgned long hash. Value = 0; int length = (int) the. Key. length(); for (int i =0; i<length; i++) hash. Value = 5*hash. Value + the. Key. at(i); return size_t(hash. Value); }} 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 49

template<class K, class E> class hash. Table { public: hash. Table(int the. Divisor = 11); ~hash. Table(){delete [] table; } bool empty() const {return d. Size == 0; } int size() const {return d. Size; } pair<const K, E>* find(const K&) const; void insert(const pair<const K, E>&); void output(ostream& out) const; protected: int search(const K&) const; pair<const K, E>** table; // hash table hash<K> hash; // maps K to integer int d. Size; // number of pairs in dictionary int divisor; // hash function divisor }; 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 63

template<class K, class E> hash. Table<K, E>: : hash. Table(int the. Divisor) { divisor = the. Divisor; d. Size = 0; } // allocate and initialize hash table array table = new pair<const K, E>* [divisor]; for (int i = 0; i < divisor; i++) table[i] = NULL; 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 64

template<class K, class E> int hash. Table<K, E>: : search(const K& the. Key) const {// the. Key 是要检索的元素关键词，其实与the. Key的哈希映射位置，遍历整个表，若找到该元素或空置单元，则返回位置下标 } int i = (int) hash(the. Key) % divisor; // home bucket int j = i; // start at home bucket do { if (table[j] == NULL || table[j]->first == the. Key) return j; j = (j + 1) % divisor; // next bucket } while (j != i); // returned to home bucket? return j; // table full 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 65

template<class K, class E> pair<const K, E>* hash. Table<K, E>: : find(const K& the. Key) const {// 查找是否含有the. Key的元素在表中？在返回全部信息，否则 //null int b = search(the. Key); // see if a match was found at table[b] if (table[b] == NULL || table[b]->first != the. Key) return NULL; // no match } return table[b]; // matching pair 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 66

template<class K, class E> void hash. Table<K, E>: : insert(const pair<const K, E>& the. Pair) {// 插入对（K, E），若已经存在，在把第二部分修改 int b = search(the. Pair. first); // check if matching pair found if (table[b] == NULL) { // no matching pair and table not full table[b] = new pair<const K, E> (the. Pair); d. Size++; } else 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 67

{// check if duplicate or table full if (table[b]->first == the. Pair. first) {// duplicate, change table[b]->second = the. Pair. second; } else // table is full throw hash. Table. Full(); } } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 68

template<class E, class K> class chain. Hash. Table { public: chain. Hash. Table(int divisor = 11 )//divisor(除数） {d = divisor; table = new Sorted. Chain<E, K> [D]; } //head pointer array, each table[i] has a Sorted chain. ~chain. Hash. Table() {delete [] table; } bool search(const K& k, E& e) const {return table[k%D]. search(k, e); } chain. Hash. Table<E, K>& insert(const E& e) {table[e%d]. insert(e); return *this; } chain. Hash. Table<E, K>& erase(const K& k, E& e) {table[k%d]. erase(k, e); return *this; } void Output() const; // 输出散列表 private: int d// 位置数 sorted. Chain<E, K> *table; // 链表数组 } 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 71

An improved implementation ∞ 0 ∞ 0 ∞ 0 Adding a tail node to the end of each chain 山东大学计算机科学与技术学院数据结构第 7章跳表和散列 72

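Comparison in running time
- 线性开型寻址散列:
  - 最坏情况: $\Theta(n)$
  - 平均性能: $U_n$ and $S_n$ are the average number of buckets examined during an unsuccessful search and a successful search, respectively. With load factor $\alpha = n/b$:
    - $U_n \approx \frac{1}{2}\left(1 + \frac{1}{(1-\alpha)^2}\right)$
    - $S_n \approx \frac{1}{2}\left(1 + \frac{1}{1-\alpha}\right)$
- 链表散列:
  - 最坏情况: $O(n)$
  - 平均性能: we expect the length of a chain to be $\alpha = n/b$:
    - $S_n \approx 1 + \alpha/2$
    - $U_n \approx (\alpha+1)/2$ for $\alpha \ge 1$, superior to the linear array
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 74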

文本压缩的应用 • Popular text compressors such as zip and Unix’s compress are based on the LZW (Lempel-Ziv-Welch) method. 山东大学计算机科学与技术学院数据结构第 7章跳表和散列

Idea
- We can often reduce the disk storage needed to store a text file by storing a coded version of the file, especially when substrings appear with high frequency.
- E.g. for a string of 1000 x's followed by 1000 y's, 2002 bytes are needed if it is stored using “x” and “y”; 10 bytes are enough if it is stored as “1000x1000y”.
- The LZW method maps a long substring to a fixed-length code and uses the codes to represent the text file.
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 77

LZW Compression
- The LZW method maps strings of text characters into numeric codes that are determined dynamically.
- The method scans the text from left to right and generates a table of (code, key) pairs based on the LZW rule.
- The codes in the table are then used to represent the strings, which compresses the text.
- File coding is done by a compressor and decoding by a decompressor.
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW rule
- Beginning with an initialized dictionary, the LZW compressor repeatedly finds the longest prefix, p, of the unencoded part of the input file that is in the dictionary and outputs its code.
- If there is a next character c in the input file, then pc is assigned the next code and inserted into the dictionary.
山东大学计算机科学与技术学院数据结构第 7章跳表和散列 79

LZW Compression example
- Assume the letters in the text are limited to {a, b}.
- The characters in the alphabet are assigned code numbers beginning at 0.
- The initial code table (initialized dictionary) is:
  code: 0  1
  key:  a  b
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW rule
  code: 0  1
  key:  a  b
- Original text = abababbabaabbabbaabba
- Compression is done by scanning the original text from left to right.
- Find the longest prefix p for which there is a code in the code table: p = a, so output 0; the next character is c = b, so assign (2, ab) to the dictionary.
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2
  key:  a  b  ab
- Original text = abababbabaabbabbaabba
- Compressed text = “0”
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3
  key:  a  b  ab  ba
- Original text = abababbabaabbabbaabba
- Compressed text = 0
  - p = b
  - pCode = 1
  - c = a
- Represent b by 1 and enter pair (3, ba) into the code table.
- Compressed text = “01”
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4
  key:  a  b  ab  ba  aba
- Original text = abababbabaabbabbaabba
- Compressed text = 01
  - p = ab
  - pCode = 2
  - c = a
- Represent ab by 2 and enter (4, aba) into the code table.
- Compressed text = “012”
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4    5
  key:  a  b  ab  ba  aba  abb
- Original text = abababbabaabbabbaabba
- Compressed text = 012
  - p = ab
  - pCode = 2
  - c = b
- Represent ab by 2 and enter (5, abb) into the code table.
- Compressed text = “0122”
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4    5    6
  key:  a  b  ab  ba  aba  abb  bab
- Original text = abababbabaabbabbaabba
- Compressed text = 0122
  - p = ba
  - pCode = 3
  - c = b
- Represent ba by 3 and enter (6, bab) into the code table.
- Compressed text = 01223
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4    5    6    7
  key:  a  b  ab  ba  aba  abb  bab  baa
- Original text = abababbabaabbabbaabba
- Compressed text = 01223
  - p = ba
  - pCode = 3
  - c = a
- Represent ba by 3 and enter (7, baa) into the code table.
- Compressed text = 012233
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4    5    6    7    8
  key:  a  b  ab  ba  aba  abb  bab  baa  abba
- Original text = abababbabaabbabbaabba
- Compressed text = 012233
  - p = abb
  - pCode = 5
  - c = a
- Represent abb by 5 and enter (8, abba) into the code table.
- Compressed text = 0122335
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4    5    6    7    8     9
  key:  a  b  ab  ba  aba  abb  bab  baa  abba  abbaa
- Original text = abababbabaabbabbaabba
- Compressed text = 0122335
  - p = abba
  - pCode = 8
  - c = a
- Represent abba by 8 and enter (9, abbaa) into the code table.
- Compressed text = 01223358
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

LZW Compression
  code: 0  1  2   3   4    5    6    7    8     9
  key:  a  b  ab  ba  aba  abb  bab  baa  abba  abbaa
- Original text = abababbabaabbabbaabba
- Compressed text = 01223358
  - p = abba
  - pCode = 8
  - c = null
- Represent abba by 8.
- Compressed text = 012233588
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

Code Table Representation
  code: 0  1  2   3   4    5    6    7    8     9
  key:  a  b  ab  ba  aba  abb  bab  baa  abba  abbaa
- Dictionary.
  - Pairs are (key, element) = (key, code).
  - Operations are: get(key) and put(key, code).
- Use a hash table.
  - Convert variable-length keys into fixed-length keys.
  - Each key has the form pc, where the string p is a key that is already in the table.
  - Replace pc with (pCode)c.
山东大学计算机科学与技术学院数据结构第 7章跳表和散列

Time Complexity
- Compression.
  - O(n) expected time, where n is the length of the text that is being compressed.
- Decompression.
  - O(n) time, where n is the length of the decompressed text.
山东大学计算机科学与技术学院数据结构第 7章跳表和散列