001package algs53; // section 5.3 002import stdlib.*; 003/* ************************************************************* 004 * Compilation: java KMPplus.java 005 * Execution: java KMPplus pattern text 006 * 007 * Knuth-Morris-Pratt algorithm over UNICODE alphabet. 008 * 009 * % java KMPplus ABABAC BCBAABACAABABACAA 010 * text: BCBAABACAABABACAA 011 * pattern: ABABAC 012 * 013 * % java KMPplus aabaaaba ccaabaabaabaaabaab 014 * text: ccaabaabaabaaabaab 015 * pattern: aabaaaba 016 * 017 * % java KMPplus aabaaabb ccaabaabaabaaabaab 018 * text: ccaabaabaabaaabaab 019 * pattern: aabaaabb 020 * 021 ***************************************************************/ 022 023public class XKMPplus { 024 private final String pattern; 025 private final int[] next; 026 027 // create Knuth-Morris-Pratt NFA from pattern 028 public XKMPplus(String pattern) { 029 this.pattern = pattern; 030 int M = pattern.length(); 031 next = new int[M]; 032 int j = -1; 033 for (int i = 0; i < M; i++) { 034 if (i == 0) next[i] = -1; 035 else if (pattern.charAt(i) != pattern.charAt(j)) next[i] = j; 036 else next[i] = next[j]; 037 while (j >= 0 && pattern.charAt(i) != pattern.charAt(j)) { 038 j = next[j]; 039 } 040 j++; 041 } 042 043 for (int i = 0; i < M; i++) 044 StdOut.println("next[" + i + "] = " + next[i]); 045 } 046 047 // return offset of first occurrence of text in pattern (or N if no match) 048 // simulate the NFA to find match 049 public int search(String text) { 050 int M = pattern.length(); 051 int N = text.length(); 052 int i, j; 053 for (i = 0, j = 0; i < N && j < M; i++) { 054 while (j >= 0 && text.charAt(i) != pattern.charAt(j)) 055 j = next[j]; 056 j++; 057 } 058 if (j == M) return i - M; 059 return N; 060 } 061 062 063 // test client 064 public static void main(String[] args) { 065 //args = new String[] { "abracadabra", "abacadabrabracabracadabrabrabracad" }; 066 //args = new String[] { "rab", "abacadabrabracabracadabrabrabracad" }; 067 //args = new String[] { "bcara", "abacadabrabracabracadabrabrabracad" }; 068 //args = new String[] { "rabrabracad", "abacadabrabracabracadabrabrabracad" }; 069 args = new String[] { "abacad", "abacadabrabracabracadabrabrabracad" }; 070 String pat = args[0]; 071 String txt = args[1]; 072 073 // substring search 074 XKMPplus kmp = new XKMPplus(pat); 075 int offset = kmp.search(txt); 076 077 // print results 078 StdOut.println("text: " + txt); 079 StdOut.print("pattern: "); 080 for (int i = 0; i < offset; i++) 081 StdOut.print(" "); 082 StdOut.println(pat); 083 } 084 085}