# 字串比對

December 1, 2021

## 解法思路

Boyer-Moore 字串比對由關鍵字的後面開始核對字串，並製作前進表，如果比對不符合，依前進表中的值前進至下個核對處，假設是 p 好了，然後比對字串中 p - n + 1 至 p 的值是否與關鍵字相同。

4 3 2 1 4（match?）

## 程式實作

``````#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SKIP_TABLE_SIZE 256
#define STRING_LENGTH 80

void table(int*, char*);  // 建立前進表
int indexOf(int*, int, char*, char*); // 搜尋關鍵字
void subString(char*, char*, int, int); // 取出子字串

int main(void) {
int skip[SKIP_TABLE_SIZE];

char input[STRING_LENGTH];
char key[STRING_LENGTH];
char sub[STRING_LENGTH] = {'\0'};

printf("字串：");
gets(input);
printf("關鍵字：");
gets(key);

table(skip, key);

int m = strlen(input); // 計算字串長度
int n = strlen(key);
int p = indexOf(skip, n - 1, input, key);

while(p != -1) {
subString(input, sub, p, m);
printf("%s\n", sub);
p = indexOf(skip, p + n + 1, input, key);
}

return 0;
}

void table(int* skip, char *key) {
int n = strlen(key);
int k;
for(k = 0; k < SKIP_TABLE_SIZE; k++) {
skip[k] = n;
}
for(k = 0; k < n - 1; k++) {
skip[key[k]] = n - k - 1;
}
}

int indexOf(int* skip, int from, char* str, char* key) {
char sub[STRING_LENGTH] = {'\0'};

int strLen = strlen(str);
int keyLen = strlen(key);
int index = from;

while(index < strLen) {
subString(str, sub, index - keyLen + 1, index);
if(!strcmp(sub, key)) { // 比較兩字串是否相同
return index - keyLen + 1;
}
index += skip[str[index]];
}

return -1;
}

void subString(char *text, char* sub, int s, int e) {
int i, j;
for(i = s, j = 0; i <= e; i++, j++) {
sub[j] = text[i];
}
sub[j] = '\0';
}
``````
``````import java.util.*;
import java.io.*;
import static java.lang.System.*;

public class StringMatcher implements Iterable<String> {
private String str;
private String key;
private int[] skip = new int[256];

public StringMatcher(String str, String key) {
this.str = str;
this.key = key;
Arrays.fill(skip, key.length());
for(int k = 0; k < key.length() - 1; k++) {
skip[key.charAt(k)] = key.length() - k - 1;
}
}

public Iterator<String> iterator() { return new Itr(); }

private class Itr implements Iterator<String> {
private int index;

{ index = indexOf(key.length() - 1, str, key); }

private int indexOf(int from, String str, String key) {
int tp = from;
while(tp < str.length() &&
! str.substring(tp - key.length() + 1, tp + 1)
.equals(key)) {
tp += skip[str.charAt(tp)];
}
return tp - key.length() + 1;
}

public boolean hasNext() {
return index < str.length() - 1;
}

public String next() {
String sub = str.substring(index);
index = indexOf(index + key.length() + 1, str, key);
return sub;
}

public void remove() { throw new RuntimeException("Not supported"); }
}

public static void main(String[] args) {
Scanner scanner = new Scanner(in);

out.print("請輸入字串：");
String str = scanner.nextLine();
out.print("請輸入搜尋關鍵字：");
String key = scanner.nextLine();

for(String s : new StringMatcher(str, key)) {
out.println(s);
}
}
}
``````
``````def matcher(str, key):
strLen = len(str)
keyLen = len(key)

skip = [keyLen - key.rindex(chr(k)) - 1
if(chr(k) in key[:-1]) else keyLen for k in range(256)]

def next(i):
return (next(i + skip[ord(str[i])])
if(i < strLen and str[i - keyLen + 1: i + 1] != key)
else i - keyLen + 1)

def match(i):
nextI = next(i + keyLen + 1)
return [str[i:]] + (match(nextI) if(nextI < strLen - 1) else [])

return match(next(keyLen))

for s in matcher(input("字串："), input("關鍵字：")):
print(s)
``````
``````def matcher(str: String, key: String) = {
val skip = for(k <- 0 until 256) yield
if(key.init.contains(k.toChar))
key.length - key.lastIndexOf(k.toChar) - 1
else key.length

def next(i: Int): Int = {
if(i < str.length &&
!str.substring(i - key.length + 1, i + 1).equals(key))
next(i + skip(str.charAt(i).toInt))
else i - key.length + 1
}

def find(i: Int): List[String] = {
val nextI = next(i + key.length + 1)
str.substring(i) :: (if(nextI < str.length - 1) find(nextI) else Nil)
}

find(next(key.length))
}

print("字串：")
print("關鍵字：")
matcher(str, key).foreach(println)
``````
``````# encoding: UTF-8
def matcher(str, key)
skip = (0...256).map { |k|
if key[0...-1].include? k.chr
key.size - key.rindex(k.chr) - 1
else
key.size
end
}

nextIndex = ->(i) {
if i < str.size and str[i - key.size + 1 ... i + 1] != key
nextIndex.call(i + skip[str[i].ord])
else
i - key.size + 1
end
}

match = ->(i) {
nextI = nextIndex.call(i + key.size + 1)
[str[i..-1]] + if nextI < str.size - 1; match.call(nextI) else [] end
}

return match.call(nextIndex.call(key.size))
end

print "字串："
str = gets.chomp
print "關鍵字："
key = gets.chomp

matcher(str, key).each do |s|
puts s
end
``````
``````var matcher = function() {
function range(n) {
var r = [];
for(var i = 0; i < n; i++) { r[i] = i; }
return r;
}

return function(str, key) {
var skip = range(256).map(function(k) {
return key.slice(0, -1)
.indexOf(String.fromCharCode(k)) !== -1 ?
key.length - key.lastIndexOf(String.fromCharCode(k)) - 1 :
key.length;
});

function next(i) {
return i < str.length &&
str.slice(i - key.length + 1, i + 1) !== key ?
next(i + skip[str.charCodeAt(i)]) :
i - key.length + 1
}

function match(i) {
var nextI = next(i + key.length + 1);
return [str.slice(i)].concat(
nextI < str.length - 1 ? match(nextI) : []);
}

return match(next(key.length))
};
}();

matcher('This is a test', 'is').forEach(function(s) {
print(s);
});
``````
``````import Data.Char
import Data.List

rindex elem list =
let (Just a) = elem `elemIndex` (reverse list)
in length list - a - 1

matcher str key =  find \$ next keyLen
where strLen = length str
keyLen = length key
skip = [if (chr k) `elem` (init key) then
keyLen - rindex (chr k) key - 1
else keyLen | k <- [0..255]]
next i = if i < strLen &&
(drop (i - keyLen + 1) . take (i + 1) \$ str) /= key
then next (i + skip !! (ord (str !! i)))
else i - keyLen + 1
find i = let nextI = next \$ i + length key + 1
in (drop i str) : if nextI < strLen - 1 then find nextI
else []

main = do
putStrLn "String..."
str <- getLine
putStrLn "Keyword..."
key <- getLine
sequence [putStrLn s | s <- matcher str key]
``````
``````# 我寫的玩具語言 https://github.com/JustinSDK/toy_lang

SKIP_TABLE_SIZE = 256

def table(skip, key) {
n = key.length()
iterate(0, SKIP_TABLE_SIZE).forEach(k -> skip.set(k, n))
iterate(0, n - 1).forEach(k -> skip.set(key.charCodeAt(k), n - k - 1))
}

def indexOf(skip, from, text, key) {
index = from
while index < text.length() {
if text.substring(index - key.length() + 1, index + 1) == key {
return index - key.length() + 1
}
index += skip.get(text.charCodeAt(index))
}
return -1
}

text = input('字串：')
key = input('關鍵字：')

skip = range(0, SKIP_TABLE_SIZE)
table(skip, key)

p = indexOf(skip, key.length() - 1, text, key)
while p != -1 {
println(text.substring(p, text.length()))
p = indexOf(skip, p + key.length() + 1, text, key)
}
``````