212. Word Search II

題目網址：https://leetcode.cn/problems/word-search-ii/

題意：給一 m x n 的網格 board, 和一 string list words, 返回同時存在於 board 和 words 中的所有單字。

單字必須按照字母順序, 並通過相鄰的 cell 所構成, 其中相鄰是指水平相鄰 or 垂直相鄰, 且同一位置的 cell 不允許被重複使用。

其中 board[i][j] 和 words[i] 皆由小寫字母組成, 且 word 中的 string 互不相同。

Solution 1：（TLE 無法通過）

想法：利用 DFS + Backtracking, 同 79. Word Search, 每一個 words[i] 都從 (0, 0) ~ (m, n) 為起點開始搜索

class Solution {
public:
    vector<string> findWords(vector<vector<char>>& board, vector<string>& words) {
        vector<string> res;
        m = board.size(), n = board[0].size();

        for (const auto& word : words) {
            if (exist(board, word)) {
                res.emplace_back(word);
            }
        }

        return res;
    }

private:
    int m, n;

    bool exist(vector<vector<char>>& board, string& word){
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                if (dfs(board, i, j, word, 0)) {
                    return true;
                }
            }
        }
        return false;
    }

    bool dfs(vector<vector<char>>& board, int i, int j, string& word, int idx){
        if (outOfBound(i, j) || board[i][j] != word[idx]) {
            return false;
        }

        if (idx == word.size() - 1) {
            return true;
        }

        const auto cur = board[i][j];

        // 將當前的 char 改成不該出現在 word 中的 char
        // 本題 word 中只能為英文字母, 故這邊設為數字
        // 這樣下方 dfs 中若走到拜訪過的位置必和 word[idx] 不相等, 並返回 false
        // 代表一定不會重複取同樣位置的 cell
        board[i][j] = '0'; // 標記成拜訪過的

        // 往 上、下、左、右 四個方向搜索
        const bool found = dfs(board, i - 1, j, word, idx + 1) ||
                           dfs(board, i + 1, j, word, idx + 1) ||
                           dfs(board, i, j - 1, word, idx + 1) ||
                           dfs(board, i, j + 1, word, idx + 1);

        // DFS 做完了, 代表當前 char 的所有可能都搜索完了, 還原當前的 char, 理由如下:
        // 若為 true, 則返回 true
        // 否則, 返回 false, 但下一個 char 為起始點是可以探索這個 char 的, 故要還原
        board[i][j] = cur;

        return found;
    }

    bool outOfBound(const int i, const int j){
        if (i < 0 || i >= m || j < 0 || j >= n) {
            return true;
        }
        return false;
    }
};

time：$O(w \cdot m \cdot n \cdot 4^L)$ ➔ w 為 words 中單字的個數, L 為 words 中最長單字的長度
- $O(w)$：總共要搜索 $w$ 個 word
- $O(m \cdot n)$：每個 word 需以 $m * n$ 個 cell 為起點開始遍歷
- $O(4^L)$：每個 cell 最多被拜訪 $4^L$ 次
space：$O(L)$ ➔ 取決於 dfs 的遞迴深度, 也就是 words 中最長單字的長度

Solution 2：（TLE 無法通過）

想法：利用 Trie + DFS + Backtracking

Trie 儲存 words, 可利用 prefix 來剪枝和節省空間

DFS + Backtracking 搜索 board

class TrieNode{
public:
    TrieNode* children[26] = {nullptr};
    bool isEnd = false; // 預設每個 char 不為 end
};

class Trie{
public:
    TrieNode* root;

    Trie() : root(new TrieNode()) {}

    void insert(string& word) {
        TrieNode *cur = root;
        for (const auto& ch : word) {
            const int i = ch - 'a';
            if (!cur->children[i]) {
                cur->children[i] = new TrieNode();
            }
            cur = cur->children[i];
        }
        cur->isEnd = true; // 將最後一個 char 設為 end
    }
};

class Solution {
public:
    vector<string> findWords(vector<vector<char>>& board, vector<string>& words) {
        m = board.size(), n = board[0].size();

        // insert words into trie
        for (const auto& word : words) {
            trie.insert(word);
        }

        // 每一個 (i, j) 作為起點向外搜索
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                TrieNode *node = trie.root;
                string word; // 紀錄遍歷 board 時的 word
                dfs(board, i, j, node, word);
            }
        }

        vector<string> ans(res.begin(), res.end()); // 將 set 轉回 vector

        return ans;
    }

private:
    int m, n;
    Trie trie;
    unordered_set<string> res; // 同一個單字有可能在不同的路徑中出現, 故取 set
    vector<pair<int, int>> dir = {
        {1,0}, {-1,0},
        {0,1}, {0,-1}
    };

    void dfs(vector<vector<char>>& board, const int i, const int j, TrieNode* node, string& word){
        // 若超出邊界 or 已被拜訪, 則返回
        if (outOfBound(i, j) || board[i][j] == '0') {
            return;
        }

        const auto ch = board[i][j];
        const int idx = ch - 'a';

        // 若 board[i][j] 不為下一個 char, 則返回
        if (!node->children[idx]) {
            return;
        }

        // 若 board[i][j] 為下一個 char, 則把它加到 word 中, 並前進到下一個 char
        word += ch;
        node = node->children[idx];
        board[i][j] = '0'; // 將 (i, j) 標記為已拜訪

        // 若為結尾, 則將 word 加到 res 中, 並繼續往下搜索, 而非直接返回
        // 因為若 words = [aa, aaa], 在 board 中找到 aa 後, 仍要繼續找看看有沒有 aaa
        if (node->isEnd) {
            res.emplace(word);
        }

        // 往 上、下、左、右 四個方向搜索
        for (int k = 0; k < dir.size(); ++k) {
            const int row = i + dir[k].first;
            const int col = j + dir[k].second;
            dfs(board, row, col, node, word);
        }

        board[i][j] = ch; // 返回上一層時, 要把 (i, j) 標記成未拜訪
        word.pop_back(); // 返回上一層時, 要把 ch 給 pop 掉
    }

    bool outOfBound(const int row, const int col){
        if (row < 0 || row >= m || col < 0 || col >= n) {
            return true;
        }
        return false;
    }
};

time：$O(m \cdot n \cdot 4^L)$ ➔ L 為 words 中最長單字的長度
- $O(m \cdot n)$：需遍歷 $m * n$ 個 cell
- $O(4^L)$：每個 cell 最多被拜訪 $4^L$ 次
space：$O(w \cdot L)$ ➔ 其中 w 為 words 中單字的個數, 而 L 為最長單字的長度, worse case：trie 中最多有 $O(w \cdot L)$ 個 node

Solution 3：

想法：改善 Solution 2, 將已找到的單字從 trie 中刪除, 藉此來進行剪枝, 避免重複搜索已找到的單字。因此我們在 TrieNode 中新增一屬性 cnt 用來記錄該 char 被走過的次數。一旦某個單字被找到我們就把它從 Trie 移除, 移除的方法分成兩步驟：

遍歷 word 每一個 char, 並把 trie 中該 char 的 cnt 減一

把 word 最後一個 char 標記成 not end

e.g. words = [a, aa], 則 Trie 如下圖

第一個 a 的 cnt = 2, 是因為 a 和 aa 都會走到

第二個 a 的 cnt = 1, 是因為只有 aa 會走到

class TrieNode{
public:
    TrieNode* children[26] = {nullptr};
    bool isEnd = false; // 預設每個 char 不為 end
    int cnt = 0; // 紀錄每個 char 被走過幾遍(也就是有幾個 word)
};

class Trie{
public:
    TrieNode* root;

    Trie() : root(new TrieNode()) {}

    void insert(string word) {
        TrieNode *cur = root;
        for (const auto& ch : word) {
            const int i = ch - 'a';
            if (!cur->children[i]) {
                cur->children[i] = new TrieNode();
            }
            cur = cur->children[i];
            cur->cnt += 1; // cnt + 1
        }
        cur->isEnd = true; // 將最後一個 char 設為 end
    }

    void remove(string& word){
        TrieNode *node = root;

        // word 中每個 char 之都要 cnt 減一
        for (const auto& ch : word) {
            const int i = ch - 'a';
            node->children[i]->cnt -= 1;
            node = node->children[i];
        }
        node->isEnd = false; // 把 word 最後一個 char 標記成 not end
    }
};

class Solution {
public:
    vector<string> findWords(vector<vector<char>>& board, vector<string>& words) {
        m = board.size(), n = board[0].size();

        // insert words into trie
        for (const auto& word : words) {
            trie.insert(word);
        }

        // 每一個 (i, j) 作為起點向外搜索
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                const auto node = trie.root;
                string word; // 紀錄遍歷 board 時的 word
                dfs(board, i, j, node, word);
            }
        }

        vector<string> ans(res.begin(), res.end()); // 將 set 轉回 vector

        return ans;
    }

private:
    int m, n;
    Trie trie;
    unordered_set<string> res; // 同一個單字有可能在不同的路徑中出現, 故取 set
    vector<pair<int, int>> dir = {
        {1,0}, {-1,0},
        {0,1}, {0,-1}
    };

    void dfs(vector<vector<char>>& board, const int i, const int j, TrieNode* node, string& word){
        // 若超出邊界 or 已被拜訪, 則返回
        if (outOfBound(i, j) || board[i][j] == '0') {
            return;
        }

        const auto ch = board[i][j];
        const int idx = ch - 'a';

        // 若 board[i][j] 不為下一個 char, 則返回
        if (!node->children[idx]) {
            return;
        }

        // 若下一個 char 的 cnt 為 0, 則返回
        if (node->children[idx]->cnt == 0) {
            return;
        }

        // 若 board[i][j] 為下一個 char, 則把它加到 word 中, 並前進到下一個 char
        word += ch;
        node = node->children[idx];
        board[i][j] = '0'; // 將 (i, j) 標記為已拜訪

        // 若為結尾, 則將 word 加到 res 中, 並繼續往下搜索, 而非直接返回
        // 因為若 words = [aa, aaa], 在 board 中找到 aa 後, 仍要繼續找看看有沒有 aaa
        if (node->isEnd) {
            res.emplace(word);
            trie.remove(word); // 從 trie 中移除 word
        }

        // 往 上、下、左、右 四個方向搜索
        for (int k = 0; k < dir.size(); ++k) {
            const int row = i + dir[k].first;
            const int col = j + dir[k].second;
            dfs(board, row, col, node, word);
        }

        board[i][j] = ch; // 返回上一層時, 要把 (i, j) 標記成未拜訪
        word.pop_back(); // 返回上一層時, 要把 ch 給 pop 掉
    }

    bool outOfBound(const int row, const int col){
        if (row < 0 || row >= m || col < 0 || col >= n) {
            return true;
        }
        return false;
    }
};

time：$O(m \cdot n \cdot 4^L)$ ➔ L 為 words 中最長單字的長度
- $O(m \cdot n)$：需遍歷 $m * n$ 個 cell
- $O(4^L)$：每個 cell 最多被拜訪 $4^L$ 次
space：$O(w \cdot L)$ ➔ 其中 w 為 words 中單字的個數, 而 L 為最長單字的長度, worse case：trie 中最多有 $O(w \cdot L)$ 個 node