472. Concatenated Words

題目網址：https://leetcode.cn/problems/concatenated-words/

題意：給一 string array words (沒有重複單字), 找出並返回 words 中所有的連接詞。

連接詞：一個完全由 words 中至少兩個單字所組成的 string。

Solution 1：

想法：利用 DFS + Trie, 同 139. Word Break, 先對 words 根據 string 的長度進行排序, 因為排序後 word[i] 只會由比它短的單字所組成, 因此我們先把較短的單字 insert 到 trie 中後, 再來搜尋 word[i] 是否能由 trie 中的單字所組成

class TrieNode{
public:
    TrieNode* children[26] = {nullptr};
    bool isEnd = false;
};

class Trie{
public:
    TrieNode *root;

    Trie() : root(new TrieNode()) {}

    void insert(const string& word){
        TrieNode *node = root;

        for (const auto& ch : word) {
            const int i = ch - 'a';
            if (!node->children[i]) {
                node->children[i] = new TrieNode();
            }
            node = node->children[i];
        }

        node->isEnd = true;
    }
};

class Solution {
public:
    vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
        sort(words.begin(), words.end(), [](const string& s1, const string& s2){
           return s1.size() < s2.size();
        });

        vector<string> res;
        for (const auto& word : words) {
            if (dfs(word, 0)) {
                res.emplace_back(word);
            }
            trie.insert(word);
        }

        return res;
    }

private:
    Trie trie;

    bool dfs(const string& word, int cur){
        if (cur == word.size()) {
            return true;
        }

        TrieNode *node = trie.root;

        for (int i = cur; i < word.size(); ++i) {
            const int idx = word[i] - 'a';
            if (node->children[idx]) {
                node = node->children[idx];
                if (node->isEnd && dfs(word, i + 1)) {
                    return true;
                }
            } else {
                break;
            }
        }

        return false;
    }
};

time：$O(n \cdot log(n) + \sum\limits_{i = 0}^{n-1} + \sum\limits_{i = 0}^{n-1}{2^{w_i}})$, 其中 $n$ 為 words 中的單字個數, $w_i$ 為 words[i] 的長度
- $O(n \cdot log(n))$：sorting
- $O(\sum\limits_{i = 0}^{n-1})$：insert 單字到 trie 中
- $O(\sum\limits_{i = 0}^{n-1}{2^{w_i}})$：判斷每個 words[i] 是否為連接詞需 $O(2^{w_i})$
  - $O(2^{n})：T(n) = T(n - 1) + T(n - 2) + … + T(1)$
space：$O(\sum\limits_{i = 0}^{n-1}{w_i})$ ➔ $O(26 \cdot \sum\limits_{i = 0}^{n-1}{w_i})$, worse case：trie 中所有單字都沒有重複的 prefix

Solution 2：

想法：利用 DFS + Trie + Memorization, 同 139. Word Break, 改進 Solution 1, 若 word 中 idx = cur 為開頭往後的 substring 無法由若干個 words[i] 所組成, 則透過 visited 紀錄起來, 藉此來進行剪枝, 避免重複走失敗的道路

class TrieNode{
public:
    TrieNode* children[26] = {nullptr};
    bool isEnd = false;
};

class Trie{
public:
    TrieNode *root;

    Trie() : root(new TrieNode()) {}

    void insert(string& word){
        TrieNode *node = root;

        for (const auto& ch : word) {
            const int i = ch - 'a';
            if (!node->children[i]) {
                node->children[i] = new TrieNode();
            }
            node = node->children[i];
        }

        node->isEnd = true;
    }
};

class Solution {
public:
    vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
        sort(words.begin(), words.end(), [](const string& s1, const string& s2){
           return s1.size() < s2.size();
        });

        vector<string> res;
        for (const auto& word : words) {
            if (exist(word, 0)) {
                res.emplace_back(word);
            }
            trie.insert(word);
        }

        return res;
    }

private:
    Trie trie;

    bool exist(string& word, int idx){
        // 每個 word 都要重新初始化 visited
        vector<bool> visited(word.size(), 0);
        return dfs(word, idx, visited);
    }

    bool dfs(string& word, int cur, vector<bool>& visited){
        if (cur == word.size()) {
            return true;
        }

        // 若已經被標記為失敗, 則直接返回
        if (visited[cur] == true) {
            return false;
        }

        TrieNode *node = trie.root;
        for (int i = cur; i < word.size(); ++i) {
            const int idx = word[i] - 'a';
            if (node->children[idx]) {
                node = node->children[idx];
                if (node->isEnd && dfs(word, i + 1, visited)) {
                    return true;
                }
            } else {
                break;
            }
        }

        visited[cur] = true; // 標記為失敗

        return false;
    }
};

time：$O(n \cdot log(n) + \sum\limits_{i = 0}^{n-1} + \sum\limits_{i = 0}^{n-1}{w_i^2})$ ➔ $T(n) = T(n - 1) + O(n)$
- $O(n \cdot log(n))$：sorting
- $O(\sum\limits_{i = 0}^{n-1})$：insert 單字到 trie 中
- $O(\sum\limits_{i = 0}^{n-1}{w_i^2})$：判斷每個 words[i] 是否為連接詞需 $O(w_i^2)$
  - $O(n^2)：T(n) = T(n - 1) +$ $O(n)$, 因為有用 visited 記憶, 所以呼叫 T(n-1) 即可, T(n-1) 會再往下呼叫 T(n-2), 依此類推 …, 每個 T(n-i) 只要呼叫一次即可(因為會記住結果)。
  - $O(n)$ 是因為呼叫一次 T(n-1) 後, 剩下 T(n-2), ..., T(1) 都會計算出來。原本 $O(n)$ 應寫作 T(n-2) + T(n-3) + ... + T(1), 但是除了 T(n-1), 剩下的 T(n-i) 都只需 $O(1)$, 故 T(n-2) + T(n-3) + ... + T(1) 可直接寫成 $O(n)$
space：$O(\sum\limits_{i = 0}^{n-1}{w_i} + L)$ ➔ $O(26 \cdot \sum\limits_{i = 0}^{n-1}{w_i}) + O(L)$
- $O(26 \cdot \sum\limits_{i = 0}^{n-1}{w_i})$：worse case : trie 中所有單字都沒有重複的 prefix
- $O(L)$：visited, 其中 $L$ 為 words 中最長單字的長度