Basics on Dynamic Programming(DP) 6 - Word Break

字符串处理，Word Break

一般有N个数/字符，就开N+1个位置的数组，第0个位置单独留出来作初始化

1. Word Break - Leetcode 139

Given a string s and a dictionary of words dict, determine if s can be segmented into a space-separated sequence of one or more dictionary words.

For example, given
s = "leetcode",
dict = ["leet", "code"].

Return true because "leetcode" can be segmented as "leet code".

bool wordBreak(string s, unordered_set<string> &dict) {
  int wordLength = 0;
  for (auto word : dict) {
    wordLength = max(wordLength, (int)word.size());
  }
  // DP[i]代表，s[0,i) 前i个字符是否能满足答案
  // 方程dp[i] = dp[j] && j+1~i属于dict
  int n = s.size();
  vector<bool> dp(n + 1, false);  //初始化，长度为 n 的字符串有 n+1 个隔板
  dp[0] = true;  //这样是为了减少0位置的判断,空字符串
  for (int i = 1; i <= n; i++) {
    // 超时了，优化: 切分位置的枚举，从后往前，到单词的最长位置
    /*for (int j = 0; j < i; j++) {
      if (dp[j] && dict.find(s.substr(j, i - j)) != dict.end()) {  //从j开始算的
        dp[i] = true;
        break;
      }
    }*/
    for (int j = 1; j <= wordLength && j <= i; j++) {
      if (dp[i - j] && dict.find(s.substr(i - j, j)) != dict.end()) {  //从j开始算的
        dp[i] = true;
        break;
      }
    }
  }
  return dp[n];
}

2. Word Break II- Leetcode 140

Given a string s and a dictionary of words dict, add spaces in s to construct a sentence where each word is a valid dictionary word.

Return all such possible sentences. ==>这种题目，不能用DP，只能DFS

For example, given
s = "catsanddog",
dict = ["cat", "cats", "and", "sand", "dog"].

A solution is ["cats and dog", "cat sand dog"].

DFS原始代码会超时：

vector<string> wordBreak(string s, unordered_set<string> &dict) {
  vector<string> res;
  DFS(s, 0, dict, "", res);
  return res;
}
void DFS(string s, int index, unordered_set<string> &dict, string path,
         vector<string> &res) {
  if (index == s.size()) {
    res.push_back(path);
    return;
  }
  for (int i = index; i < s.size(); i++) {
    if (dict.find(s.substr(index, i - index + 1)) != dict.end()) {
      string tmp_path = path;  // tmp_path不改变原始path
      tmp_path += (path == "") ? "" : " ";  //是否有空格
      tmp_path += s.substr(index, i - index + 1);
      DFS(s, i + 1, dict, tmp_path, res);
    }
  }
}

优化：重复计算太多，可以借鉴DP，存储所有的合法的单词 - 难!!

vector<string> wordBreak(string s, unordered_set<string> &dict) {
  // 先用1的方法得到结果的组数，再分别求结果
  // prev[i][j]表示s[i, j)是一个合法单词,可以从j处切开
  vector<vector<bool> > prev(s.length(), vector<bool>(s.length() + 1, false));
  vector<bool> dp(s.size() + 1, false);  //初始化为false!
  dp[0] = true;                          // 空字符串
  for (int i = 1; i <= s.size(); i++)
    for (int j = i - 1; j >= 0; j--) {
      if (dp[j] && dict.find(s.substr(j, i - j)) != dict.end()) {
        dp[i] = true;
        prev[j][i] = true;
      }
    }
  vector<string> result;
  if (!dp[s.size()]) return result;
  gen_path(s, prev, 0, "", result);
  reverse(result.begin(), result.end());
  return result;
}
// DFS遍历树，生成路径
void gen_path(string s, const vector<vector<bool> > &prev, int cur, string path,
              vector<string> &result) {
  if (cur == s.size()) {  //找到，存入
    result.push_back(path);
    return;  // string path恢复原状
  }
  for (int i = cur + 1; i <= s.size(); i++) {  //找从cur开始的，可行的解!
    if (prev[cur][i]) {
      path += (path == "") ? "" : " ";  //是否有空格
      path += s.substr(cur, i - cur);
      gen_path(s, prev, i, path, result);  //这里还是i!
      path = path.substr(0, path.size()-i+cur);
      if(path != "")  path = path.substr(0, path.size()-1);
    }
  }
}