-
Notifications
You must be signed in to change notification settings - Fork 855
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
linzhijun
committed
Sep 25, 2024
1 parent
3e54f92
commit 3a459e3
Showing
8 changed files
with
700 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 17 | ||
VisualStudioVersion = 17.11.35312.102 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ToolGood.Words", "ToolGood.Words\ToolGood.Words.vcxproj", "{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|x64 = Debug|x64 | ||
Debug|x86 = Debug|x86 | ||
Release|x64 = Release|x64 | ||
Release|x86 = Release|x86 | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x64.ActiveCfg = Debug|x64 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x64.Build.0 = Debug|x64 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x86.ActiveCfg = Debug|Win32 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x86.Build.0 = Debug|Win32 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x64.ActiveCfg = Release|x64 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x64.Build.0 = Release|x64 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x86.ActiveCfg = Release|Win32 | ||
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x86.Build.0 = Release|Win32 | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {143AC62E-6044-4A30-9652-2DC31C3AEABE} | ||
EndGlobalSection | ||
EndGlobal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
#pragma | ||
|
||
#include "IntDictionary.cpp" | ||
#include <map> | ||
#include <vector> | ||
#include <string> | ||
#include "TrieNode.h" | ||
using std::map; | ||
using std::vector; | ||
using std::string; | ||
|
||
class BaseSearchEx | ||
{ | ||
protected: | ||
unsigned short* _dict; | ||
int* _first; | ||
|
||
IntDictionary* _nextIndex; | ||
int* _end; | ||
int* _resultIndex; | ||
int* _keywordLengths; | ||
|
||
protected: | ||
virtual void SetKeywords2(string _keywords[]) | ||
{ | ||
TrieNode root; | ||
map<int, vector<TrieNode>> allNodeLayers; | ||
int kindex = 0; | ||
|
||
for (size_t i = 0; i < _keywords->size(); i++) /// _keywords->size() 这个值有问题 | ||
{ | ||
string p = _keywords[i]; | ||
|
||
TrieNode nd = root; | ||
for (size_t j = 0; j < p.length(); j++) // 这个 p.length() 对中文有问题 | ||
{ | ||
nd = nd.Add((char)p.c_str()[j]); // 返回是byte 不是char | ||
if (nd.Layer == 0) { | ||
nd.Layer = j + 1; | ||
auto find = allNodeLayers.find(nd.Layer); | ||
if (find == allNodeLayers.end()) | ||
{ | ||
vector<TrieNode> trieNodes; | ||
trieNodes.push_back(nd); | ||
allNodeLayers[nd.Layer] = trieNodes; | ||
} | ||
else | ||
{ | ||
find->second.push_back(nd); | ||
} | ||
} | ||
} | ||
} | ||
vector<TrieNode> allNode; | ||
allNode.push_back(root); | ||
for (size_t i = 0; i < allNodeLayers.size(); i++) | ||
{ | ||
auto item = allNodeLayers.at(i); | ||
for (size_t j = 0; j < item.size(); j++) | ||
{ | ||
allNode.push_back(item[j]); | ||
} | ||
} | ||
allNodeLayers.clear(); | ||
|
||
for (size_t i = 0; i < allNode.size(); i++) | ||
{ | ||
TrieNode nd = allNode[i]; | ||
nd.Index = i; | ||
TrieNode r = *(nd.Parent->Failure); | ||
char c = nd.Char; | ||
while (&r != NULL && (r.m_values.size() == 0 || r.m_values.find(c) == r.m_values.end())) r = *r.Failure; | ||
if (&r == NULL) | ||
{ | ||
nd.Failure = &(root); | ||
} | ||
else | ||
{ | ||
nd.Failure = &(r.m_values[c]); | ||
if (nd.Failure->Results.size() > 0) { | ||
for (size_t i = 0; i < nd.Failure->Results.size(); i++) | ||
{ | ||
nd.SetResults(nd.Failure->Results[i]); | ||
} | ||
} | ||
} | ||
} | ||
root.Failure = &(root); | ||
|
||
string stringBuilder = ""; | ||
for (int i = 1; i < allNode.size(); i++) { | ||
stringBuilder += (allNode[i].Char); | ||
} | ||
int length = CreateDict(stringBuilder); | ||
stringBuilder.clear(); | ||
|
||
int first[0x10000]; | ||
if (allNode[0].m_values.size() > 0) { | ||
for (size_t i = 0; i < allNode[0].m_values.size(); i++) | ||
{ | ||
auto it = allNode[0].m_values.at(i); | ||
char key = (char)_dict[it.Char]; | ||
first[key] = it.Index; | ||
} | ||
} | ||
_first = first; | ||
|
||
vector<int> resultIndex2; | ||
vector<bool> isEndStart; | ||
int len = allNode.size(); | ||
IntDictionary* nextIndex2 = new IntDictionary[len]; | ||
|
||
for (int i = allNode.size() - 1; i >= 0; i--) { | ||
map<unsigned short, int> dict; | ||
vector<int> result; | ||
TrieNode oldNode = allNode[i]; | ||
|
||
if (oldNode.m_values.size() > 0) { | ||
for (size_t i = 0; i < oldNode.m_values.size(); i++) | ||
{ | ||
char key = (char)oldNode.m_values.at(i).Char; | ||
int index = oldNode.m_values.at(i).Index; | ||
dict[key] = index; | ||
} | ||
} | ||
if (oldNode.Results.size() > 0) { | ||
for (size_t i = 0; i < oldNode.Results.size(); i++) | ||
{ | ||
result.push_back(oldNode.Results[i]); | ||
} | ||
} | ||
|
||
oldNode = *oldNode.Failure; | ||
while (oldNode.Index != root.Index) { | ||
if (oldNode.m_values.size() > 0) { | ||
for (size_t i = 0; i < oldNode.m_values.size(); i++) | ||
{ | ||
char key = (char)oldNode.m_values.at(i).Char; | ||
int index = oldNode.m_values.at(i).Index; | ||
if (dict.find(key) == dict.end()) | ||
{ | ||
dict[key] = index; | ||
} | ||
} | ||
} | ||
if (oldNode.Results.size() > 0) { | ||
for (size_t i = 0; i < oldNode.Results.size(); i++) | ||
{ | ||
int idx = oldNode.Results[i]; | ||
bool find = false; | ||
for (size_t j = 0; j < result.size(); j++) | ||
{ | ||
if (result[i] == idx) | ||
{ | ||
find = true; | ||
break; | ||
} | ||
} | ||
if (find == false) | ||
{ | ||
result.push_back(oldNode.Results[i]); | ||
} | ||
} | ||
} | ||
oldNode = *(oldNode.Failure); | ||
} | ||
nextIndex2[i] = *(new IntDictionary(dict)); | ||
|
||
if (result.size() > 0) { | ||
for (int j = result.size() - 1; j >= 0; j--) { | ||
resultIndex2.push_back(result[j]); | ||
isEndStart.push_back(false); | ||
} | ||
isEndStart[isEndStart.size() - 1] = true; | ||
} | ||
else { | ||
resultIndex2.push_back(-1); | ||
isEndStart.push_back(true); | ||
} | ||
} | ||
|
||
allNode.clear(); | ||
_nextIndex = nextIndex2; | ||
|
||
vector<int> resultIndex; | ||
vector<int> end; | ||
|
||
for (int i = isEndStart.size() - 1; i >= 0; i--) { | ||
if (isEndStart[i]) { | ||
end.push_back(resultIndex.size()); | ||
} | ||
if (resultIndex2[i] > -1) { | ||
resultIndex.push_back(resultIndex2[i]); | ||
} | ||
} | ||
end.push_back(resultIndex.size()); | ||
|
||
_resultIndex = new int[resultIndex.size()]; | ||
for (size_t i = 0; i < resultIndex.size(); i++) | ||
{ | ||
_resultIndex[i] = resultIndex[i]; | ||
} | ||
_end = new int[end.size()]; | ||
for (size_t i = 0; i < end.size(); i++) | ||
{ | ||
_end[i] = end[i]; | ||
} | ||
} | ||
|
||
|
||
private: | ||
int CreateDict(string keywords) { | ||
_dict = new unsigned short[0x10000]; | ||
map<char, unsigned int> dictionary; | ||
int index = 1; | ||
for (size_t i = 0; i < keywords.size(); i++) | ||
{ | ||
char item = keywords.at(i); | ||
if (_dict[item] == 0) | ||
{ | ||
_dict[item] = index; | ||
index++; | ||
} | ||
} | ||
return index - 1; | ||
} | ||
|
||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#pragma once | ||
|
||
#include <map> | ||
#include <vector> | ||
#include <string> | ||
#include <algorithm> | ||
using std::map; | ||
using std::vector; | ||
using std::string; | ||
using namespace std; | ||
|
||
|
||
typedef pair<unsigned short, int> PAIR; | ||
//int cmp2(const PAIR& x, const PAIR& y) { | ||
// return x.first < y.first; | ||
//} | ||
|
||
class IntDictionary | ||
{ | ||
public: | ||
unsigned short* _keys; | ||
int* _values; | ||
int last; | ||
|
||
public: | ||
|
||
IntDictionary() | ||
{ | ||
|
||
} | ||
IntDictionary(unsigned short* keys, int* values, int len) | ||
{ | ||
_keys = keys; | ||
_values = values; | ||
last = len - 1; | ||
} | ||
IntDictionary(map<unsigned short, int> dict) | ||
{ | ||
int len = dict.size(); | ||
_keys = new unsigned short[len]; | ||
_values = new int[len]; | ||
last = len - 1; | ||
|
||
vector<PAIR> vec(dict.begin(), dict.end()); | ||
sort(vec.begin(), vec.end()); | ||
|
||
for (size_t i = 0; i < vec.size(); i++) | ||
{ | ||
_keys[i] = vec[i].first; | ||
_values[i] = vec[i].second; | ||
} | ||
last = len - 1; | ||
} | ||
|
||
|
||
bool TryGetValue(unsigned short key, int& value) | ||
{ | ||
if (last == -1) { | ||
value = 0; | ||
return false; | ||
} | ||
if (_keys[0] == key) { | ||
value = _values[0]; | ||
return true; | ||
} | ||
else if (last == 0 || _keys[0] > key) { | ||
value = 0; | ||
return false; | ||
} | ||
|
||
if (_keys[last] == key) { | ||
value = _values[last]; | ||
return true; | ||
} | ||
else if (_keys[last] < key) { | ||
value = 0; | ||
return false; | ||
} | ||
|
||
int left = 1; | ||
int right = last - 1; | ||
while (left <= right) { | ||
int mid = (left + right) >> 1; | ||
int d = _keys[mid] - key; | ||
|
||
if (d == 0) { | ||
value = _values[mid]; | ||
return true; | ||
} | ||
else if (d > 0) { | ||
right = mid - 1; | ||
} | ||
else { | ||
left = mid + 1; | ||
} | ||
} | ||
value = 0; | ||
return false; | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// ToolGood.Words.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。 | ||
// | ||
|
||
#include <iostream> | ||
#include "WordsSearchEx.cpp" | ||
|
||
int main() | ||
{ | ||
WordsSearchEx search; | ||
auto ks = new string[2]; | ||
ks[0] = "中国"; | ||
ks[1] = "国人"; | ||
ks[2] = "zg人"; | ||
|
||
search.SetKeywords(ks); | ||
auto r = search.FindFirst("我是中国人"); | ||
|
||
std::cout << "Hello World!\n"; | ||
} | ||
|
||
// 运行程序: Ctrl + F5 或调试 >“开始执行(不调试)”菜单 | ||
// 调试程序: F5 或调试 >“开始调试”菜单 | ||
|
||
// 入门使用技巧: | ||
// 1. 使用解决方案资源管理器窗口添加/管理文件 | ||
// 2. 使用团队资源管理器窗口连接到源代码管理 | ||
// 3. 使用输出窗口查看生成输出和其他消息 | ||
// 4. 使用错误列表窗口查看错误 | ||
// 5. 转到“项目”>“添加新项”以创建新的代码文件,或转到“项目”>“添加现有项”以将现有代码文件添加到项目 | ||
// 6. 将来,若要再次打开此项目,请转到“文件”>“打开”>“项目”并选择 .sln 文件 |
Oops, something went wrong.