Skip to content

Commit

Permalink
CPP不能用。。。。
Browse files Browse the repository at this point in the history
  • Loading branch information
linzhijun committed Sep 25, 2024
1 parent 3e54f92 commit 3a459e3
Show file tree
Hide file tree
Showing 8 changed files with 700 additions and 0 deletions.
31 changes: 31 additions & 0 deletions cpp/ToolGood.Words.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.11.35312.102
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ToolGood.Words", "ToolGood.Words\ToolGood.Words.vcxproj", "{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x64.ActiveCfg = Debug|x64
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x64.Build.0 = Debug|x64
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x86.ActiveCfg = Debug|Win32
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Debug|x86.Build.0 = Debug|Win32
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x64.ActiveCfg = Release|x64
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x64.Build.0 = Release|x64
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x86.ActiveCfg = Release|Win32
{AB38D1D8-AAC3-450E-9563-29FB3339ADB3}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {143AC62E-6044-4A30-9652-2DC31C3AEABE}
EndGlobalSection
EndGlobal
228 changes: 228 additions & 0 deletions cpp/ToolGood.Words/BaseSearchEx.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#pragma

#include "IntDictionary.cpp"
#include <map>
#include <vector>
#include <string>
#include "TrieNode.h"
using std::map;
using std::vector;
using std::string;

class BaseSearchEx
{
protected:
unsigned short* _dict;
int* _first;

IntDictionary* _nextIndex;
int* _end;
int* _resultIndex;
int* _keywordLengths;

protected:
virtual void SetKeywords2(string _keywords[])
{
TrieNode root;
map<int, vector<TrieNode>> allNodeLayers;
int kindex = 0;

for (size_t i = 0; i < _keywords->size(); i++) /// _keywords->size() 这个值有问题
{
string p = _keywords[i];

TrieNode nd = root;
for (size_t j = 0; j < p.length(); j++) // 这个 p.length() 对中文有问题
{
nd = nd.Add((char)p.c_str()[j]); // 返回是byte 不是char
if (nd.Layer == 0) {
nd.Layer = j + 1;
auto find = allNodeLayers.find(nd.Layer);
if (find == allNodeLayers.end())
{
vector<TrieNode> trieNodes;
trieNodes.push_back(nd);
allNodeLayers[nd.Layer] = trieNodes;
}
else
{
find->second.push_back(nd);
}
}
}
}
vector<TrieNode> allNode;
allNode.push_back(root);
for (size_t i = 0; i < allNodeLayers.size(); i++)
{
auto item = allNodeLayers.at(i);
for (size_t j = 0; j < item.size(); j++)
{
allNode.push_back(item[j]);
}
}
allNodeLayers.clear();

for (size_t i = 0; i < allNode.size(); i++)
{
TrieNode nd = allNode[i];
nd.Index = i;
TrieNode r = *(nd.Parent->Failure);
char c = nd.Char;
while (&r != NULL && (r.m_values.size() == 0 || r.m_values.find(c) == r.m_values.end())) r = *r.Failure;
if (&r == NULL)
{
nd.Failure = &(root);
}
else
{
nd.Failure = &(r.m_values[c]);
if (nd.Failure->Results.size() > 0) {
for (size_t i = 0; i < nd.Failure->Results.size(); i++)
{
nd.SetResults(nd.Failure->Results[i]);
}
}
}
}
root.Failure = &(root);

string stringBuilder = "";
for (int i = 1; i < allNode.size(); i++) {
stringBuilder += (allNode[i].Char);
}
int length = CreateDict(stringBuilder);
stringBuilder.clear();

int first[0x10000];
if (allNode[0].m_values.size() > 0) {
for (size_t i = 0; i < allNode[0].m_values.size(); i++)
{
auto it = allNode[0].m_values.at(i);
char key = (char)_dict[it.Char];
first[key] = it.Index;
}
}
_first = first;

vector<int> resultIndex2;
vector<bool> isEndStart;
int len = allNode.size();
IntDictionary* nextIndex2 = new IntDictionary[len];

for (int i = allNode.size() - 1; i >= 0; i--) {
map<unsigned short, int> dict;
vector<int> result;
TrieNode oldNode = allNode[i];

if (oldNode.m_values.size() > 0) {
for (size_t i = 0; i < oldNode.m_values.size(); i++)
{
char key = (char)oldNode.m_values.at(i).Char;
int index = oldNode.m_values.at(i).Index;
dict[key] = index;
}
}
if (oldNode.Results.size() > 0) {
for (size_t i = 0; i < oldNode.Results.size(); i++)
{
result.push_back(oldNode.Results[i]);
}
}

oldNode = *oldNode.Failure;
while (oldNode.Index != root.Index) {
if (oldNode.m_values.size() > 0) {
for (size_t i = 0; i < oldNode.m_values.size(); i++)
{
char key = (char)oldNode.m_values.at(i).Char;
int index = oldNode.m_values.at(i).Index;
if (dict.find(key) == dict.end())
{
dict[key] = index;
}
}
}
if (oldNode.Results.size() > 0) {
for (size_t i = 0; i < oldNode.Results.size(); i++)
{
int idx = oldNode.Results[i];
bool find = false;
for (size_t j = 0; j < result.size(); j++)
{
if (result[i] == idx)
{
find = true;
break;
}
}
if (find == false)
{
result.push_back(oldNode.Results[i]);
}
}
}
oldNode = *(oldNode.Failure);
}
nextIndex2[i] = *(new IntDictionary(dict));

if (result.size() > 0) {
for (int j = result.size() - 1; j >= 0; j--) {
resultIndex2.push_back(result[j]);
isEndStart.push_back(false);
}
isEndStart[isEndStart.size() - 1] = true;
}
else {
resultIndex2.push_back(-1);
isEndStart.push_back(true);
}
}

allNode.clear();
_nextIndex = nextIndex2;

vector<int> resultIndex;
vector<int> end;

for (int i = isEndStart.size() - 1; i >= 0; i--) {
if (isEndStart[i]) {
end.push_back(resultIndex.size());
}
if (resultIndex2[i] > -1) {
resultIndex.push_back(resultIndex2[i]);
}
}
end.push_back(resultIndex.size());

_resultIndex = new int[resultIndex.size()];
for (size_t i = 0; i < resultIndex.size(); i++)
{
_resultIndex[i] = resultIndex[i];
}
_end = new int[end.size()];
for (size_t i = 0; i < end.size(); i++)
{
_end[i] = end[i];
}
}


private:
int CreateDict(string keywords) {
_dict = new unsigned short[0x10000];
map<char, unsigned int> dictionary;
int index = 1;
for (size_t i = 0; i < keywords.size(); i++)
{
char item = keywords.at(i);
if (_dict[item] == 0)
{
_dict[item] = index;
index++;
}
}
return index - 1;
}

};
100 changes: 100 additions & 0 deletions cpp/ToolGood.Words/IntDictionary.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#pragma once

#include <map>
#include <vector>
#include <string>
#include <algorithm>
using std::map;
using std::vector;
using std::string;
using namespace std;


typedef pair<unsigned short, int> PAIR;
//int cmp2(const PAIR& x, const PAIR& y) {
// return x.first < y.first;
//}

// Compact read-only dictionary from unsigned short keys to int values, stored
// as two parallel arrays with the keys in ascending order and searched with a
// binary search.
//
// Ownership: the class has no destructor and copies are shallow, so the arrays
// are never released here and every copy shares the same storage.
class IntDictionary
{
public:
    unsigned short* _keys;  // keys in ascending order
    int* _values;           // _values[i] is the value stored for _keys[i]
    int last;               // index of the final entry; -1 when the dictionary is empty

public:

    // Creates an empty dictionary. The members must be initialised here because
    // arrays created with `new IntDictionary[n]` are default-constructed and
    // TryGetValue reads `last` straight away.
    IntDictionary()
        : _keys(nullptr), _values(nullptr), last(-1)
    {
    }

    // Wraps caller-supplied arrays of `len` entries without copying them.
    // `keys` has to be sorted ascending for TryGetValue to find every entry.
    IntDictionary(unsigned short* keys, int* values, int len)
        : _keys(keys), _values(values), last(len - 1)
    {
    }

    // Copies the entries of `dict` into freshly allocated arrays. std::map
    // iterates in ascending key order, so no additional sort is required.
    IntDictionary(const std::map<unsigned short, int>& dict)
        : _keys(nullptr), _values(nullptr), last(static_cast<int>(dict.size()) - 1)
    {
        if (dict.empty()) {
            return;
        }
        _keys = new unsigned short[dict.size()];
        _values = new int[dict.size()];

        size_t i = 0;
        for (const auto& entry : dict)
        {
            _keys[i] = entry.first;
            _values[i] = entry.second;
            ++i;
        }
    }


    // Looks up `key`.
    //
    // @param key    key to search for.
    // @param value  receives the stored value on success and 0 otherwise.
    // @return       true when `key` is present.
    bool TryGetValue(unsigned short key, int& value) const
    {
        value = 0;
        if (last == -1) {
            return false;
        }

        // Check both ends first; this also rejects keys outside the stored range.
        if (_keys[0] == key) {
            value = _values[0];
            return true;
        }
        if (last == 0 || _keys[0] > key) {
            return false;
        }
        if (_keys[last] == key) {
            value = _values[last];
            return true;
        }
        if (_keys[last] < key) {
            return false;
        }

        // Binary search over the interior entries (both ends are ruled out).
        int left = 1;
        int right = last - 1;
        while (left <= right) {
            const int mid = (left + right) >> 1;
            const int d = _keys[mid] - key;  // operands are promoted to int, so this cannot wrap

            if (d == 0) {
                value = _values[mid];
                return true;
            }
            if (d > 0) {
                right = mid - 1;
            }
            else {
                left = mid + 1;
            }
        }
        return false;
    }
};
30 changes: 30 additions & 0 deletions cpp/ToolGood.Words/ToolGood.Words.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// ToolGood.Words.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//

#include <iostream>
#include "WordsSearchEx.cpp"

// Demo entry point: registers three keywords, runs one search and prints a
// greeting. The search result is not inspected.
int main()
{
    // Three keywords need three slots; the previous `new string[2]` was one
    // short, so writing ks[2] ran past the allocation. A local array also
    // removes the unreleased heap allocation. It is declared before `search`
    // so that it outlives it.
    string ks[3];
    ks[0] = "中国";
    ks[1] = "国人";
    ks[2] = "zg人";

    WordsSearchEx search;
    search.SetKeywords(ks);
    auto r = search.FindFirst("我是中国人");

    std::cout << "Hello World!\n";
}

// 运行程序: Ctrl + F5 或调试 >“开始执行(不调试)”菜单
// 调试程序: F5 或调试 >“开始调试”菜单

// 入门使用技巧:
// 1. 使用解决方案资源管理器窗口添加/管理文件
// 2. 使用团队资源管理器窗口连接到源代码管理
// 3. 使用输出窗口查看生成输出和其他消息
// 4. 使用错误列表窗口查看错误
// 5. 转到“项目”>“添加新项”以创建新的代码文件,或转到“项目”>“添加现有项”以将现有代码文件添加到项目
// 6. 将来,若要再次打开此项目,请转到“文件”>“打开”>“项目”并选择 .sln 文件
Loading

0 comments on commit 3a459e3

Please sign in to comment.