Skip to content

Commit

Permalink
Merge pull request #70 from kookmin-sw/feature/pe-code-refactoring
Browse files Browse the repository at this point in the history
refactor: experiment results and code cleanup
  • Loading branch information
SeokHyeon-Eom authored Jun 5, 2024
2 parents 05265f3 + babe363 commit e50ab1c
Show file tree
Hide file tree
Showing 12 changed files with 24,564 additions and 23,587 deletions.
20 changes: 13 additions & 7 deletions GIPS/iot_signature.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -194,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -203,7 +203,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -212,17 +212,23 @@
"text": [
"('PING', 731)\n",
"('PONG\\n', 707)\n",
"('root', 239)\n",
"('Password: ', 26)\n"
]
}
],
"source": [
"for cluster, sig in signature.items():\n",
" for token in sig:\n",
" if token[1] not in filters:\n",
" if token[0] not in filters:\n",
" print(token)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
25 changes: 2 additions & 23 deletions PE/GIPS/core/JIG.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tqdm import tqdm


def IORA(sum_vector_): # 이해 가능한 사람은 공유해주세요
def IORA(sum_vector_):
sum_vector = sorted(sum_vector_, reverse=True)
total = sum(sum_vector)

Expand All @@ -19,30 +19,9 @@ def IORA(sum_vector_): # 이해 가능한 사람은 공유해주세요
total -= sum_vector[idx]
return thetaC

'''
vectors: virtual vector
thetaJ: hyper parameter
'''

def JIG(vectors, thetaJ):
'''
의사 코드
M <= vectors의 크기
MV <= 빅그룹을 카운팅할 배열
big_group_indices <= 빅그룹으로 판별된 데이터를 저장할 집합
for vector in vectors
mv에 vector 추가
thetaC <= 빅그룹 식별 임계값 (IORA 이용)
ratio <= vector에서 1인 위치의 MV 값이 thetaC보다 큰 경우의 수를 카운트
if ratio / K > thetaJ
big_group_indices에 vector의 index추가
big_group_indices 반환
'''

M = len(vectors[0])
MV = np.zeros(M, dtype=np.int32)
big_group_indices = []
Expand Down
30 changes: 4 additions & 26 deletions PE/GIPS/core/MV2.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,15 @@
import numpy as np
from core.utils import AEchunking, minHash
from core.utils import minHash
from tqdm import tqdm


'''
payloads: 입력 데이터(패킷 페이로드)
window_size: CDC(content-defined chunking) 청킹에 사용하는 윈도우 크기
K: minhash의 개수
M: bitmap의 사이즈
'''


def MV2(payloads, window_size, K, M):
'''
의사 코드
minhashed_virtual_vectors <= 배열
for payload in payloads
chunks <= payload를 청킹한 값
encode_pos <= chunks를 민해싱한 값
vector <= 비트맵
encoding <= 해싱값에 맞는 위치에 인코딩한 값
minhashed_virtual_vectors에 vector 추가
minhashed_virtual_vectors 반환
'''
def MV2(payloads, K, M):

minhashed_virtual_vectors = []

print('make minhashed vector')
for payload in tqdm(payloads):
chunks = list(payload) # AEchunking(payload, W=window_size)
chunks = list(payload)
encode_pos = minHash(chunks, K) % M

vector = np.zeros(M, dtype=np.int8)
Expand All @@ -41,3 +18,4 @@ def MV2(payloads, window_size, K, M):
minhashed_virtual_vectors.append(vector)

return minhashed_virtual_vectors

7 changes: 3 additions & 4 deletions PE/GIPS/core/SG2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,15 @@
from tqdm import tqdm


def SG2(payloads, window_size, vector_size, eps, minpts, ngram, hh1_size, hh2_size, hh3_size, ratio):
def SG2(payloads, vector_size, eps, minpts, hh1_size, hh2_size, ratio):
fine_vectors = []

print('chunking')
for payload in tqdm(payloads):
chunks = payload # AEchunking(payload, window_size)
chunks = payload
vector = np.zeros(vector_size, dtype=np.int8)
for chunk in chunks:
idx = int(hashlib.md5(chunk.encode()).hexdigest(),
16) % vector_size
idx = int(hashlib.md5(chunk.encode()).hexdigest(), 16) % vector_size
vector[idx] += 1

fine_vectors.append(vector)
Expand Down
11 changes: 5 additions & 6 deletions PE/GIPS/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@


def GIPS(str_feature,
window_size, K, M, # MV2 파라미터
K, M, # MV2 파라미터
thetaJ, # JIG 파라미터
vector_size, eps, minpts, ngram, hh1_size, hh2_size, hh3_size, ratio # SG2, AWL 파라미터
vector_size, eps, minpts, hh1_size, hh2_size, ratio # SG2
):

print(f'data no: {len(str_feature)}')
Expand All @@ -16,11 +16,10 @@ def GIPS(str_feature,
feature = list(feature)

# 빅 그룹 식별
minhashed_virtual_vectors = MV2(payloads=str_feature, window_size=window_size, K=K, M=M)
minhashed_virtual_vectors = MV2(payloads=str_feature, K=K, M=M)

big_group_indices = JIG(vectors=minhashed_virtual_vectors, thetaJ=thetaJ)


big_group_payloads = []
non_big_group_paylaods = []

Expand All @@ -31,7 +30,7 @@ def GIPS(str_feature,
non_big_group_paylaods.append(payload)

# 시그니처 생성
cluster_signatures = SG2(payloads=big_group_payloads, window_size=window_size, vector_size=vector_size,
eps=eps, minpts=minpts, ngram=ngram, hh1_size=hh1_size, hh2_size=hh2_size, hh3_size=hh3_size, ratio=ratio)
cluster_signatures = SG2(payloads=big_group_payloads, vector_size=vector_size, eps=eps,
minpts=minpts, hh1_size=hh1_size, hh2_size=hh2_size, ratio=ratio)

return cluster_signatures
Loading

0 comments on commit e50ab1c

Please sign in to comment.