Skip to content

Commit

Permalink
added js unpacking
Browse files Browse the repository at this point in the history
  • Loading branch information
DatL4g committed Sep 24, 2024
1 parent 98b7d8a commit dcfbff5
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 0 deletions.
19 changes: 19 additions & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DataNodeTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.fleeksoft.ksoup.nodes

import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class DataNodeTest {

Expand Down Expand Up @@ -66,4 +67,22 @@ class DataNodeTest {
node.outerHtmlHead(accum, 0, Document.OutputSettings().syntax(Document.OutputSettings.Syntax.xml))
assertEquals("<![CDATA[other && <> data]]>", accum.toString())
}

@Test
fun recognizePacked() {
    // A p,a,c,k,e,r-style packed script placed inside a <script> parent must be detected.
    val packedScript = """
        eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0.1("2 3")',4,4,'console|log|Hello|World'.split('|'),0,{}))
    """.trimIndent()
    val dataNode = DataNode(packedScript).apply { _parentNode = Element("script") }

    assertTrue(dataNode.isPacked)
}

@Test
fun unpackedData() {
    // The packed payload '0.1("2 3")' with symbols console|log|Hello|World must expand to plain JavaScript.
    val packedScript = """
        eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0.1("2 3")',4,4,'console|log|Hello|World'.split('|'),0,{}))
    """.trimIndent()
    val dataNode = DataNode(packedScript)
    dataNode._parentNode = Element("script")

    val expected = "console.log(\"Hello World\")"
    assertEquals(expected, dataNode.getUnpackedData())
}
}
41 changes: 41 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/internal/Unbaser.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.fleeksoft.ksoup.internal

import kotlin.math.pow

/**
 * Decodes the positional-notation words used by packed JavaScript back into integers.
 *
 * Bases 2..36 are delegated to [String.toIntOrNull]. Every other base is decoded with one of the
 * alphabets in [ALPHABET], where a character's position in the alphabet is its digit value.
 *
 * @property base the radix the encoded words were written in.
 */
internal data class Unbaser(
    private val base: Int
) {
    /** Key of the [ALPHABET] entry used for bases that [String.toIntOrNull] cannot handle. */
    private val selector: Int = when {
        base > 62 -> 95
        base > 54 -> 62
        base > 52 -> 54
        else -> 52
    }

    /**
     * Character-to-digit lookup for the selected alphabet.
     * Built once per instance, and only if a dictionary-based decode is actually requested.
     */
    private val dictionary: Map<Char, Int> by lazy {
        ALPHABET.getValue(selector).withIndex().associate { (index, cipher) -> cipher to index }
    }

    /**
     * Converts [value] from its [base] representation to an [Int].
     *
     * For bases 2..36 a word that cannot be parsed yields `0`. For other bases, characters that are
     * not part of the alphabet count as digit `0`, and results that do not fit into an [Int] are
     * clamped to the [Int] range instead of wrapping around.
     *
     * @param value the encoded word.
     * @return the decoded number.
     */
    fun unbase(value: String): Int {
        if (base in 2..36) {
            return value.toIntOrNull(base) ?: 0
        }

        // Horner's scheme in integer arithmetic: exact for every representable result, unlike
        // floating-point powers which lose precision once a place value exceeds 2^24.
        var result = 0L
        for (cipher in value) {
            val digit = dictionary[cipher] ?: 0
            result = (result * base + digit).coerceIn(Int.MIN_VALUE.toLong(), Int.MAX_VALUE.toLong())
        }
        return result.toInt()
    }

    companion object {
        /**
         * Digit alphabets keyed by their length.
         * The 95-character entry is the printable ASCII range (space through `~`) in code-point order;
         * it must contain each character exactly once, otherwise all following digit values shift.
         */
        private val ALPHABET = mapOf<Int, String>(
            52 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP",
            54 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQR",
            62 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
            95 to " !\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
        )
    }
}
50 changes: 50 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/nodes/DataNode.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.fleeksoft.ksoup.nodes

import com.fleeksoft.ksoup.internal.Unbaser

/**
* Create a new DataNode.
* A data node, for contents of style, script tags etc, where contents should not show in text().
Expand All @@ -8,6 +10,10 @@ package com.fleeksoft.ksoup.nodes
*/
public class DataNode(data: String) : LeafNode(data) {

/**
 * Whether this node sits inside a `script` element and its data contains a packed
 * `eval(function(p,a,c,k,e,r){...})` expression.
 *
 * Evaluated on every access instead of being cached: the parent can be assigned after the node
 * has been constructed, so a cached result could be stale.
 */
public val isPacked: Boolean
    get() = parentNameIs("script") && packedRegex.containsMatchIn(getWholeData())

/** Returns the fixed node name used for data nodes, `#data`. */
override fun nodeName(): String = "#data"
Expand All @@ -24,6 +30,32 @@ public class DataNode(data: String) : LeafNode(data) {
return this
}

/**
 * Returns the data of this node with every packed JavaScript expression replaced by its
 * unpacked source.
 *
 * If the node is not packed (see [isPacked]) the data is returned unchanged. A packed expression
 * from which nothing can be extracted is left in place rather than being removed.
 *
 * @return the unpacked data, or the whole data when there is nothing to unpack.
 */
public fun getUnpackedData(): String {
    val data = getWholeData()
    if (!isPacked) {
        return data
    }

    return data.replace(packedRegex) { packed ->
        val unpackedParts = packedExtractRegex.findAll(packed.value)
            .mapNotNull { extracted -> unpack(extracted) }
            .toList()

        // Keep the original script when extraction fails instead of silently dropping it.
        if (unpackedParts.isEmpty()) packed.value else unpackedParts.joinToString(separator = "")
    }
}

/**
 * Unpacks a single [packedExtractRegex] match.
 *
 * @param extracted match whose groups are payload (1), radix (2), symbol count (3) and the
 * `|`-separated symbol table (4).
 * @return the payload with every word replaced by its symbol, or `null` when a group is missing
 * or the symbol table size does not equal the declared count.
 */
private fun unpack(extracted: MatchResult): String? {
    val payload = extracted.groups[1]?.value ?: return null
    val symtab = extracted.groups[4]?.value?.split('|') ?: return null
    val count = extracted.groups[3]?.value?.toIntOrNull() ?: return null
    if (symtab.size != count) {
        return null
    }

    val radix = extracted.groups[2]?.value?.toIntOrNull() ?: 10
    val unbaser = Unbaser(radix)

    return payload.replace(unpackReplaceRegex) { token ->
        val word = token.value
        // A word may decode to an index outside the symbol table (malformed or hostile input),
        // and an empty symbol means "keep the word"; in both cases the word stays as it is.
        symtab.getOrNull(unbaser.unbase(word))?.takeIf { it.isNotEmpty() } ?: word
    }
}

public override fun outerHtmlHead(accum: Appendable, depth: Int, out: Document.OutputSettings) {
/* For XML output, escape the DataNode in a CData section. The data may contain pseudo-CData content if it was
parsed as HTML, so don't double up Cdata. Output in polyglot HTML / XHTML / XML format. */
Expand Down Expand Up @@ -52,4 +84,22 @@ public class DataNode(data: String) : LeafNode(data) {
/** Delegates to the superclass `clone()` and narrows the result to [DataNode]. */
override fun clone(): DataNode = super.clone() as DataNode

companion object {
    /**
     * Detects a packed script: `eval(function(p,a,c,k,e,r){` (or with `d` as the last parameter),
     * followed lazily by anything up to the first `}))`. Matching is case-insensitive.
     */
    private val packedRegex = Regex(
        pattern = "eval[(]function[(]p,a,c,k,e,[rd][)][{].*?[}][)]{2}",
        options = setOf(RegexOption.IGNORE_CASE, RegexOption.MULTILINE),
    )

    /**
     * Splits the argument list of a packed script into groups:
     * 1 = quoted payload, 2 = first number (read as the radix), 3 = second number (read as the
     * symbol count), 4 = quoted symbol table that is followed by `.split('|')`.
     */
    private val packedExtractRegex = Regex(
        pattern = "[}][(]'(.*)', *(\\d+), *(\\d+), *'(.*?)'[.]split[(]'[|]'[)]",
        options = setOf(RegexOption.IGNORE_CASE, RegexOption.MULTILINE),
    )

    /**
     * Matches each whole word (`\w+` between word boundaries) of the payload; these are the
     * tokens that get looked up in the symbol table when de-obfuscating.
     */
    private val unpackReplaceRegex = Regex(
        pattern = "\\b\\w+\\b",
        options = setOf(RegexOption.IGNORE_CASE, RegexOption.MULTILINE),
    )
}
}

0 comments on commit dcfbff5

Please sign in to comment.