Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added JS unpacking #86

Merged
merged 5 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DataNodeTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.fleeksoft.ksoup.nodes

import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class DataNodeTest {

Expand Down Expand Up @@ -66,4 +67,22 @@ class DataNodeTest {
node.outerHtmlHead(accum, 0, Document.OutputSettings().syntax(Document.OutputSettings.Syntax.xml))
assertEquals("<![CDATA[other && <> data]]>", accum.toString())
}

@Test
fun recognizePacked() {
    // A minimal p,a,c,k,e,r payload that encodes `console.log("Hello World")`.
    val packedScript = """
        eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0.1("2 3")',4,4,'console|log|Hello|World'.split('|'),0,{}))
    """.trimIndent()

    // The node only counts as packed when it sits inside a <script> element.
    val dataNode = DataNode(packedScript).apply {
        _parentNode = Element("script")
    }

    assertTrue(dataNode.isPacked)
}

@Test
fun unpackedData() {
    // Same packed sample as above: symbols 0..3 map to console, log, Hello, World.
    val packedScript = """
        eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0.1("2 3")',4,4,'console|log|Hello|World'.split('|'),0,{}))
    """.trimIndent()

    val dataNode = DataNode(packedScript).apply {
        _parentNode = Element("script")
    }

    val expected = "console.log(\"Hello World\")"
    assertEquals(expected, dataNode.getUnpackedData())
}
}
41 changes: 41 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/internal/Unbaser.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.fleeksoft.ksoup.internal

import kotlin.math.pow

/**
 * Converts the encoded word tokens of a packed script back into their numeric value.
 *
 * Bases 2..36 are delegated to [String.toIntOrNull]. Any other base is decoded positionally
 * against one of the tables in [ALPHABET], chosen from [base] by [selector].
 *
 * @property base the radix the words were encoded with.
 */
internal data class Unbaser(
    private val base: Int
) {
    /** Key of the [ALPHABET] table used when [base] is outside 2..36. */
    private val selector: Int = when {
        base > 62 -> 95
        base > 54 -> 62
        base > 52 -> 54
        else -> 52
    }

    /**
     * Decodes [value] as a number written in [base].
     *
     * Characters that are not part of the active alphabet count as digit `0`, and a word that
     * cannot be parsed in bases 2..36 yields `0`.
     *
     * @param value the encoded word.
     * @return the decoded number; arithmetic is plain [Int] arithmetic, so words too long to
     * fit in an [Int] wrap around.
     */
    fun unbase(value: String): Int {
        if (base in 2..36) {
            return value.toIntOrNull(base) ?: 0
        }

        val digits = DIGIT_VALUES[selector] ?: return 0

        // Horner's scheme in integer arithmetic. The former Float-based power sum lost
        // precision as soon as a single term exceeded 2^24.
        return value.fold(0) { acc, symbol -> acc * base + (digits[symbol] ?: 0) }
    }

    companion object {
        // NOTE(review): the 95 table contains a backslash before the apostrophe as well as the
        // one between '[' and ']', so it holds 96 characters and '\' appears twice. Confirm
        // against the reference unpacker before changing it.
        private val ALPHABET = mapOf<Int, String>(
            52 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP",
            54 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQR",
            62 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
            95 to " !\"#\$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
        )

        /**
         * Character-to-digit lookup for every alphabet, built once instead of on each call.
         * When a character occurs more than once in a table, its last position wins.
         */
        private val DIGIT_VALUES: Map<Int, Map<Char, Int>> = ALPHABET.mapValues { (_, alphabet) ->
            alphabet.mapIndexed { index, symbol -> symbol to index }.toMap()
        }
    }
}
50 changes: 50 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/nodes/DataNode.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.fleeksoft.ksoup.nodes

import com.fleeksoft.ksoup.internal.Unbaser

/**
* Create a new DataNode.
* A data node, for contents of style, script tags etc, where contents should not show in text().
Expand All @@ -8,6 +10,10 @@ package com.fleeksoft.ksoup.nodes
*/
public class DataNode(data: String) : LeafNode(data) {

/**
 * Whether this node holds packed JavaScript: its parent is a `script` element and its data
 * contains a match for the packed-function pattern.
 *
 * Evaluated on every access rather than cached, so the answer stays correct after the node is
 * attached to, or moved under, a different parent.
 */
public val isPacked: Boolean
    get() = parentNameIs("script") && getWholeData().contains(packedRegex)

override fun nodeName(): String {
return "#data"
}
Expand All @@ -24,6 +30,32 @@ public class DataNode(data: String) : LeafNode(data) {
return this
}

/**
 * Returns this node's data with every packed JavaScript section replaced by its unpacked form.
 *
 * For each packed section the payload, radix, symbol count and `|`-separated symbol table are
 * extracted. Every word of the payload is decoded with [Unbaser] and replaced by the symbol at
 * that index; a word is kept as-is when its symbol is empty or its index is outside the table.
 *
 * A section is left untouched when nothing can be extracted from it or when the declared symbol
 * count does not match the table size.
 *
 * @return the unpacked data, or the whole data unchanged when the node is not packed.
 */
public fun getUnpackedData(): String {
    if (!isPacked) {
        return getWholeData()
    }

    return getWholeData().replace(packedRegex) { packed ->
        val unpackedParts = packedExtractRegex.findAll(packed.value).mapNotNull { extracted ->
            val payload = extracted.groups[1]?.value
            val symtab = extracted.groups[4]?.value?.split('|')
            val radix = extracted.groups[2]?.value?.toIntOrNull() ?: 10
            val count = extracted.groups[3]?.value?.toIntOrNull()

            if (payload == null || symtab == null || count == null || symtab.size != count) {
                null
            } else {
                val unbaser = Unbaser(radix)
                payload.replace(unpackReplaceRegex) { match ->
                    val word = match.value
                    // getOrNull: a word may decode to an index beyond the symbol table.
                    symtab.getOrNull(unbaser.unbase(word))?.ifEmpty { word } ?: word
                }
            }
        }.toList()

        // Keep the packed source rather than silently dropping it when unpacking failed.
        if (unpackedParts.isEmpty()) packed.value else unpackedParts.joinToString(separator = "")
    }
}

public override fun outerHtmlHead(accum: Appendable, depth: Int, out: Document.OutputSettings) {
/* For XML output, escape the DataNode in a CData section. The data may contain pseudo-CData content if it was
parsed as HTML, so don't double up Cdata. Output in polyglot HTML / XHTML / XML format. */
Expand Down Expand Up @@ -52,4 +84,22 @@ public class DataNode(data: String) : LeafNode(data) {
override fun clone(): DataNode {
return super.clone() as DataNode
}

companion object {
    /**
     * Options applied to every pattern declared in this companion.
     */
    private val regexOptions = setOf(RegexOption.IGNORE_CASE, RegexOption.MULTILINE)

    /**
     * Detects a packed script: `eval(function(p,a,c,k,e,r){` (or `...,d){`) up to the first
     * following `}))`.
     */
    private val packedRegex = Regex(
        "eval[(]function[(]p,a,c,k,e,[rd][)][{].*?[}][)]{2}",
        regexOptions
    )

    /**
     * Pulls the pieces needed for unpacking out of a packed script.
     * Group 1 is the quoted payload, groups 2 and 3 are the two numeric arguments that follow,
     * and group 4 is the quoted string on which `.split('|')` is called.
     */
    private val packedExtractRegex = Regex(
        "[}][(]'(.*)', *(\\d+), *(\\d+), *'(.*?)'[.]split[(]'[|]'[)]",
        regexOptions
    )

    /**
     * Matches each whole word of the payload, i.e. the tokens to look up while de-obfuscating.
     */
    private val unpackReplaceRegex = Regex(
        "\\b\\w+\\b",
        regexOptions
    )
}
}
Loading