Skip to content

Commit

Permalink
Merge pull request #86 from DatL4g/release
Browse files Browse the repository at this point in the history
Added JS unpacking
  • Loading branch information
itboy87 authored Sep 28, 2024
2 parents 0382dc3 + 4a3cc72 commit 59651a1
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 0 deletions.
10 changes: 10 additions & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/nodes/DataNodeTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.fleeksoft.ksoup.nodes

import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class DataNodeTest {

Expand Down Expand Up @@ -66,4 +67,13 @@ class DataNodeTest {
node.outerHtmlHead(accum, 0, Document.OutputSettings().syntax(Document.OutputSettings.Syntax.xml))
assertEquals("<![CDATA[other && <> data]]>", accum.toString())
}

@Test
fun recognizePacked() {
    // A P.A.C.K.E.R.-style script, kept in a raw string so the JavaScript needs no escaping.
    val packedSource = """
        eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0.1("2 3")',4,4,'console|log|Hello|World'.split('|'),0,{}))
    """.trimIndent()
    val dataNode = DataNode(packedSource)

    // Attach the node to a <script> element before querying isPacked.
    dataNode._parentNode = Element("script")

    assertTrue(dataNode.isPacked)
}
}
13 changes: 13 additions & 0 deletions ksoup-test/test/com/fleeksoft/ksoup/parser/HtmlParserTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2271,6 +2271,19 @@ class HtmlParserTest {
)
}

@Test
fun packedScript() {
    // Input: a packed script and the HTML that embeds it.
    val packedJs = "eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0.1(\"2 3\")',4,4,'console|log|Hello|World'.split('|'),0,{}))"
    val html = "<script type=\"text/javascript\">$packedJs</script>"
    // Expected result of unpacking the script above.
    val expectedUnpacked = "console.log(\"Hello World\")"

    val scriptElement = Ksoup.parse(html).selectFirst("script")!!

    // Parsing must leave the packed source untouched in the serialized output.
    assertEquals(html, scriptElement.outerHtml())

    val scriptData = scriptElement.childNode(0) as DataNode
    // The raw data stays packed; only getUnpackedData() decodes it.
    assertEquals(packedJs, scriptData.getWholeData())
    assertEquals(expectedUnpacked, scriptData.getUnpackedData())
}

companion object {
private fun dupeAttributeData(): List<Pair<String, String>> {
return listOf(
Expand Down
44 changes: 44 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/internal/Unbaser.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.fleeksoft.ksoup.internal

import kotlin.math.pow

/**
 * Converts words written in a positional numeral system of the given [base] back to integers.
 *
 * Bases 2..36 are delegated to [String.toIntOrNull]. Any other base is decoded with one of the
 * digit alphabets in [ALPHABET], chosen from [base] by [selector].
 *
 * @property base radix of the words passed to [unbase].
 */
internal data class Unbaser(
    private val base: Int
) {
    /** Key of the [ALPHABET] entry used when [base] cannot be handled by [String.toIntOrNull]. */
    private val selector: Int = when {
        base > 62 -> 95
        base > 54 -> 62
        base > 52 -> 54
        else -> 52
    }

    /** Digit character -> digit value for the selected alphabet; built on first use. */
    private val dict: Map<Char, Int>? by lazy {
        ALPHABET[selector]?.withIndex()?.associate { (index, cipher) -> cipher to index }
    }

    /**
     * Decodes [value] as a number in [base].
     *
     * For bases 2..36 a word that is not a valid number yields 0. For other bases, characters
     * missing from the alphabet count as digit 0. The result is accumulated with plain [Int]
     * arithmetic, so values beyond the [Int] range wrap around.
     *
     * @param value the encoded word, most significant digit first.
     * @return the decoded integer.
     */
    fun unbase(value: String): Int {
        if (base in 2..36) {
            return value.toIntOrNull(base) ?: 0
        }

        val digits = dict ?: return 0
        // Horner's scheme with integers: exact, unlike summing Float powers (24-bit mantissa).
        return value.fold(0) { acc, cipher -> acc * base + (digits[cipher] ?: 0) }
    }

    companion object {
        private val ALPHABET = mapOf<Int, String>(
            52 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP",
            54 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQR",
            62 to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
            // The 95 printable ASCII characters from ' ' to '~', in code-point order. Generated
            // rather than typed out so no escape sequence can sneak in an extra character.
            95 to (' '..'~').joinToString(separator = "")
        )
    }
}
55 changes: 55 additions & 0 deletions ksoup/src/com/fleeksoft/ksoup/nodes/DataNode.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.fleeksoft.ksoup.nodes

import com.fleeksoft.ksoup.internal.Unbaser

/**
* Create a new DataNode.
* A data node, for contents of style, script tags etc, where contents should not show in text().
Expand All @@ -8,6 +10,9 @@ package com.fleeksoft.ksoup.nodes
*/
public class DataNode(data: String) : LeafNode(data) {

/**
 * Whether this node's whole data is recognised as packed JavaScript.
 * Re-evaluated against the current data on every access.
 */
public val isPacked: Boolean
    get() {
        val wholeData = getWholeData()
        return isPacked(wholeData)
    }

/** Returns the fixed node name used for data nodes: `#data`. */
override fun nodeName(): String = "#data"
Expand All @@ -24,6 +29,34 @@ public class DataNode(data: String) : LeafNode(data) {
return this
}

/**
 * Returns this node's data with every packed JavaScript section replaced by its unpacked form.
 *
 * Data that is not recognised as packed is returned unchanged. A packed section that cannot be
 * decoded (no extractable arguments, or a symbol table whose size does not match the declared
 * count) is left in place as-is instead of being dropped from the result.
 *
 * @return the unpacked data, or the whole data when there is nothing to unpack.
 */
public fun getUnpackedData(): String {
    val currentData = getWholeData()
    if (!isPacked(currentData)) {
        return currentData
    }

    return currentData.replace(packedRegex) { packed ->
        val unpackedParts = packedExtractRegex.findAll(packed.value)
            .mapNotNull { matchResult -> unpackMatch(matchResult) }
            .toList()

        // Keep the packed text when nothing could be decoded rather than silently deleting it.
        if (unpackedParts.isEmpty()) packed.value else unpackedParts.joinToString(separator = "")
    }
}

/**
 * Decodes one match of [packedExtractRegex].
 *
 * Group 1 is the payload, group 2 the radix (10 when it is not a valid integer), group 3 the
 * declared symbol count and group 4 the `|`-separated symbol table.
 *
 * @return the payload with each word replaced by its symbol-table entry, or `null` when the
 * symbol table size does not match the declared count.
 */
private fun unpackMatch(matchResult: MatchResult): String? {
    val payload = matchResult.groups[1]?.value ?: return null
    val symtab = matchResult.groups[4]?.value?.split('|') ?: return null
    val count = matchResult.groups[3]?.value?.toIntOrNull() ?: return null
    if (symtab.size != count) {
        return null
    }

    val radix = matchResult.groups[2]?.value?.toIntOrNull() ?: 10
    val unbaser = Unbaser(radix)

    return payload.replace(unpackReplaceRegex) { match ->
        val word = match.value
        // A word keeps its text when its index is outside the table or the entry is empty;
        // getOrNull avoids the IndexOutOfBoundsException a plain symtab[index] would throw.
        symtab.getOrNull(unbaser.unbase(word))?.ifEmpty { word } ?: word
    }
}

public override fun outerHtmlHead(accum: Appendable, depth: Int, out: Document.OutputSettings) {
/* For XML output, escape the DataNode in a CData section. The data may contain pseudo-CData content if it was
parsed as HTML, so don't double up Cdata. Output in polyglot HTML / XHTML / XML format. */
Expand Down Expand Up @@ -52,4 +85,26 @@ public class DataNode(data: String) : LeafNode(data) {
/** Clones this node via the superclass implementation, with the result cast to [DataNode]. */
override fun clone(): DataNode = super.clone() as DataNode

/**
 * Checks whether [data] counts as packed JavaScript for this node: the parent must be named
 * `script` and [data] must contain a match of [packedRegex].
 */
private fun isPacked(data: String): Boolean {
    if (!parentNameIs("script")) {
        return false
    }
    return data.contains(packedRegex)
}

companion object {
    /** Options applied to every pattern declared in this companion. */
    private val regexOptions = setOf(RegexOption.IGNORE_CASE, RegexOption.MULTILINE)

    /**
     * Detects a packed function: an `eval(function(p,a,c,k,e,r){` (or `...,d){`) opener
     * followed lazily by the nearest `}))`.
     */
    private val packedRegex =
        "eval[(]function[(]p,a,c,k,e,[rd][)][{].*?[}][)]{2}".toRegex(regexOptions)

    /**
     * Captures the call arguments of a packed function, needed to unpack the code:
     * group 1 = payload, group 2 and group 3 = the two numeric arguments,
     * group 4 = the `|`-separated symbol table.
     */
    private val packedExtractRegex =
        "[}][(]'(.*)', *(\\d+), *(\\d+), *'(.*?)'[.]split[(]'[|]'[)]".toRegex(regexOptions)

    /**
     * Matches each whole word (function names and variables) so it can be de-obfuscated.
     */
    private val unpackReplaceRegex = "\\b\\w+\\b".toRegex(regexOptions)
}
}

0 comments on commit 59651a1

Please sign in to comment.