script_classifier.py
15. Script Analysis with Python — Full Walkthrough
Overview
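In this walkthrough, we build a small Python toolkit for Bitcoin script analysis. We will: fetch a decoded transaction from the mempool.space API, walk its inputs and outputs, identify script types, disassemble each scriptPubKey, decode witness data, and produce a human-readable report. (Checking Miniscript compatibility is a natural next step, but it is not implemented by the modules shown below.)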
Environment Setup
pip install requests base58 bech32 bitcoin-utils python-bitcoinlib
Module 1: Script Type Classifier
# script_classifier.py
from enum import Enum
class ScriptType(Enum):
    """ScriptPubKey templates that :func:`classify_script` can report."""

    P2PK = "P2PK"            # <pubkey> OP_CHECKSIG
    P2PKH = "P2PKH"          # OP_DUP OP_HASH160 <20> OP_EQUALVERIFY OP_CHECKSIG
    P2SH = "P2SH"            # OP_HASH160 <20> OP_EQUAL
    P2WPKH = "P2WPKH"        # OP_0 <20>
    P2WSH = "P2WSH"          # OP_0 <32>
    P2TR = "P2TR"            # OP_1 <32>
    OP_RETURN = "OP_RETURN"  # script starting with OP_RETURN
    MULTISIG = "MULTISIG"    # OP_m <pubkey>... OP_n OP_CHECKMULTISIG
    UNKNOWN = "UNKNOWN"      # anything else, including unparsable input


def _is_valid_pubkey(key: bytes) -> bool:
    """Return True if the SEC prefix byte of *key* matches its length."""
    if len(key) == 33:
        return key[0] in (0x02, 0x03)        # compressed
    if len(key) == 65:
        return key[0] in (0x04, 0x06, 0x07)  # uncompressed / hybrid
    return False


def _is_bare_multisig(data: bytes) -> bool:
    """Return True if *data* is exactly ``OP_m <pubkey>... OP_n OP_CHECKMULTISIG``."""
    # Smallest possible form: OP_1, one 33-byte key push (34 bytes), OP_1, 0xae.
    if len(data) < 37 or data[-1] != 0xae:
        return False
    # OP_1..OP_16 are 0x51..0x60, so subtracting 0x50 yields the number.
    required = data[0] - 0x50
    total = data[-2] - 0x50
    if not 1 <= required <= total <= 16:
        return False
    pos = 1
    end = len(data) - 2  # index of OP_n; key pushes must fill [1, end) exactly
    key_count = 0
    while pos < end:
        push_len = data[pos]
        key_end = pos + 1 + push_len
        if push_len not in (33, 65) or key_end > end:
            return False
        if not _is_valid_pubkey(data[pos + 1:key_end]):
            return False
        pos = key_end
        key_count += 1
    return key_count == total


def classify_script(script_hex: str) -> ScriptType:
    """Classify a scriptPubKey hex string into its script type.

    Args:
        script_hex: The scriptPubKey as a hex string.

    Returns:
        The matching ``ScriptType``. ``ScriptType.UNKNOWN`` is returned when
        no template matches or when *script_hex* is not a valid hex string.
    """
    try:
        data = bytes.fromhex(script_hex)
    except (TypeError, ValueError):
        # TypeError covers non-string input such as None.
        return ScriptType.UNKNOWN
    n = len(data)
    # OP_RETURN
    if n >= 1 and data[0] == 0x6a:
        return ScriptType.OP_RETURN
    # P2PKH: 76 a9 14 [20] 88 ac
    if (n == 25 and data[0] == 0x76 and data[1] == 0xa9
            and data[2] == 0x14 and data[23] == 0x88 and data[24] == 0xac):
        return ScriptType.P2PKH
    # P2SH: a9 14 [20] 87
    if n == 23 and data[0] == 0xa9 and data[1] == 0x14 and data[22] == 0x87:
        return ScriptType.P2SH
    # P2WPKH: 00 14 [20]
    if n == 22 and data[0] == 0x00 and data[1] == 0x14:
        return ScriptType.P2WPKH
    # P2WSH: 00 20 [32]
    if n == 34 and data[0] == 0x00 and data[1] == 0x20:
        return ScriptType.P2WSH
    # P2TR: 51 20 [32]
    if n == 34 and data[0] == 0x51 and data[1] == 0x20:
        return ScriptType.P2TR
    # P2PK compressed: 21 [33] ac -- the pushed blob must look like a key,
    # otherwise any 33-byte push followed by OP_CHECKSIG would match.
    if (n == 35 and data[0] == 0x21 and data[34] == 0xac
            and _is_valid_pubkey(data[1:34])):
        return ScriptType.P2PK
    # P2PK uncompressed: 41 [65] ac
    if (n == 67 and data[0] == 0x41 and data[66] == 0xac
            and _is_valid_pubkey(data[1:66])):
        return ScriptType.P2PK
    # Bare multisig: match the whole template, not just a trailing 0xae byte
    # (which can equally be the last byte of pushed data).
    if _is_bare_multisig(data):
        return ScriptType.MULTISIG
    return ScriptType.UNKNOWN
Module 2: Script Disassembler
# disassembler.py
# Opcode byte -> mnemonic for the disassembler. Direct data pushes
# (0x01-0x4b) are intentionally absent: they carry a length, not a name.
OPCODE_NAMES = {
    # Constants
    0x00: "OP_0", 0x4c: "OP_PUSHDATA1", 0x4d: "OP_PUSHDATA2",
    0x4e: "OP_PUSHDATA4", 0x4f: "OP_1NEGATE",
    # OP_1 .. OP_16 occupy 0x51 .. 0x60
    **{0x50 + k: f"OP_{k}" for k in range(1, 17)},
    # Flow control
    0x61: "OP_NOP", 0x63: "OP_IF", 0x64: "OP_NOTIF",
    0x67: "OP_ELSE", 0x68: "OP_ENDIF", 0x69: "OP_VERIFY",
    0x6a: "OP_RETURN",
    # Stack
    0x6b: "OP_TOALTSTACK", 0x6c: "OP_FROMALTSTACK",
    0x6d: "OP_2DROP", 0x6e: "OP_2DUP", 0x6f: "OP_3DUP",
    0x70: "OP_2OVER", 0x71: "OP_2ROT", 0x72: "OP_2SWAP",
    0x73: "OP_IFDUP", 0x74: "OP_DEPTH", 0x75: "OP_DROP",
    0x76: "OP_DUP", 0x77: "OP_NIP", 0x78: "OP_OVER",
    0x79: "OP_PICK", 0x7a: "OP_ROLL", 0x7b: "OP_ROT",
    0x7c: "OP_SWAP", 0x7d: "OP_TUCK",
    # Splice / bitwise comparison
    0x82: "OP_SIZE", 0x87: "OP_EQUAL", 0x88: "OP_EQUALVERIFY",
    # Arithmetic (0x8b/0x8c are the increment/decrement forms;
    # the two-operand OP_ADD/OP_SUB live at 0x93/0x94)
    0x8b: "OP_1ADD", 0x8c: "OP_1SUB", 0x8f: "OP_NEGATE",
    0x90: "OP_ABS", 0x91: "OP_NOT", 0x92: "OP_0NOTEQUAL",
    0x93: "OP_ADD", 0x94: "OP_SUB",
    0x9a: "OP_BOOLAND", 0x9b: "OP_BOOLOR",
    0x9c: "OP_NUMEQUAL", 0x9d: "OP_NUMEQUALVERIFY",
    0x9e: "OP_NUMNOTEQUAL", 0x9f: "OP_LESSTHAN",
    0xa0: "OP_GREATERTHAN", 0xa1: "OP_LESSTHANOREQUAL",
    0xa2: "OP_GREATERTHANOREQUAL", 0xa3: "OP_MIN",
    0xa4: "OP_MAX", 0xa5: "OP_WITHIN",
    # Crypto
    0xa6: "OP_RIPEMD160", 0xa7: "OP_SHA1", 0xa8: "OP_SHA256",
    0xa9: "OP_HASH160", 0xaa: "OP_HASH256",
    0xab: "OP_CODESEPARATOR",
    0xac: "OP_CHECKSIG", 0xad: "OP_CHECKSIGVERIFY",
    0xae: "OP_CHECKMULTISIG", 0xaf: "OP_CHECKMULTISIGVERIFY",
    # Locktime
    0xb1: "OP_CHECKLOCKTIMEVERIFY", 0xb2: "OP_CHECKSEQUENCEVERIFY",
    # Tapscript (BIP342)
    0xba: "OP_CHECKSIGADD",
}
def disassemble(script_hex: str) -> str:
    """Convert a hex script to its ASM representation.

    Direct pushes (opcodes 0x01-0x4b) are rendered as the pushed data in
    hex. ``OP_PUSHDATA1/2/4`` are rendered as the opcode name followed by
    the pushed data. Every other byte is looked up in ``OPCODE_NAMES`` and
    falls back to ``OP_UNKNOWN(0x..)``.

    If a push runs past the end of the script, ``[error]`` is appended and
    decoding stops, rather than raising or silently printing short data.

    Args:
        script_hex: The script as a hex string.

    Returns:
        Space-separated ASM tokens (an empty string for an empty script).

    Raises:
        ValueError: If *script_hex* is not valid hex.
    """
    data = bytes.fromhex(script_hex)
    # Width in bytes of the little-endian length field after each PUSHDATA.
    pushdata_widths = {0x4c: 1, 0x4d: 2, 0x4e: 4}
    parts = []
    pos = 0
    end = len(data)
    while pos < end:
        opcode = data[pos]
        pos += 1
        if 0x01 <= opcode <= 0x4b:
            # The opcode itself is the number of bytes to push.
            push_len = opcode
        elif opcode in pushdata_widths:
            width = pushdata_widths[opcode]
            parts.append(f"OP_PUSHDATA{width}")
            if pos + width > end:
                parts.append("[error]")
                break
            push_len = int.from_bytes(data[pos:pos + width], "little")
            pos += width
        else:
            parts.append(OPCODE_NAMES.get(opcode, f"OP_UNKNOWN(0x{opcode:02x})"))
            continue
        if pos + push_len > end:
            # Declared push length exceeds the remaining script bytes.
            parts.append("[error]")
            break
        parts.append(data[pos:pos + push_len].hex())
        pos += push_len
    return " ".join(parts)
Module 3: Transaction Fetcher and Analyzer
# analyzer.py
import requests
from script_classifier import classify_script, ScriptType
from disassembler import disassemble
def fetch_transaction(txid: str, network: str = "mainnet") -> dict:
    """Fetch a decoded transaction from the mempool.space API.

    Args:
        txid: Transaction id as 64 hexadecimal characters.
        network: ``"mainnet"`` (default), ``"testnet"``, ``"testnet4"`` or
            ``"signet"``. Any other value falls back to the testnet
            endpoint, as it always has.

    Returns:
        The JSON body of the response, parsed into a dict.

    Raises:
        ValueError: If *txid* is not 64 hex characters. Checked before any
            request is made, because the value is placed in the URL path.
        requests.HTTPError: If the API answers with an error status.
    """
    hex_digits = "0123456789abcdefABCDEF"
    if not (isinstance(txid, str) and len(txid) == 64
            and all(ch in hex_digits for ch in txid)):
        raise ValueError(f"Invalid txid (expected 64 hex characters): {txid!r}")
    # Path prefix per network on mempool.space.
    prefixes = {
        "mainnet": "",
        "testnet": "/testnet",
        "testnet4": "/testnet4",
        "signet": "/signet",
    }
    prefix = prefixes.get(network, "/testnet")
    url = f"https://mempool.space{prefix}/api/tx/{txid}"
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return resp.json()
def _print_inputs(vins: list) -> None:
    """Print one entry per transaction input (outpoint, scripts, sequence)."""
    print(f"\nINPUTS ({len(vins)}):")
    for idx, txin in enumerate(vins):
        if txin.get("is_coinbase"):
            print(f" [{idx}] COINBASE")
            continue
        print(f" [{idx}] {txin['txid']}:{txin['vout']}")
        if txin.get("scriptsig"):
            print(f" ScriptSig ASM: {txin.get('scriptsig_asm', 'N/A')}")
        stack = txin.get("witness")
        if stack:
            print(f" Witness items: {len(stack)}")
            for pos, item in enumerate(stack):
                # Witness items are hex strings; show at most 40 characters.
                ellipsis = "..." if len(item) > 40 else ""
                print(f" [{pos}] ({len(item)//2} bytes): {item[:40]}{ellipsis}")
        print(f" Sequence: 0x{txin['sequence']:08x}")


def _print_outputs(vouts: list) -> None:
    """Print one entry per transaction output, then the summed output value."""
    print(f"\nOUTPUTS ({len(vouts)}):")
    running_total = 0
    for idx, txout in enumerate(vouts):
        sats = txout["value"]
        btc = sats / 1e8
        running_total += sats
        script_hex = txout["scriptpubkey"]
        kind = classify_script(script_hex)
        asm = disassemble(script_hex)
        addr = txout.get("scriptpubkey_address", "N/A")
        print(f" [{idx}] {btc:.8f} BTC")
        print(f" Type: {kind.value}")
        print(f" Address: {addr}")
        print(f" ScriptPubKey: {script_hex}")
        print(f" ASM: {asm}")
    print(f"\nTotal Output: {running_total / 1e8:.8f} BTC")


def analyze_transaction(txid: str, network: str = "mainnet") -> None:
    """Fetch a transaction and print a report of its header, inputs and outputs.

    Args:
        txid: Id of the transaction to fetch via ``fetch_transaction``.
        network: Passed through to ``fetch_transaction``.
    """
    tx = fetch_transaction(txid, network)
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"Transaction: {txid}")
    print(f"Version: {tx['version']} | Locktime: {tx['locktime']}")
    print(f"Size: {tx['size']} bytes | vSize: {tx['vsize']} vbytes")
    print(f"Fee: {tx.get('fee', 'N/A')} sats")
    print(rule)
    _print_inputs(tx["vin"])
    _print_outputs(tx["vout"])
    print(f"{rule}\n")
if __name__ == "__main__":
    # Demo run against the mainnet transaction from Satoshi to Hal Finney.
    HAL_FINNEY_TXID = "f4184fc596403b9d638783cf57adfe4c75c605f6356fbc91338530e9831e9e16"
    analyze_transaction(HAL_FINNEY_TXID)
Module 4: Witness Decoder
# witness_decoder.py
def decode_witness_p2wpkh(witness: list) -> dict:
    """Decode a P2WPKH witness stack: ``[signature, pubkey]``.

    Args:
        witness: Witness items as hex strings.

    Returns:
        A dict with ``type``, ``signature`` (hex without the trailing
        sighash byte), ``sighash_type`` (a name, or ``0x..`` for values not
        in the table), ``pubkey`` and ``pubkey_compressed``. On malformed
        input a dict with a single ``error`` key is returned instead.
    """
    if len(witness) != 2:
        return {"error": f"Expected 2 items, got {len(witness)}"}
    sig_hex, pubkey_hex = witness
    try:
        # Unpacking exactly one byte also rejects an empty signature:
        # both bad hex and a wrong byte count raise ValueError here.
        (sighash,) = bytes.fromhex(sig_hex[-2:])
    except ValueError:
        return {"error": "Signature is empty or not valid hex"}
    sighash_names = {
        0x01: "ALL", 0x02: "NONE", 0x03: "SINGLE",
        0x81: "ALL|ANYONECANPAY", 0x82: "NONE|ANYONECANPAY",
        0x83: "SINGLE|ANYONECANPAY",
    }
    return {
        "type": "P2WPKH",
        "signature": sig_hex[:-2],
        "sighash_type": sighash_names.get(sighash, f"0x{sighash:02x}"),
        "pubkey": pubkey_hex,
        "pubkey_compressed": pubkey_hex[0:2] in ("02", "03"),
    }
def decode_witness_p2tr_keypath(witness: list) -> dict:
    """Decode a Taproot key-path witness.

    The single witness item is a 64-byte Schnorr signature, optionally
    followed by one explicit sighash byte (65 bytes in total).

    Args:
        witness: Witness items as hex strings.

    Returns:
        A dict with ``type``, ``schnorr_signature`` (the first 64 bytes as
        hex) and ``sighash_type``: ``"DEFAULT (ALL)"`` when no sighash byte
        is present, otherwise a name, or ``0x..`` for values not in the
        table. On malformed input a dict with a single ``error`` key is
        returned instead.
    """
    if len(witness) != 1:
        return {"error": "Key-path spend should have exactly 1 witness item"}
    sig_hex = witness[0]
    # 128 hex chars = 64-byte signature; 130 = signature + sighash byte.
    if len(sig_hex) not in (128, 130):
        return {
            "error": "Schnorr signature must be 64 or 65 bytes, "
                     f"got {len(sig_hex)} hex characters"
        }
    if len(sig_hex) == 128:
        sighash_type = "DEFAULT (ALL)"
    else:
        try:
            (sighash,) = bytes.fromhex(sig_hex[128:])
        except ValueError:
            return {"error": "Sighash byte is not valid hex"}
        sighash_names = {
            0x01: "ALL", 0x02: "NONE", 0x03: "SINGLE",
            0x81: "ALL|ANYONECANPAY", 0x82: "NONE|ANYONECANPAY",
            0x83: "SINGLE|ANYONECANPAY",
        }
        sighash_type = sighash_names.get(sighash, f"0x{sighash:02x}")
    return {
        "type": "P2TR key-path",
        "schnorr_signature": sig_hex[:128],
        "sighash_type": sighash_type,
    }
def decode_witness_p2tr_scriptpath(witness: list) -> dict:
    """Decode a Taproot script-path witness.

    Expected layout is ``[script inputs..., tapscript, control block]``,
    optionally followed by an annex. Per BIP341, when the stack has at
    least two items and the last one starts with byte 0x50, that item is
    the annex and is removed before the rest is interpreted.

    Args:
        witness: Witness items as hex strings. The list is not modified.

    Returns:
        A dict with ``type``, ``tapscript``, ``internal_key`` (lowercase
        hex), ``leaf_version``, ``parity``, ``merkle_path_nodes`` and
        ``script_data``. An ``annex`` key is added only when an annex was
        present. On malformed input a dict with a single ``error`` key is
        returned instead.
    """
    items = list(witness)
    annex = None
    if len(items) >= 2 and items[-1][:2] == "50":
        annex = items.pop()
    if len(items) < 2:
        return {"error": "Script-path spend needs at least script + control block"}
    control_block = items[-1]
    tapscript = items[-2]
    script_data = items[:-2]
    try:
        cb = bytes.fromhex(control_block)
    except ValueError:
        return {"error": "Control block is not valid hex"}
    # Control block = 1 version/parity byte + 32-byte internal key
    # + m 32-byte merkle nodes, with m at most 128.
    path_bytes = len(cb) - 33
    if path_bytes < 0 or path_bytes % 32 != 0 or path_bytes // 32 > 128:
        return {
            "error": "Control block must be 33 + 32*m bytes (0 <= m <= 128), "
                     f"got {len(cb)}"
        }
    leaf_version = cb[0] & 0xfe  # upper 7 bits
    parity = cb[0] & 0x01        # lowest bit: parity of the output key
    decoded = {
        "type": "P2TR script-path",
        "tapscript": tapscript,
        "internal_key": cb[1:33].hex(),
        "leaf_version": f"0x{leaf_version:02x}",
        "parity": parity,
        "merkle_path_nodes": path_bytes // 32,
        "script_data": script_data,
    }
    if annex is not None:
        decoded["annex"] = annex
    return decoded
Running the Full Analysis
# main.py — Complete analysis runner
from analyzer import analyze_transaction
from witness_decoder import decode_witness_p2wpkh, decode_witness_p2tr_keypath
import requests
def deep_analyze(txid: str):
    """Print the standard transaction report, then decode each input's witness.

    The witness kind is guessed from the shape of the stack alone: a single
    64/65-byte item is treated as a Taproot key-path spend, and two items
    with a 33-byte second item as P2WPKH. Anything else is printed raw.

    Args:
        txid: Id of a mainnet transaction to fetch from mempool.space.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # Bound the wait and fail loudly on HTTP errors, so that an error page
    # does not surface later as a confusing KeyError on tx["vin"].
    resp = requests.get(f"https://mempool.space/api/tx/{txid}", timeout=10)
    resp.raise_for_status()
    tx = resp.json()
    analyze_transaction(txid)
    print("WITNESS DEEP DECODE:")
    for i, vin in enumerate(tx["vin"]):
        witness = vin.get("witness", [])
        if not witness:
            continue
        print(f"\n Input [{i}]:")
        # Determine witness type from the output being spent
        # In practice, look up the scriptPubKey of the UTXO being spent
        if len(witness) == 1 and len(witness[0]) in (128, 130):
            decoded = decode_witness_p2tr_keypath(witness)
        elif len(witness) == 2 and len(witness[1]) == 66:
            decoded = decode_witness_p2wpkh(witness)
        else:
            decoded = {"type": "Unknown", "raw": witness}
        for k, v in decoded.items():
            print(f" {k}: {v}")
if __name__ == "__main__":
    # Demo run: the txid below is the walkthrough's Taproot example.
    deep_analyze("9a77a0b6e4bf553f1e36d0c3a73bf32571024cd2d15ca1c2ac20baa4e11cdf01")
Output Example
============================================================
Transaction: 9a77a0b6...
Version: 2 | Locktime: 0
Size: 153 bytes | vSize: 102 vbytes
Fee: 210 sats
============================================================
INPUTS (1):
[0] abcd1234...:0
Witness items: 1
[0] (64 bytes): d4e7f2a1b3...
OUTPUTS (2):
[0] 0.00010000 BTC
Type: P2TR
Address: bc1p...
ScriptPubKey: 5120...
ASM: OP_1 <32-byte-key>
[1] 0.00089790 BTC
Type: P2TR
Address: bc1p...
WITNESS DEEP DECODE:
Input [0]:
type: P2TR key-path
schnorr_signature: d4e7f2a1...
sighash_type: DEFAULT (ALL)
Summary
This walkthrough demonstrates how Python can be used to build a complete Bitcoin script analysis pipeline: from raw hex parsing and script type identification, through disassembly and witness decoding, to human-readable reporting. Combining these modules gives you a powerful custom analysis tool that goes beyond what standard block explorers expose, and provides the low-level detail needed for serious script debugging.
Pro Tip
When debugging scripts, always start with a high-level disassembly before diving into the stack trace. Tools like bitcoin-cli decodescript are your first line of defense in identifying standard script patterns.
TeachMeBitcoin is an ad-free, open-source educational repository curated by a passionate team of Bitcoin researchers and educators for public benefit. If you found our articles helpful, please consider supporting our hosting and ongoing content updates with a clean donation: