TeachMeBitcoin

script_classifier.py

From TeachMeBitcoin, the free encyclopedia Reading time: 6 min

15. Script Analysis with Python — Full Walkthrough

Overview

In this walkthrough, we build a complete Python toolkit for Bitcoin script analysis. We will: fetch a raw transaction, parse its inputs and outputs, identify script types, decode witness data, check Miniscript compatibility, and produce a human-readable report.

Environment Setup

pip install requests base58 bech32 bitcoin-utils python-bitcoinlib

Module 1: Script Type Classifier

# script_classifier.py

from enum import Enum

class ScriptType(Enum):
    """Categories of scriptPubKey that ``classify_script`` can report.

    Every member's value is the same string as its name, so ``.value`` can be
    printed directly in reports.
    """
    P2PK = "P2PK"            # pay-to-public-key: <pubkey> OP_CHECKSIG
    P2PKH = "P2PKH"          # pay-to-public-key-hash
    P2SH = "P2SH"            # pay-to-script-hash
    P2WPKH = "P2WPKH"        # witness v0 program, 20 bytes
    P2WSH = "P2WSH"          # witness v0 program, 32 bytes
    P2TR = "P2TR"            # witness v1 program, 32 bytes (Taproot)
    OP_RETURN = "OP_RETURN"  # script starting with OP_RETURN
    MULTISIG = "MULTISIG"    # bare OP_CHECKMULTISIG script
    UNKNOWN = "UNKNOWN"      # invalid hex or no recognised pattern

def _is_bare_multisig(data: bytes) -> bool:
    """Return True if *data* is ``OP_m <pubkey>... OP_n OP_CHECKMULTISIG``.

    Checks the structure only: m and n must be OP_1..OP_16 with m <= n, and
    exactly n pushes of 33 or 65 bytes must sit between them. The pushed
    bytes are not checked to be valid public keys.
    """
    # Smallest form: OP_1 <33-byte key> OP_1 OP_CHECKMULTISIG = 37 bytes.
    if len(data) < 37 or data[-1] != 0xae:
        return False

    op_m, op_n = data[0], data[-2]
    # OP_1..OP_16 are opcodes 0x51..0x60; the chained comparison also
    # enforces m <= n.
    if not (0x51 <= op_m <= op_n <= 0x60):
        return False

    expected_keys = op_n - 0x50
    pos = 1
    end = len(data) - 2  # index of OP_n
    key_count = 0
    while pos < end:
        push_len = data[pos]
        if push_len not in (0x21, 0x41):  # compressed / uncompressed key size
            return False
        pos += 1 + push_len
        key_count += 1

    # The pushes must end exactly at OP_n and match the declared key count.
    return pos == end and key_count == expected_keys


def classify_script(script_hex: str) -> ScriptType:
    """Classify a scriptPubKey hex string into its script type.

    Patterns are tested by exact length and fixed opcode positions, in this
    order: OP_RETURN, P2PKH, P2SH, P2WPKH, P2WSH, P2TR, P2PK, bare multisig.

    Args:
        script_hex: The scriptPubKey as a hexadecimal string.

    Returns:
        The matching ``ScriptType``; ``ScriptType.UNKNOWN`` when the input is
        not valid hex or matches no pattern.
    """
    try:
        data = bytes.fromhex(script_hex)
    except ValueError:
        return ScriptType.UNKNOWN

    n = len(data)

    # OP_RETURN: any script whose first opcode is 0x6a
    if n >= 1 and data[0] == 0x6a:
        return ScriptType.OP_RETURN

    # P2PKH: 76 a9 14 [20] 88 ac
    if (n == 25 and data[0] == 0x76 and data[1] == 0xa9
            and data[2] == 0x14 and data[23] == 0x88 and data[24] == 0xac):
        return ScriptType.P2PKH

    # P2SH: a9 14 [20] 87
    if n == 23 and data[0] == 0xa9 and data[1] == 0x14 and data[22] == 0x87:
        return ScriptType.P2SH

    # P2WPKH: 00 14 [20]
    if n == 22 and data[0] == 0x00 and data[1] == 0x14:
        return ScriptType.P2WPKH

    # P2WSH: 00 20 [32]
    if n == 34 and data[0] == 0x00 and data[1] == 0x20:
        return ScriptType.P2WSH

    # P2TR: 51 20 [32]
    if n == 34 and data[0] == 0x51 and data[1] == 0x20:
        return ScriptType.P2TR

    # P2PK compressed: 21 [33] ac
    if n == 35 and data[0] == 0x21 and data[34] == 0xac:
        return ScriptType.P2PK

    # P2PK uncompressed: 41 [65] ac
    if n == 67 and data[0] == 0x41 and data[66] == 0xac:
        return ScriptType.P2PK

    # Bare multisig: validate the whole m-of-n layout. Checking only the
    # trailing 0xae byte would also match unrelated scripts that merely end
    # in that byte.
    if _is_bare_multisig(data):
        return ScriptType.MULTISIG

    return ScriptType.UNKNOWN

Module 2: Script Disassembler

# disassembler.py

# Maps an opcode byte to its ASM mnemonic. Direct data pushes (0x01-0x4b) are
# deliberately absent: those bytes are a payload length, not a named opcode.
OPCODE_NAMES = {
    # Constants and push operators
    0x00: "OP_0", 0x4c: "OP_PUSHDATA1", 0x4d: "OP_PUSHDATA2",
    0x4e: "OP_PUSHDATA4", 0x4f: "OP_1NEGATE", 0x50: "OP_RESERVED",
    0x51: "OP_1", 0x52: "OP_2", 0x53: "OP_3", 0x54: "OP_4",
    0x55: "OP_5", 0x56: "OP_6", 0x57: "OP_7", 0x58: "OP_8",
    0x59: "OP_9", 0x5a: "OP_10", 0x5b: "OP_11", 0x5c: "OP_12",
    0x5d: "OP_13", 0x5e: "OP_14", 0x5f: "OP_15", 0x60: "OP_16",
    # Flow control
    0x61: "OP_NOP", 0x63: "OP_IF", 0x64: "OP_NOTIF", 0x67: "OP_ELSE",
    0x68: "OP_ENDIF", 0x69: "OP_VERIFY", 0x6a: "OP_RETURN",
    # Stack manipulation
    0x6b: "OP_TOALTSTACK", 0x6c: "OP_FROMALTSTACK", 0x6d: "OP_2DROP",
    0x6e: "OP_2DUP", 0x6f: "OP_3DUP", 0x73: "OP_IFDUP", 0x74: "OP_DEPTH",
    0x75: "OP_DROP", 0x76: "OP_DUP", 0x77: "OP_NIP", 0x78: "OP_OVER",
    0x79: "OP_PICK", 0x7a: "OP_ROLL", 0x7b: "OP_ROT", 0x7c: "OP_SWAP",
    0x7d: "OP_TUCK", 0x82: "OP_SIZE",
    # Equality and arithmetic. 0x8b/0x8c are the increment/decrement
    # opcodes; two-operand OP_ADD/OP_SUB live at 0x93/0x94.
    0x87: "OP_EQUAL", 0x88: "OP_EQUALVERIFY",
    0x8b: "OP_1ADD", 0x8c: "OP_1SUB", 0x8f: "OP_NEGATE", 0x90: "OP_ABS",
    0x91: "OP_NOT", 0x92: "OP_0NOTEQUAL", 0x93: "OP_ADD", 0x94: "OP_SUB",
    0x9a: "OP_BOOLAND", 0x9b: "OP_BOOLOR", 0x9c: "OP_NUMEQUAL",
    0x9d: "OP_NUMEQUALVERIFY", 0x9e: "OP_NUMNOTEQUAL",
    0x9f: "OP_LESSTHAN", 0xa0: "OP_GREATERTHAN",
    0xa1: "OP_LESSTHANOREQUAL", 0xa2: "OP_GREATERTHANOREQUAL",
    0xa3: "OP_MIN", 0xa4: "OP_MAX", 0xa5: "OP_WITHIN",
    # Hashing and signature checks
    0xa6: "OP_RIPEMD160", 0xa7: "OP_SHA1", 0xa8: "OP_SHA256",
    0xa9: "OP_HASH160", 0xaa: "OP_HASH256", 0xab: "OP_CODESEPARATOR",
    0xac: "OP_CHECKSIG", 0xad: "OP_CHECKSIGVERIFY",
    0xae: "OP_CHECKMULTISIG", 0xaf: "OP_CHECKMULTISIGVERIFY",
    0xba: "OP_CHECKSIGADD",
    # Timelocks
    0xb1: "OP_CHECKLOCKTIMEVERIFY", 0xb2: "OP_CHECKSEQUENCEVERIFY",
}

def disassemble(script_hex: str) -> str:
    """Convert a hex script to its ASM representation.

    Named opcodes are rendered through ``OPCODE_NAMES``; pushed data is
    rendered as hex. OP_PUSHDATA1/2/4 are shown by name followed by their
    payload. Opcodes without a name appear as ``OP_UNKNOWN(0x..)``.

    If a push announces more bytes than the script contains, or a
    OP_PUSHDATAn length field is cut off, the token ``[error]`` is emitted
    and decoding stops, instead of raising or silently printing a shortened
    payload.

    Args:
        script_hex: The script as a hexadecimal string.

    Returns:
        Space-separated ASM tokens.

    Raises:
        ValueError: If ``script_hex`` is not valid hexadecimal.
    """
    data = bytes.fromhex(script_hex)
    # Size in bytes of the little-endian length field after each OP_PUSHDATAn.
    pushdata_width = {0x4c: 1, 0x4d: 2, 0x4e: 4}
    parts = []
    i = 0
    end = len(data)
    while i < end:
        byte = data[i]
        i += 1

        if 0x01 <= byte <= 0x4b:
            # Direct push: the opcode value itself is the payload length.
            length = byte
        elif byte in pushdata_width:
            width = pushdata_width[byte]
            parts.append(OPCODE_NAMES.get(byte, f"OP_PUSHDATA{width}"))
            if i + width > end:  # length field itself is truncated
                parts.append("[error]")
                break
            length = int.from_bytes(data[i:i + width], "little")
            i += width
        else:
            parts.append(OPCODE_NAMES.get(byte, f"OP_UNKNOWN(0x{byte:02x})"))
            continue

        if i + length > end:  # payload runs past the end of the script
            parts.append("[error]")
            break
        parts.append(data[i:i + length].hex())
        i += length

    return " ".join(parts)

Module 3: Transaction Fetcher and Analyzer

# analyzer.py
import requests
from script_classifier import classify_script, ScriptType
from disassembler import disassemble

def fetch_transaction(txid: str, network: str = "mainnet") -> dict:
    """Download the JSON description of a transaction from mempool.space.

    Args:
        txid: Transaction id, inserted verbatim into the request URL.
        network: ``"mainnet"`` selects the main API; any other value selects
            the ``/testnet`` API.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    if network == "mainnet":
        api_root = "https://mempool.space"
    else:
        api_root = "https://mempool.space/testnet"

    response = requests.get(f"{api_root}/api/tx/{txid}", timeout=10)
    response.raise_for_status()
    return response.json()

def _print_inputs(tx: dict) -> None:
    """Print the INPUTS section of the report for the fetched transaction."""
    print(f"\nINPUTS ({len(tx['vin'])}):")
    for i, vin in enumerate(tx["vin"]):
        if not vin.get("is_coinbase"):
            print(f"  [{i}] {vin['txid']}:{vin['vout']}")
            if vin.get("scriptsig"):
                print(f"       ScriptSig ASM: {vin.get('scriptsig_asm', 'N/A')}")
            if vin.get("witness"):
                print(f"       Witness items: {len(vin['witness'])}")
                # Each witness item is a hex string; show at most 40 chars.
                for j, w in enumerate(vin["witness"]):
                    print(f"         [{j}] ({len(w)//2} bytes): {w[:40]}{'...' if len(w)>40 else ''}")
            print(f"       Sequence: 0x{vin['sequence']:08x}")
        else:
            # Coinbase inputs have no outpoint worth printing.
            print(f"  [{i}] COINBASE")


def _print_outputs(tx: dict) -> int:
    """Print the OUTPUTS section and return the summed output value."""
    print(f"\nOUTPUTS ({len(tx['vout'])}):")
    total_out = 0
    for i, vout in enumerate(tx["vout"]):
        # Gather everything first so a decoding failure prints nothing
        # for this output.
        value_btc = vout["value"] / 1e8
        total_out += vout["value"]
        spk = vout["scriptpubkey"]
        script_type = classify_script(spk)
        asm = disassemble(spk)
        address = vout.get("scriptpubkey_address", "N/A")

        print(f"  [{i}] {value_btc:.8f} BTC")
        print(f"       Type: {script_type.value}")
        print(f"       Address: {address}")
        print(f"       ScriptPubKey: {spk}")
        print(f"       ASM: {asm}")
    return total_out


def analyze_transaction(txid: str, network: str = "mainnet") -> None:
    """Fetch a transaction and print a full human-readable report.

    The report has a header (version, locktime, sizes, fee), one entry per
    input, one entry per output with its classified script type and ASM,
    and the total output value in BTC.

    Args:
        txid: Id of the transaction to fetch.
        network: Passed through to ``fetch_transaction``.
    """
    tx = fetch_transaction(txid, network)

    # Header
    print(f"\n{'='*60}")
    print(f"Transaction: {txid}")
    print(f"Version: {tx['version']}  |  Locktime: {tx['locktime']}")
    print(f"Size: {tx['size']} bytes  |  vSize: {tx['vsize']} vbytes")
    print(f"Fee: {tx.get('fee', 'N/A')} sats")
    print(f"{'='*60}")

    _print_inputs(tx)
    total_out = _print_outputs(tx)

    # Footer
    print(f"\nTotal Output: {total_out / 1e8:.8f} BTC")
    print(f"{'='*60}\n")

if __name__ == "__main__":
    # Demo when run as a script: report on the first person-to-person
    # mainnet transaction (Satoshi to Hal Finney). This performs a live HTTP
    # request through analyze_transaction, so network access is required.
    analyze_transaction("f4184fc596403b9d638783cf57adfe4c75c605f6356fbc91338530e9831e9e16")

Module 4: Witness Decoder

# witness_decoder.py

def decode_witness_p2wpkh(witness: list) -> dict:
    """Decode a P2WPKH witness stack: ``[signature, pubkey]``.

    Args:
        witness: Witness items as hex strings.

    Returns:
        A dict with ``type``, ``signature`` (hex without the trailing sighash
        byte), ``sighash_type`` (a name, or ``0x..`` for unrecognised flags),
        ``pubkey`` and ``pubkey_compressed``. If the stack does not have
        exactly two items, or the signature is empty or ends in non-hex
        characters, a dict with a single ``error`` key is returned instead.
    """
    if len(witness) != 2:
        return {"error": f"Expected 2 items, got {len(witness)}"}
    sig_hex, pubkey_hex = witness

    # The last byte of the signature item is the sighash flag; an empty item
    # has none, so report it rather than letting int() raise.
    if len(sig_hex) < 2:
        return {"error": "Signature is empty"}
    try:
        sighash = int(sig_hex[-2:], 16)
    except ValueError:
        return {"error": f"Invalid sighash byte: {sig_hex[-2:]!r}"}

    sighash_names = {
        0x01: "ALL",
        0x02: "NONE",
        0x03: "SINGLE",
        0x81: "ALL|ANYONECANPAY",
        0x82: "NONE|ANYONECANPAY",
        0x83: "SINGLE|ANYONECANPAY",
    }
    return {
        "type": "P2WPKH",
        "signature": sig_hex[:-2],
        "sighash_type": sighash_names.get(sighash, f"0x{sighash:02x}"),
        "pubkey": pubkey_hex,
        # Compressed keys start with 02 or 03.
        "pubkey_compressed": pubkey_hex[0:2] in ("02", "03"),
    }

def decode_witness_p2tr_keypath(witness: list) -> dict:
    """Decode a Taproot key-path witness: one Schnorr signature.

    The signature is either 64 bytes (implicit default sighash) or 65 bytes
    (64-byte signature followed by an explicit sighash byte).

    Args:
        witness: Witness items as hex strings.

    Returns:
        A dict with ``type``, ``schnorr_signature`` (the first 64 bytes as
        hex) and ``sighash_type``. If the stack does not hold exactly one
        item, or that item is not 64 or 65 bytes long, a dict with a single
        ``error`` key is returned instead.
    """
    if len(witness) != 1:
        return {"error": "Key-path spend should have exactly 1 witness item"}
    sig_hex = witness[0]

    # 128 hex chars = 64 bytes, 130 = 65 bytes. Any other length is not a
    # Schnorr signature and must not be reported as a default-sighash one.
    if len(sig_hex) not in (128, 130):
        return {
            "error": "Schnorr signature must be 128 or 130 hex characters, "
                     f"got {len(sig_hex)}"
        }

    has_sighash = len(sig_hex) == 130
    return {
        "type": "P2TR key-path",
        "schnorr_signature": sig_hex[:128],
        "sighash_type": f"0x{sig_hex[128:]}" if has_sighash else "DEFAULT (ALL)",
    }

def decode_witness_p2tr_scriptpath(witness: list) -> dict:
    """Decode a Taproot script-path witness.

    Expected layout: ``[script inputs..., tapscript, control block]``,
    optionally followed by an annex (a final item whose first byte is 0x50),
    which is split off before decoding.

    Args:
        witness: Witness items as hex strings.

    Returns:
        A dict with ``type``, ``tapscript``, ``internal_key``,
        ``leaf_version``, ``parity``, ``merkle_path_nodes`` and
        ``script_data``; an ``annex`` key is added only when an annex was
        present. If fewer than two items remain after removing the annex, or
        the control block is malformed, a dict with a single ``error`` key is
        returned instead.
    """
    items = list(witness)  # work on a copy; the caller's list is untouched

    # With two or more items, a last item starting with byte 0x50 is the
    # annex, not the control block.
    annex = None
    if len(items) >= 2 and items[-1][:2] == "50":
        annex = items.pop()

    if len(items) < 2:
        return {"error": "Script-path spend needs at least script + control block"}

    control_block = items[-1]
    tapscript = items[-2]
    script_data = items[:-2]

    # Control block = 1 version/parity byte + 32-byte internal key
    # + 32 bytes per Merkle path node (at most 128 nodes).
    # In hex characters: 66 + 64 * nodes.
    path_hex_len = len(control_block) - 66
    if path_hex_len < 0 or path_hex_len % 64 != 0 or path_hex_len // 64 > 128:
        return {
            "error": "Control block must be 33 + 32*m bytes (0 <= m <= 128), "
                     f"got {len(control_block)} hex characters"
        }
    try:
        first_byte = int(control_block[:2], 16)
    except ValueError:
        return {"error": "Control block is not valid hex"}

    decoded = {
        "type": "P2TR script-path",
        "tapscript": tapscript,
        "internal_key": control_block[2:66],
        # Upper seven bits are the leaf version, lowest bit the parity flag.
        "leaf_version": f"0x{first_byte & 0xfe:02x}",
        "parity": first_byte & 0x01,
        "merkle_path_nodes": path_hex_len // 64,
        "script_data": script_data,
    }
    if annex is not None:
        decoded["annex"] = annex
    return decoded

Running the Full Analysis

# main.py — Complete analysis runner

from analyzer import analyze_transaction
from witness_decoder import decode_witness_p2wpkh, decode_witness_p2tr_keypath
import requests

def deep_analyze(txid: str) -> None:
    """Print the standard report for *txid*, then decode each input's witness.

    The transaction JSON is fetched here for the witness decoding;
    ``analyze_transaction`` performs its own fetch for the report.

    The witness type is guessed from the stack shape alone: a single item of
    64 or 65 bytes is treated as a Taproot key-path spend, and two items with
    a 33-byte second item as P2WPKH. Anything else is printed raw.

    Args:
        txid: Id of the mainnet transaction to analyze.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # Bound the wait and fail loudly on HTTP errors; otherwise a non-JSON
    # error body would surface as a confusing decoding failure further down.
    resp = requests.get(f"https://mempool.space/api/tx/{txid}", timeout=10)
    resp.raise_for_status()
    tx = resp.json()

    analyze_transaction(txid)

    print("WITNESS DEEP DECODE:")
    for i, vin in enumerate(tx["vin"]):
        witness = vin.get("witness", [])
        if not witness:
            continue
        print(f"\n  Input [{i}]:")
        # Determine witness type from the output being spent
        # In practice, look up the scriptPubKey of the UTXO being spent
        if len(witness) == 1 and len(witness[0]) in (128, 130):
            decoded = decode_witness_p2tr_keypath(witness)
        elif len(witness) == 2 and len(witness[1]) == 66:
            decoded = decode_witness_p2wpkh(witness)
        else:
            decoded = {"type": "Unknown", "raw": witness}

        for k, v in decoded.items():
            print(f"    {k}: {v}")

if __name__ == "__main__":
    # Example: A Taproot transaction
    # NOTE(review): the txid below is taken on trust as a Taproot spend; it
    # cannot be verified from this file. Running this performs live HTTP
    # requests to mempool.space.
    EXAMPLE_TXID = "9a77a0b6e4bf553f1e36d0c3a73bf32571024cd2d15ca1c2ac20baa4e11cdf01"
    deep_analyze(EXAMPLE_TXID)

Output Example

============================================================
Transaction: 9a77a0b6...
Version: 2  |  Locktime: 0
Size: 153 bytes  |  vSize: 102 vbytes
Fee: 210 sats
============================================================

INPUTS (1):
  [0] abcd1234...:0
       Witness items: 1
         [0] (64 bytes): d4e7f2a1b3...

OUTPUTS (2):
  [0] 0.00010000 BTC
       Type: P2TR
       Address: bc1p...
       ScriptPubKey: 5120...
       ASM: OP_1 <32-byte-key>
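  [1] 0.00089790 BTC
       Type: P2TR
       Address: bc1p...
       ScriptPubKey: 5120...
       ASM: OP_1 <32-byte-key>

Total Output: 0.00099790 BTC
============================================================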

WITNESS DEEP DECODE:

  Input [0]:
    type: P2TR key-path
    schnorr_signature: d4e7f2a1...
    sighash_type: DEFAULT (ALL)

Summary

This walkthrough demonstrates how Python can be used to build a complete Bitcoin script analysis pipeline: from raw hex parsing and script type identification, through disassembly and witness decoding, to human-readable reporting. Combining these modules gives you a powerful custom analysis tool that goes beyond what standard block explorers expose, and provides the low-level detail needed for serious script debugging.

Pro Tip

When debugging scripts, always start with a high-level disassembly before diving into the stack trace. Tools like bitcoin-cli decodescript are your first line of defense in identifying standard script patterns.

☕ Help support TeachMeBitcoin

TeachMeBitcoin is an ad-free, open-source educational repository curated by a passionate team of Bitcoin researchers and educators for public benefit. If you found our articles helpful, please consider supporting our hosting and ongoing content updates with a clean donation:

Ethereum: 0x578417C51783663D8A6A811B3544E1f779D39A85
Bitcoin: bc1q77k9e95rn669kpzyjr8ke9w95zhk7pa5s63qzz
Solana: 4ycT2ayqeMucixj3wS8Ay8Tq9NRDYRPKYbj3UGESyQ4J
Address copied to clipboard!