Source code for etk.extractors.bitcoin_address_extractor
from etk.extractors.regex_extractor import RegexExtractor
class BitcoinAddressExtractor(RegexExtractor):
    """
    **Description**
        This class inherits RegexExtractor and predefines the regex pattern
        used to find bitcoin addresses. Which pattern is used depends on the
        ``support_Bech32`` constructor argument.

    Examples:
        ::

            bitcoin_addr_extractor = BitcoinAddressExtractor(support_Bech32=True)
            bitcoin_addr_extractor.extract(text=input_doc)

    """

    def __init__(self, support_Bech32: bool = False):
        """
        Build the extractor with the pattern matching the requested address types.

        Args:
            support_Bech32 (bool): when False (default) only addresses starting
                with ``1`` or ``3`` (P2PKH and P2SH) are matched. When True,
                tokens starting with ``bc1`` followed by exactly 39 or 59
                further characters are matched as well.
        """
        # Legacy addresses: a leading 1 or 3, then 25-34 characters from an
        # alphabet that leaves out the look-alike characters 0, O, I and l.
        legacy_address = r"[13][a-km-zA-HJ-NP-Z1-9]{25,34}"

        if support_Bech32:
            # a regex supporting the Bech32 type (which is not supported by most applications)
            # NOTE(review): this character class is broader than the Bech32
            # alphabet described in BIP-173 -- confirm whether that is intended.
            bech32_body = r"[a-zA-HJ-NP-Z0-9]"
            # The alternatives MUST be wrapped in a (non-capturing) group:
            # "|" has the lowest precedence, so without the group the leading
            # \b only guards the first alternative and the trailing \b only
            # guards the last one, letting matches start or end mid-token.
            bitcoin_address_pattern = (
                r"\b(?:"
                + legacy_address
                + r"|bc1" + bech32_body + r"{39}"
                + r"|bc1" + bech32_body + r"{59}"
                + r")\b"
            )
        else:
            # simple version supporting P2PKH and P2SH
            bitcoin_address_pattern = r"\b" + legacy_address + r"\b"

        super().__init__(pattern=bitcoin_address_pattern,
                         extractor_name="bitcoin address extractor")