Source code for etk.extractors.cryptographic_hash_extractor

from etk.extractors.regex_extractor import RegexExtractor
from etk.extractor import Extractor, InputType
from typing import List
from etk.extraction import Extraction

class CryptographicHashExtractor(Extractor):
    """
    **Description**
        Extract MD5, SHA-1 and SHA-256 hex digests from text. This class
        subclasses Extractor and wraps three predefined RegexExtractor
        instances (one per digest length: 32, 40 and 64 hex characters),
        concatenating their results.

    Examples:
        ::

            cryptographic_hash_extractor = CryptographicHashExtractor()
            cryptographic_hash_extractor.extract(text=input_doc)

    """

    def __init__(self):
        e_name = 'cryptographic hash extractor'
        # Each pattern matches a run of hex digits of the digest's exact
        # length, delimited by word boundaries. All three accept both upper-
        # and lower-case hex letters; the SHA-1 pattern previously accepted
        # lower-case only, so upper-case SHA-1 digests were missed.
        self._regex_extractors = [
            RegexExtractor(r"(\b[a-fA-F\d]{32}\b)", 'md5 ' + e_name, general_tag='md5'),
            RegexExtractor(r"(\b[a-fA-F0-9]{40}\b)", 'sha1 ' + e_name, general_tag='sha1'),
            RegexExtractor(r"(\b[A-Fa-f0-9]{64}\b)", 'sha256 ' + e_name, general_tag='sha256'),
        ]
        Extractor.__init__(self,
                           input_type=InputType.TEXT,
                           category="regex",
                           name=e_name)

    @property
    def regex_extractors(self):
        """The wrapped RegexExtractor instances, in md5, sha1, sha256 order."""
        return self._regex_extractors

    def extract(self, text: str) -> List[Extraction]:
        """
        Args:
            text (str): The input source to be processed

        Returns:
            List[Extraction]: The list of extractions returned by
            CryptographicHashExtractor; results of the md5 extractor come
            first, then sha1, then sha256.

        """
        res = []
        for e in self.regex_extractors:
            # extend in place instead of rebuilding the list each iteration
            res.extend(e.extract(text))
        return res