#!/usr/bin/env python
import argparse
import ast
import logging
import re
import traceback
import warnings

from regexploit.ast.sre import SreOpParser
from regexploit.bin.files import file_generator
from regexploit.languages.python_node_visitor import PythonNodeVisitor
from regexploit.output.text import TextOutput
from regexploit.redos import find


def handle_file(filename: str, output: TextOutput):
    """Scan one Python source file for regexes that look vulnerable to ReDoS.

    The file is read as bytes, parsed into an AST and walked with
    ``PythonNodeVisitor`` to collect candidate patterns.  Every candidate that
    ``SreOpParser`` accepts is passed to ``find``; results whose
    ``starriness`` exceeds 2 are reported through ``output.record`` together
    with the stripped source line the pattern was found on (when that line
    can be decoded).

    Files that cannot be parsed are reported on stdout and skipped instead of
    aborting the whole scan.

    Args:
        filename: Path of the Python file to scan.
        output: Reporter; ``next()`` is called once per successfully parsed
            regex and ``record()`` once per reported finding.
    """
    with open(filename, "rb") as f:
        code = f.read()
    try:
        code_ast = ast.parse(code)
        pnv = PythonNodeVisitor()
        pnv.visit(code_ast)
    except RecursionError:
        print(f"RecursionError parsing AST for {filename}")
        return
    except (SyntaxError, ValueError) as e:
        # Before Python 3.12, source containing null bytes raises ValueError
        # rather than SyntaxError; treat both as an unparsable file.
        print(f"Bad Python3 syntax in {filename}: {e}")
        return

    # Split once per file rather than once per finding.
    source_lines = code.splitlines()

    for regex in pnv.patterns:
        try:
            parsed = SreOpParser().parse_sre(regex.pattern, regex.flags)
        except re.error:
            continue  # We will have many strings which aren't actually regexes
        try:
            output.next()
            for redos in find(parsed):
                if redos.starriness > 2:
                    context = None
                    try:
                        context = source_lines[regex.lineno - 1].decode().strip()
                    except UnicodeDecodeError:
                        # Context is optional; report the finding without it.
                        pass
                    output.record(
                        redos,
                        regex.pattern,
                        filename=filename,
                        lineno=regex.lineno,
                        context=context,
                    )
        except Exception:
            # Best effort: a failure on one regex must not stop the scan.
            print(
                f"Error finding ReDoS: {regex.pattern} from {filename} #{regex.lineno}"
            )
            print(traceback.format_exc())


def main():
    """Command-line entry point: scan Python sources for ReDoS-prone regexes.

    Parses the command line (positional ``files`` plus the ``--glob``,
    ``--verbose`` and ``--ignore`` options), feeds every file produced by
    ``file_generator`` to ``handle_file`` and finally prints how many regexes
    were processed.  ``FutureWarning`` and ``DeprecationWarning`` are ignored
    for the duration of the run.
    """
    with warnings.catch_warnings():
        # Some weird regexes emit warnings
        for noisy_category in (FutureWarning, DeprecationWarning):
            warnings.simplefilter("ignore", category=noisy_category)

        cli = argparse.ArgumentParser(
            description="Parse regexes out of python files and scan them for ReDoS"
        )
        cli.add_argument("files", nargs="+", help="Python files or directories")
        cli.add_argument(
            "--glob", action="store_true", help="Glob the input filenames (**/*)"
        )
        cli.add_argument("--verbose", action="store_true", help="Verbose logging")
        cli.add_argument(
            "--ignore", action="append", help="Paths containing this string are ignored"
        )
        options = cli.parse_args()

        # --verbose turns on debug logging, which shows each file as it is scanned.
        if options.verbose:
            logging.basicConfig(level=logging.DEBUG)

        candidates = file_generator(
            options.files, options.glob, ["*.py"], options.ignore
        )
        report = TextOutput()
        for path in candidates:
            logging.debug(path)
            handle_file(path, report)
        print(f"Processed {report.regexes} regexes")


# Run the scanner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
