Commit 8b5b199e authored by Aymeric Augustin
Browse files

Fixed #3214 -- Stopped parsing SQL with regex.

Avoided introducing a new regex-based SQL splitter in the migrations
framework, before we're bound by backwards compatibility.

Adapted this change to the legacy "initial SQL data" feature, even
though it's already deprecated, in order to facilitate the transition
to migrations.

sqlparse becomes mandatory for RunSQL on some databases (all but
PostgreSQL). There's no API to provide a single statement and tell
Django not to attempt splitting. Since we have a more robust splitting
implementation, that seems like a good tradeoff. It's easier to add a
new keyword argument later if necessary than to remove one.

Many people contributed to both tickets, thank you all, and especially
Claude for the review.

Refs #22401.
parent 2128b3a6
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -155,6 +155,7 @@ def sql_all(app_config, style, connection):


def _split_statements(content):
    # Private API only called from code that emits a RemovedInDjango19Warning.
    comment_re = re.compile(r"^((?:'[^']*'|[^'])*?)--.*$")
    statements = []
    statement = []
@@ -202,9 +203,7 @@ def custom_sql_for_model(model, style, connection):
    for sql_file in sql_files:
        if os.path.exists(sql_file):
            with codecs.open(sql_file, 'r' if six.PY3 else 'U', encoding=settings.FILE_CHARSET) as fp:
                # Some backends can't execute more than one SQL statement at a time,
                # so split into separate statements.
                output.extend(_split_statements(fp.read()))
                output.extend(connection.ops.prepare_sql_script(fp.read(), _allow_fallback=True))
    return output


+34 −0
Original line number Diff line number Diff line
import datetime
import time
import warnings

try:
    from django.utils.six.moves import _thread as thread
@@ -16,6 +17,7 @@ from django.db.backends.signals import connection_created
from django.db.backends import utils
from django.db.transaction import TransactionManagementError
from django.db.utils import DatabaseError, DatabaseErrorWrapper, ProgrammingError
from django.utils.deprecation import RemovedInDjango19Warning
from django.utils.functional import cached_property
from django.utils import six
from django.utils import timezone
@@ -599,6 +601,10 @@ class BaseDatabaseFeatures(object):
    # Does 'a' LIKE 'A' match?
    has_case_insensitive_like = True

    # Does the backend require the sqlparse library for splitting multi-line
    # statements before executing them?
    requires_sqlparse_for_splitting = True

    def __init__(self, connection):
        self.connection = connection

@@ -867,6 +873,34 @@ class BaseDatabaseOperations(object):
        """
        return 'DEFAULT'

    def prepare_sql_script(self, sql, _allow_fallback=False):
        """
        Takes a SQL script that may contain multiple lines and returns a list
        of statements to feed to successive cursor.execute() calls.

        Since few databases are able to process raw SQL scripts in a single
        cursor.execute() call and PEP 249 doesn't talk about this use case,
        the default implementation is conservative.

        The script is split with the optional sqlparse library and comments
        are stripped from every statement. When sqlparse cannot be imported,
        the ImportError is re-raised, unless _allow_fallback is True: in that
        case a RemovedInDjango19Warning is issued and the result of the legacy
        _split_statements() helper is returned instead.
        """
        # Remove _allow_fallback and keep only 'return ...' in Django 1.9.
        try:
            # This import must stay inside the method because it's optional.
            import sqlparse
        except ImportError:
            if not _allow_fallback:
                # No fallback requested: let the caller see the missing
                # dependency.
                raise
            # Without sqlparse, fall back to the legacy (and buggy) logic.
            warnings.warn(
                "Providing initial SQL data on a %s database will require "
                "sqlparse in Django 1.9." % self.connection.vendor,
                RemovedInDjango19Warning)
            # Imported here, next to its only use in this method.
            from django.core.management.sql import _split_statements
            return _split_statements(sql)
        else:
            # Falsy entries returned by sqlparse.split() are dropped before
            # formatting.
            return [sqlparse.format(statement, strip_comments=True)
                    for statement in sqlparse.split(sql) if statement]

    def process_clob(self, value):
        """
        Returns the value of a CLOB column, for backends that return a locator
+1 −0
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
    nulls_order_largest = True
    closed_cursor_error_class = InterfaceError
    has_case_insensitive_like = False
    requires_sqlparse_for_splitting = False


class DatabaseWrapper(BaseDatabaseWrapper):
+3 −0
Original line number Diff line number Diff line
@@ -93,6 +93,9 @@ class DatabaseOperations(BaseDatabaseOperations):
    def no_limit_value(self):
        return None

    def prepare_sql_script(self, sql, _allow_fallback=False):
        """
        Returns the script unchanged as the only element of a list; no
        splitting is performed.

        _allow_fallback is accepted but not used by this implementation.
        """
        statements = [sql]
        return statements

    def quote_name(self, name):
        if name.startswith('"') and name.endswith('"'):
            return name  # Quoting once is enough.
+4 −24
Original line number Diff line number Diff line
import re
from .base import Operation


@@ -43,20 +42,16 @@ class SeparateDatabaseAndState(Operation):

class RunSQL(Operation):
    """
    Runs some raw SQL - a single statement by default, but it will attempt
    to parse and split it into multiple statements if multiple=True.

    A reverse SQL statement may be provided.
    Runs some raw SQL. A reverse SQL statement may be provided.

    Also accepts a list of operations that represent the state change effected
    by this SQL change, in case it's custom column/table creation/deletion.
    """

    def __init__(self, sql, reverse_sql=None, state_operations=None):
        """
        Stores the SQL to run and the optional companions of the operation.

        sql is the SQL to run forwards. reverse_sql is the SQL to run
        backwards, or None when none is provided. state_operations is a list
        of operations representing the state change effected by the SQL; it
        defaults to an empty list.
        """
        self.sql = sql
        self.reverse_sql = reverse_sql
        # A falsy value (None, empty list) is normalized to a fresh list.
        self.state_operations = state_operations or []

    @property
    def reversible(self):
@@ -66,30 +61,15 @@ class RunSQL(Operation):
        for state_operation in self.state_operations:
            state_operation.state_forwards(app_label, state)

    def _split_sql(self, sql):
        """
        Yields pieces of ``sql`` produced by regex-based splitting.

        Blank lines and "--" comments are removed first and every "%" in the
        remaining text is doubled to "%%". The cleaned text is then split
        with ``regex`` and the captured groups are yielded one at a time.

        NOTE(review): the comment pattern does not take quoting into account,
        so a "--" inside a string literal is treated as a comment as well.
        """
        # Captures a run of characters other than "'" and ";", optionally
        # interleaved with single-quoted sections (verbose mode: the spaces
        # in the pattern are ignored).
        regex = r"(?mx) ([^';]* (?:'[^']*'[^';]*)*)"
        # Matches either a whitespace-only line or "--" up to the end of the
        # line.
        comment_regex = r"(?mx) (?:^\s*$)|(?:--.*$)"
        # First, strip comments
        sql = "\n".join([x.strip().replace("%", "%%") for x in re.split(comment_regex, sql) if x.strip()])
        # Now get each statement
        # With a capturing group, re.split() interleaves the captured text
        # with the text between matches; [1:][::2] keeps the odd-indexed
        # items, i.e. the captured groups.
        for st in re.split(regex, sql)[1:][::2]:
            yield st

    def database_forwards(self, app_label, schema_editor, from_state, to_state):
        """
        Runs self.sql through the given schema editor.

        The SQL is handed to the connection's prepare_sql_script(), which
        returns the statements to run; each one is then passed to
        schema_editor.execute() in order.

        app_label, from_state and to_state are not used here.
        """
        # The backend decides how (and whether) the script is split.
        statements = schema_editor.connection.ops.prepare_sql_script(self.sql)
        for statement in statements:
            schema_editor.execute(statement)

    def database_backwards(self, app_label, schema_editor, from_state, to_state):
        """
        Runs self.reverse_sql through the given schema editor.

        Raises NotImplementedError when self.reverse_sql is None. Otherwise
        the SQL is handed to the connection's prepare_sql_script() and each
        returned statement is passed to schema_editor.execute() in order.

        app_label, from_state and to_state are not used here.
        """
        if self.reverse_sql is None:
            raise NotImplementedError("You cannot reverse this operation")
        # The backend decides how (and whether) the script is split.
        statements = schema_editor.connection.ops.prepare_sql_script(self.reverse_sql)
        for statement in statements:
            schema_editor.execute(statement)

Loading