feat(DX): normalize queries in recorder (#21735)

Attempt to normalize a query by replacing its "variables" (literal values) with a placeholder.
This gives a different view of similar, near-duplicate queries.

These two are distinct queries:
```sql
select * from user where name = 'x'
select * from user where name = 'z'
```

But their "normalized" form would be the same:
```sql
select * from user where name = ?
```

This helps highlight queries run in a loop, which might not register as
exact duplicates but are effectively duplicates of each other.
This commit is contained in:
Ankush Menat 2023-07-19 16:45:30 +05:30 committed by GitHub
parent ce79dd0b89
commit 42aff950ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 4 deletions

View file

@ -55,9 +55,12 @@
<div class="col grid-static-col col-xs-2">
<div class="static-area ellipsis text-right">{{ __("Duration (ms)") }}</div>
</div>
<div class="col grid-static-col col-xs-2">
<div class="col grid-static-col col-xs-1">
<div class="static-area ellipsis text-right">{{ __("Exact Copies") }}</div>
</div>
<div class="col grid-static-col col-xs-1">
<div class="static-area ellipsis text-right">{{ __("Normalized Copies") }}</div>
</div>
</div>
</div>
</div>
@ -72,9 +75,12 @@
<div class="col grid-static-col col-xs-2">
<div class="static-area ellipsis text-right">{{ call.duration }}</div>
</div>
<div class="col grid-static-col col-xs-2">
<div class="col grid-static-col col-xs-1">
<div class="static-area ellipsis text-right">{{ call.exact_copies }}</div>
</div>
<div class="col grid-static-col col-xs-1">
<div class="static-area ellipsis text-right">{{ call.normalized_copies }}</div>
</div>
<div class="col col-xs-1"><a class="close btn-open-row">
<span class="octicon" :class="showing == call.index? 'octicon-triangle-up' : 'octicon-triangle-down'"></span></a>
</div>
@ -99,6 +105,12 @@
<div class="control-value like-disabled-input for-description"><pre>{{ call.query }}</pre></div>
</div>
</div>
<div class="frappe-control">
<div class="form-group">
<div class="clearfix"><label class="control-label">{{ __("Normalized Query") }}</label></div>
<div class="control-value like-disabled-input for-description"><pre>{{ call.normalized_query }}</pre></div>
</div>
</div>
<div class="frappe-control input-max-width">
<div class="form-group">
<div class="clearfix"><label class="control-label">{{ __("Duration (ms)") }}"</label></div>
@ -111,6 +123,13 @@
<div class="control-value like-disabled-input">{{ call.exact_copies }}</div>
</div>
</div>
<div class="frappe-control input-max-width">
<div class="form-group">
<div
class="clearfix"><label class="control-label">{{ __("Normalized Copies") }}</label></div>
<div class="control-value like-disabled-input">{{ call.normalized_copies }}</div>
</div>
</div>
<div class="frappe-control">
<div class="form-group">
<div class="clearfix"><label class="control-label">{{ __("Stack Trace") }}</label></div>

View file

@ -86,10 +86,42 @@ def post_process():
def mark_duplicates(request):
    """Annotate every call in ``request["calls"]`` with duplicate counts.

    Each call dict is updated in place with these keys:

    - ``normalized_query``: ``normalize_query`` applied to the call's ``query``.
    - ``index``: position of the call within ``request["calls"]``.
    - ``exact_copies``: how many calls in the request share the same ``query``.
    - ``normalized_copies``: how many calls in the request share the same
      ``normalized_query``.
    """
    calls = request["calls"]

    # Normalized form must exist on every call before it can be counted.
    for sql_call in calls:
        sql_call["normalized_query"] = normalize_query(sql_call["query"])

    exact_duplicates = Counter(call["query"] for call in calls)
    normalized_duplicates = Counter(call["normalized_query"] for call in calls)

    for index, call in enumerate(calls):
        call["index"] = index
        call["exact_copies"] = exact_duplicates[call["query"]]
        call["normalized_copies"] = normalized_duplicates[call["normalized_query"]]
def normalize_query(query: str) -> str:
    """Attempt to normalize query by replacing literal values with ``?``.

    This gives a different view of similar duplicate queries.

    Example:
        These two are distinct queries:
            `select * from user where name = 'x'`
            `select * from user where name = 'z'`

        But their "normalized" form would be same:
            `select * from user where name = ?`

    Every statement that sqlparse finds in ``query`` is normalized and the
    results are concatenated, so text after the first statement is not lost.
    An empty query therefore normalizes to an empty string.

    Normalization is best effort: if parsing fails for any reason, the
    failure is printed and the original ``query`` is returned unchanged.
    """
    try:
        statements = sqlparse.parse(query)
        for statement in statements:
            for token in statement.flatten():
                # Token types are hierarchical: membership matches Literal
                # itself and all of its subtypes (strings, numbers, ...).
                if token.ttype in sqlparse.tokens.Literal:
                    token.value = "?"
        return "".join(str(statement) for statement in statements)
    except Exception as e:
        print("Failed to normalize query ", e)

    return query
def record(force=False):

View file

@ -5,6 +5,7 @@ import sqlparse
import frappe
import frappe.recorder
from frappe.recorder import normalize_query
from frappe.tests.utils import FrappeTestCase
from frappe.utils import set_request
from frappe.website.serve import get_response_content
@ -138,3 +139,17 @@ class TestRecorderDeco(FrappeTestCase):
test()
self.assertTrue(frappe.recorder.get())
class TestQueryNormalization(FrappeTestCase):
    """Checks that ``normalize_query`` replaces literal values with ``?``."""

    def test_query_normalization(self):
        # Maps an input query to its expected normalized form.
        test_cases = {
            "select * from user where name = 'x'": "select * from user where name = ?",
            "select * from user where a > 5": "select * from user where a > ?",
            "select * from `user` where a > 5": "select * from `user` where a > ?",
            "select `name` from `user`": "select `name` from `user`",
            "select `name` from `user` limit 10": "select `name` from `user` limit ?",
        }

        for query, normalized in test_cases.items():
            # subTest reports the failing query and keeps checking the rest.
            with self.subTest(query=query):
                self.assertEqual(normalize_query(query), normalized)