From d7bc848a522e236d84377d88b733df286cc0484c Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Thu, 11 Mar 2021 23:11:14 +0530 Subject: [PATCH] test(semgrep): python and javascript translation - Move rules to .github folder to reduce clutter in root folder - separate out rules - add tests for rules - docs for writing more rules --- .github/helper/semgrep_rules/README.md | 38 +++++++++++++ .github/helper/semgrep_rules/security.py | 6 +++ .github/helper/semgrep_rules/security.yml | 10 ++++ .github/helper/semgrep_rules/translate.js | 37 +++++++++++++ .github/helper/semgrep_rules/translate.py | 53 ++++++++++++++++++ .github/helper/semgrep_rules/translate.yml | 63 ++++++++++++++++++++++ .github/workflows/semgrep.yml | 2 +- .semgrep.yml | 29 ---------- 8 files changed, 208 insertions(+), 30 deletions(-) create mode 100644 .github/helper/semgrep_rules/README.md create mode 100644 .github/helper/semgrep_rules/security.py create mode 100644 .github/helper/semgrep_rules/security.yml create mode 100644 .github/helper/semgrep_rules/translate.js create mode 100644 .github/helper/semgrep_rules/translate.py create mode 100644 .github/helper/semgrep_rules/translate.yml delete mode 100644 .semgrep.yml diff --git a/.github/helper/semgrep_rules/README.md b/.github/helper/semgrep_rules/README.md new file mode 100644 index 0000000000..670d8d280f --- /dev/null +++ b/.github/helper/semgrep_rules/README.md @@ -0,0 +1,38 @@ +# Semgrep linting + +## What is semgrep? +Semgrep or "semantic grep" is language agnostic static analysis tool. In simple terms semgrep is syntax-aware `grep`, so unlike regex it doesn't get confused by different ways of writing same thing or whitespaces or code split in multiple lines etc. + +Example: + +To check if a translate function is using f-string or not the regex would be `r"_\(\s*f[\"']"` while equivalent rule in semgrep would be `_(f"...")`. As semgrep knows grammer of language it takes care of unnecessary whitespace, type of quotation marks etc. + +You can read more such examples in `.github/helper/semgrep_rules` directory. + +# Why/when to use this? +We want to maintain quality of contributions, at the same time remembering all the good practices can be pain to deal with while evaluating contributions. Using semgrep if you can translate "best practice" into a rule then it can automate the task for us. + +## Running locally + +Install semgrep using homebrew `brew install semgrep` or pip `pip install semgrep`. + +To run locally use following command: + +`semgrep --config=.github/helper/semgrep_rules [file/folder names]` + +## Testing +semgrep allows testing the tests. Refer to this page: https://semgrep.dev/docs/writing-rules/testing-rules/ + +When writing new rules you should write few positive and few negative cases as shown in the guide and current tests. + +To run current tests: `semgrep --test --test-ignore-todo .github/helper/semgrep_rules` + + +## Reference + +If you are new to Semgrep read following pages to get started on writing/modifying rules: + +- https://semgrep.dev/docs/getting-started/ +- https://semgrep.dev/docs/writing-rules/rule-syntax +- https://semgrep.dev/docs/writing-rules/pattern-examples/ +- https://semgrep.dev/docs/writing-rules/rule-ideas/#common-use-cases diff --git a/.github/helper/semgrep_rules/security.py b/.github/helper/semgrep_rules/security.py new file mode 100644 index 0000000000..f477d7c176 --- /dev/null +++ b/.github/helper/semgrep_rules/security.py @@ -0,0 +1,6 @@ +def function_name(input): + # ruleid: frappe-codeinjection-eval + eval(input) + +# ok: frappe-codeinjection-eval +eval("1 + 1") diff --git a/.github/helper/semgrep_rules/security.yml b/.github/helper/semgrep_rules/security.yml new file mode 100644 index 0000000000..8b21979208 --- /dev/null +++ b/.github/helper/semgrep_rules/security.yml @@ -0,0 +1,10 @@ +rules: +- id: frappe-codeinjection-eval + patterns: + - pattern-not: eval("...") + - pattern: eval(...) + message: | + Detected the use of eval(). eval() can be dangerous if used to evaluate + dynamic content. Avoid it or use safe_eval(). + languages: [python] + severity: ERROR diff --git a/.github/helper/semgrep_rules/translate.js b/.github/helper/semgrep_rules/translate.js new file mode 100644 index 0000000000..7b92fe2dff --- /dev/null +++ b/.github/helper/semgrep_rules/translate.js @@ -0,0 +1,37 @@ +// ruleid: frappe-translation-empty-string +__("") +// ruleid: frappe-translation-empty-string +__('') + +// ok: frappe-translation-js-formatting +__('Welcome {0}, get started with ERPNext in just a few clicks.', [full_name]); + +// ruleid: frappe-translation-js-formatting +__(`Welcome ${full_name}, get started with ERPNext in just a few clicks.`); + +// ok: frappe-translation-js-formatting +__('This is fine'); + + +// ok: frappe-translation-trailing-spaces +__('This is fine'); + +// ruleid: frappe-translation-trailing-spaces +__(' this is not ok '); +// ruleid: frappe-translation-trailing-spaces +__('this is not ok '); +// ruleid: frappe-translation-trailing-spaces +__(' this is not ok'); + +// ok: frappe-translation-js-splitting +__('You have {0} subscribers in your mailing list.', [subscribers.length]) + +// todoruleid: frappe-translation-js-splitting +__('You have') + subscribers.length + __('subscribers in your mailing list.') + +// ruleid: frappe-translation-js-splitting +__('You have' + 'subscribers in your mailing list.') + +// ruleid: frappe-translation-js-splitting +__('You have {0} subscribers' + + 'in your mailing list', [subscribers.length]) diff --git a/.github/helper/semgrep_rules/translate.py b/.github/helper/semgrep_rules/translate.py new file mode 100644 index 0000000000..bd6cd9126c --- /dev/null +++ b/.github/helper/semgrep_rules/translate.py @@ -0,0 +1,53 @@ +# Examples taken from https://frappeframework.com/docs/user/en/translations +# This file is used for testing the tests. + +from frappe import _ + +full_name = "Jon Doe" +# ok: frappe-translation-python-formatting +_('Welcome {0}, get started with ERPNext in just a few clicks.').format(full_name) + +# ruleid: frappe-translation-python-formatting +_('Welcome %s, get started with ERPNext in just a few clicks.' % full_name) +# ruleid: frappe-translation-python-formatting +_('Welcome %(name)s, get started with ERPNext in just a few clicks.' % {'name': full_name}) + +# ruleid: frappe-translation-python-formatting +_('Welcome {0}, get started with ERPNext in just a few clicks.'.format(full_name)) + + +subscribers = ["Jon", "Doe"] +# ok: frappe-translation-python-formatting +_('You have {0} subscribers in your mailing list.').format(len(subscribers)) + +# ruleid: frappe-translation-python-splitting +_('You have') + len(subscribers) + _('subscribers in your mailing list.') + +# ruleid: frappe-translation-python-splitting +_('You have {0} subscribers \ + in your mailing list').format(len(subscribers)) + +# ok: frappe-translation-python-splitting +_('You have {0} subscribers') \ + + 'in your mailing list' + +# ruleid: frappe-translation-trailing-spaces +msg = _(" You have {0} pending invoice ") +# ruleid: frappe-translation-trailing-spaces +msg = _("You have {0} pending invoice ") +# ruleid: frappe-translation-trailing-spaces +msg = _(" You have {0} pending invoice") + +# ok: frappe-translation-trailing-spaces +msg = ' ' + _("You have {0} pending invoices") + ' ' + +# ruleid: frappe-translation-python-formatting +_(f"can not format like this - {subscribers}") +# ruleid: frappe-translation-python-splitting +_(f"what" + f"this is also not cool") + + +# ruleid: frappe-translation-empty-string +_("") +# ruleid: frappe-translation-empty-string +_('') diff --git a/.github/helper/semgrep_rules/translate.yml b/.github/helper/semgrep_rules/translate.yml new file mode 100644 index 0000000000..3737da5a7e --- /dev/null +++ b/.github/helper/semgrep_rules/translate.yml @@ -0,0 +1,63 @@ +rules: +- id: frappe-translation-empty-string + pattern-either: + - pattern: _("") + - pattern: __("") + message: | + Empty string is useless for translation. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python, javascript, json] + severity: ERROR + +- id: frappe-translation-trailing-spaces + pattern-either: + - pattern: _("=~/(^[ \t]+|[ \t]+$)/") + - pattern: __("=~/(^[ \t]+|[ \t]+$)/") + message: | + Trailing or leading whitespace not allowed in translate strings. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python, javascript, json] + severity: ERROR + +- id: frappe-translation-python-formatting + pattern-either: + - pattern: _("..." % ...) + - pattern: _("...".format(...)) + - pattern: _(f"...") + message: | + Only positional formatters are allowed and formatting should not be done before translating. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python] + severity: ERROR + +- id: frappe-translation-js-formatting + patterns: + - pattern: __(`...`) + - pattern-not: __("...") + message: | + Template strings are not allowed for text formatting. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [javascript, json] + severity: ERROR + +- id: frappe-translation-python-splitting + pattern-either: + - pattern: _(...) + ... + _(...) + - pattern: _("..." + "...") + - pattern-regex: '_\([^\)]*\\\s*' + message: | + Do not split strings inside translate function. Do not concatenate using translate functions. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [python] + severity: ERROR + +- id: frappe-translation-js-splitting + pattern-either: + - pattern-regex: '__\([^\)]*[\+\\]\s*' + - pattern: __('...' + '...') + - pattern: __('...') + __('...') + message: | + Do not split strings inside translate function. Do not concatenate using translate functions. + Please refer: https://frappeframework.com/docs/user/en/translations + languages: [javascript, json] + severity: ERROR diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml index 321dfb567b..1d5694f521 100644 --- a/.github/workflows/semgrep.yml +++ b/.github/workflows/semgrep.yml @@ -19,4 +19,4 @@ jobs: python -m pip install -q semgrep git fetch origin $GITHUB_BASE_REF:$GITHUB_BASE_REF -q files=$(git diff --name-only --diff-filter=d $GITHUB_BASE_REF) - if [ -f .semgrep.yml ]; then semgrep --config=.semgrep.yml --quiet --error $files; fi + [[ -d .github/helper/semgrep_rules ]] && semgrep --config=.github/helper/semgrep_rules --quiet --error $files diff --git a/.semgrep.yml b/.semgrep.yml deleted file mode 100644 index 99d237251e..0000000000 --- a/.semgrep.yml +++ /dev/null @@ -1,29 +0,0 @@ -#Reference: https://semgrep.dev/docs/writing-rules/rule-syntax/ - -rules: -- id: eval - patterns: - - pattern-not: eval("...") - - pattern: eval(...) - message: | - Detected the use of eval(). eval() can be dangerous if used to evaluate - dynamic content. Avoid it or use safe_eval(). - languages: - - python - severity: ERROR - -# translations -- id: frappe-translation-syntax-python - pattern-either: - - pattern: _(f"...") # f-strings not allowed - - pattern: _("..." + "...") # concatenation not allowed - - pattern: _("") # empty string is meaningless - - pattern: _("..." % ...) # Only positional formatters are allowed. - - pattern: _("...".format(...)) # format should not be used before translating - - pattern: _("...") + ... + _("...") # don't split strings - message: | - Incorrect use of translation function detected. - Please refer: https://frappeframework.com/docs/user/en/translations - languages: - - python - severity: ERROR