From 7d5c98c20621408a5ec79291741d89fcee7cc044 Mon Sep 17 00:00:00 2001 From: Suraj Shetty Date: Thu, 23 Feb 2023 16:19:07 +0530 Subject: [PATCH] feat: Add UTM tracking parameters to internal links in newsletters - Also, extend web page view to store UTM tracking data. --- frappe/email/doctype/newsletter/newsletter.py | 23 +++++++++- frappe/utils/data.py | 29 +++++++++++- .../doctype/web_page_view/web_page_view.json | 45 ++++++++++++++++++- .../doctype/web_page_view/web_page_view.py | 19 +++++++- frappe/www/website_script.js | 8 +++- 5 files changed, 118 insertions(+), 6 deletions(-) diff --git a/frappe/email/doctype/newsletter/newsletter.py b/frappe/email/doctype/newsletter/newsletter.py index da6a2d7f90..55a3b03473 100644 --- a/frappe/email/doctype/newsletter/newsletter.py +++ b/frappe/email/doctype/newsletter/newsletter.py @@ -198,7 +198,28 @@ class Newsletter(WebsiteGenerator): if self.content_type == "HTML": message = self.message_html - return frappe.render_template(message, {"doc": self.as_dict()}) + html = frappe.render_template(message, {"doc": self.as_dict()}) + + return self.add_utm(html) + + def add_utm(self, html: str) -> str: + """Add UTM parameters to internal links in the newsletter.""" + from bs4 import BeautifulSoup + + soup = BeautifulSoup(html, "html.parser") + + links = soup.find_all("a") + for link in links: + href = link.get("href") + if href and not href.startswith("#"): + if not frappe.utils.is_internal_link(href): + continue + new_href = frappe.utils.add_utm_to_url( + href, source="Newsletter", medium="Email", campaign=self.name + ) + link["href"] = new_href + + return str(soup) def get_recipients(self) -> list[str]: """Get recipients from Email Group""" diff --git a/frappe/utils/data.py b/frappe/utils/data.py index b2ef5920a6..bf8e697c67 100644 --- a/frappe/utils/data.py +++ b/frappe/utils/data.py @@ -12,7 +12,7 @@ import typing from code import compile_command from enum import Enum from typing import Any, Literal, Optional, TypeVar, Union 
-from urllib.parse import quote, urljoin +from urllib.parse import parse_qsl, quote, urlencode, urljoin, urlparse, urlunparse from click import secho @@ -2179,3 +2179,30 @@ def get_imaginary_pixel_response(): b"\xa0\x00\x00\x00\x00IEND\xaeB`\x82" ), } + + +def is_internal_link(link: str) -> bool: + if link.startswith("/"): + return True + return urlparse(link).netloc == urlparse(frappe.utils.get_url()).netloc + + +def add_utm_to_url(url: str, source: str, medium: str, campaign: str) -> str: + """Add utm parameters to url. + + Args: + url (str): URL to add utm parameters to. + source, medium, campaign (str): Values for the utm_source, utm_medium and utm_campaign parameters. + + Returns: + str: URL with utm parameters added. + """ + url_parts = list(urlparse(url)) + query = dict(parse_qsl(url_parts[4])) | { + "utm_source": source, + "utm_medium": medium, + "utm_campaign": campaign, + } + + url_parts[4] = urlencode(query) + return urlunparse(url_parts) diff --git a/frappe/website/doctype/web_page_view/web_page_view.json b/frappe/website/doctype/web_page_view/web_page_view.json index 7548ed5f83..d6783aec8f 100644 --- a/frappe/website/doctype/web_page_view/web_page_view.json +++ b/frappe/website/doctype/web_page_view/web_page_view.json @@ -12,7 +12,13 @@ "browser_version", "is_unique", "time_zone", - "user_agent" + "user_agent", + "utm_trackers_section", + "utm_source", + "utm_medium", + "utm_campaign", + "utm_content", + "utm_term" ], "fields": [ { @@ -53,11 +59,46 @@ "fieldname": "user_agent", "fieldtype": "Data", "label": "User Agent" + }, + { + "fieldname": "utm_trackers_section", + "fieldtype": "Section Break", + "label": "UTM Trackers" + }, + { + "fieldname": "utm_source", + "fieldtype": "Data", + "label": "Source", + "read_only": 1 + }, + { + "fieldname": "utm_medium", + "fieldtype": "Data", + "label": "Medium", + "read_only": 1 + }, + { + "fieldname": "utm_campaign", + "fieldtype": "Data", + "label": "Campaign", + "read_only": 1 + }, + { + "fieldname": "utm_content", + "fieldtype": "Data", + "label": 
"Content", + "read_only": 1 + }, + { + "fieldname": "utm_term", + "fieldtype": "Data", + "label": "Term", + "read_only": 1 } ], "in_create": 1, "links": [], - "modified": "2022-09-13 15:38:25.401797", + "modified": "2023-02-23 15:10:14.134789", "modified_by": "Administrator", "module": "Website", "name": "Web Page View", diff --git a/frappe/website/doctype/web_page_view/web_page_view.py b/frappe/website/doctype/web_page_view/web_page_view.py index 40c11782f5..e60ff0134b 100644 --- a/frappe/website/doctype/web_page_view/web_page_view.py +++ b/frappe/website/doctype/web_page_view/web_page_view.py @@ -10,7 +10,19 @@ class WebPageView(Document): @frappe.whitelist(allow_guest=True) -def make_view_log(path, referrer=None, browser=None, version=None, url=None, user_tz=None): +def make_view_log( + path, + referrer=None, + browser=None, + version=None, + url=None, + user_tz=None, + utm_source=None, + utm_medium=None, + utm_campaign=None, + utm_term=None, + utm_content=None, +): if not is_tracking_enabled(): return @@ -35,6 +47,11 @@ def make_view_log(path, referrer=None, browser=None, version=None, url=None, use view.time_zone = user_tz view.user_agent = user_agent view.is_unique = is_unique + view.utm_source = utm_source + view.utm_medium = utm_medium + view.utm_campaign = utm_campaign + view.utm_term = utm_term + view.utm_content = utm_content try: if frappe.flags.read_only: diff --git a/frappe/www/website_script.js b/frappe/www/website_script.js index ce9c28e9d9..391862e685 100644 --- a/frappe/www/website_script.js +++ b/frappe/www/website_script.js @@ -20,13 +20,19 @@ ga('send', 'pageview'); if (navigator.doNotTrack != 1 && !window.is_404) { frappe.ready(() => { let browser = frappe.utils.get_browser(); + let query_params = frappe.utils.get_query_params(); frappe.call("frappe.website.doctype.web_page_view.web_page_view.make_view_log", { path: location.pathname, referrer: document.referrer, browser: browser.name, version: browser.version, url: location.origin, - user_tz: 
Intl.DateTimeFormat().resolvedOptions().timeZone + user_tz: Intl.DateTimeFormat().resolvedOptions().timeZone, + utm_source: query_params.utm_source, + utm_medium: query_params.utm_medium, + utm_campaign: query_params.utm_campaign, + utm_term: query_params.utm_term, + utm_content: query_params.utm_content, }) }) }