S3Q1 · URL Link Analytics¶
⚡ Quick Reference
Four functions on a list of URL strings:
def count_secure_links(url_list):
    """Return how many URLs in url_list use the secure https scheme."""
    # bool is an int subclass, so each True contributes 1 to the sum.
    return sum(link.startswith("https://") for link in url_list)
def most_common_domain(url_list):
    """Return the most frequent domain; ties go to the domain whose
    first occurrence is earliest in url_list."""
    from collections import Counter

    hosts = [link.split("//")[1].split("/")[0] for link in url_list]
    counts = Counter(hosts)
    top = max(counts.values())
    # hosts keeps the original order, so the first hit wins ties.
    return next(h for h in hosts if counts[h] == top)
def extract_unique_paths(url_list):
    """Count the distinct paths across url_list; a URL with no path
    contributes the empty string."""
    def path_of(link):
        tail = link.split("//")[1]
        if "/" not in tail:
            return ""
        return "/" + tail.split("/", 1)[1]

    return len({path_of(link) for link in url_list})
def longest_url(url_list):
    """Return the longest URL; on equal lengths the earliest wins."""
    winner = url_list[0]
    for candidate in url_list[1:]:
        # strictly greater → ties keep the earlier winner
        if len(candidate) > len(winner):
            winner = candidate
    return winner
Key rules:
- Domain = part after // and before first /
- Path = everything from the first / after the domain (empty string if none)
- most_common_domain ties → first occurring domain
- longest_url ties → first occurring URL (max preserves first on tie)
Template Code¶
def count_secure_links(url_list: list) -> int:
    '''Returns number of URLs starting with "https://".

    url_list: list of URL strings, e.g. "https://example.com/page1".
    Returns 0 for an empty list.
    '''
    ...
def most_common_domain(url_list: list) -> str:
    '''Returns the most frequently occurring domain.
    Ties resolved by first occurrence in url_list.

    Domain = the part of the URL after "//" and before the first
    "/" that follows it, e.g. "example.com".
    '''
    ...
def extract_unique_paths(url_list: list) -> int:
    '''Returns the count of unique paths across all URLs.
    URLs without a path contribute an empty string.

    Path = "/" plus everything after the first "/" following the
    domain, e.g. "/page1"; "" when the URL has no such "/".
    '''
    ...
def longest_url(url_list: list) -> str:
    '''Returns the longest URL string.
    Ties resolved by first occurrence in url_list.

    Length is measured with len() over the full URL string,
    scheme included.
    '''
    ...
Problem Statement¶
Problem
Implement four URL analytics functions on a list of URL strings.
Sample data: url_list = ["https://example.com/page1", "http://test.com/home", "https://example.com/page2"]
Expected outputs:
count_secure_links(url_list) → 2
most_common_domain(url_list) → "example.com"
extract_unique_paths(url_list) → 3
longest_url(url_list) → "https://example.com/page1"
Parsing a URL¶
"https://example.com/page1"
↑ ↑
after "//" first "/" after domain
split("//")[1] → "example.com/page1"
.split("/")[0] → "example.com" ← domain
.split("/", 1)[1] → "page1" ← path (without leading /)
Full path (with leading /): "/" + "page1" = "/page1"
URLs without a path like "https://example.com":
- split("//")[1] = "example.com"
- No / after domain → path = ""
Function 1 - count_secure_links¶
def count_secure_links(url_list: list) -> int:
    """Count the links that use the secure https scheme."""
    secure = [link for link in url_list if link.startswith("https://")]
    return len(secure)
From sample: "https://example.com/page1" ✅, "http://test.com/home" ❌, "https://example.com/page2" ✅ → 2 ✓
Function 2 - most_common_domain¶
def most_common_domain(url_list: list) -> str:
    """Return the most frequent domain (first occurrence wins ties)."""
    from collections import Counter

    freq = Counter(link.split("//")[1].split("/")[0] for link in url_list)
    # Counter (a dict) preserves first-encounter order, and max() only
    # replaces the current best on a strictly larger count, so the first
    # encountered domain among the tied maxima is returned.
    return max(freq, key=freq.get)
From sample: ["example.com", "test.com", "example.com"] → example.com count=2 → "example.com" ✓
Why not use Counter.most_common()?
Counter.most_common(1) returns the most frequent element, and since Python 3.7 elements with equal counts are ordered in the order first encountered — but that guarantee is subtle and version-dependent. Iterating over domains in original order and returning the first with max count makes the first-occurrence tie-break explicit and obviously correct.
Function 3 - extract_unique_paths¶
def extract_unique_paths(url_list: list) -> int:
    """Count distinct paths; URLs without a path contribute ""."""
    seen = set()
    for link in url_list:
        tail = link.split("//")[1]
        # partition gives an empty separator when there is no "/"
        _, slash, rest = tail.partition("/")
        seen.add(("/" + rest) if slash else "")
    return len(seen)
From sample:
- "https://example.com/page1" → /page1
- "http://test.com/home" → /home
- "https://example.com/page2" → /page2
All three are distinct → 3 ✓
Function 4 - longest_url¶
def longest_url(url_list: list) -> str:
    return max(url_list, key=len)
max() scans left to right and only updates on strictly longer — ties return the first occurrence.
From sample: lengths = 26, 22, 26 → first with length 26 = "https://example.com/page1" ✓
Complete solution approaches¶
from collections import Counter
def count_secure_links(url_list: list) -> int:
    """Count URLs whose scheme is https (prefix "https://")."""
    # startswith returns bool; bools sum as 0/1.
    return sum(map(lambda link: link.startswith("https://"), url_list))
def most_common_domain(url_list: list) -> str:
    """Most frequent domain; ties resolved by first appearance."""
    hosts = [link.split("//")[1].split("/")[0] for link in url_list]
    counts = Counter(hosts)
    target = max(counts.values())
    # scan in original order so the first tied domain is returned
    for host in hosts:
        if counts[host] == target:
            return host
def extract_unique_paths(url_list: list) -> int:
    """Count distinct paths across url_list ("" when there is no path)."""
    unique = {
        ("/" + tail.split("/", 1)[1]) if "/" in tail else ""
        for tail in (link.split("//")[1] for link in url_list)
    }
    return len(unique)
def longest_url(url_list: list) -> str:
    """Return the longest URL; on equal lengths the first one wins."""
    best = url_list[0]
    for link in url_list:
        # strict comparison keeps the earliest URL on ties
        if len(link) > len(best):
            best = link
    return best
def count_secure_links(url_list: list) -> int:
    """Count the URLs that begin with the secure "https://" prefix."""
    return sum(1 for link in url_list if link.startswith("https://"))
def most_common_domain(url_list: list) -> str:
    """Most common domain; ties resolved by first appearance.

    Uses a plain dict as the counter.  Dicts preserve insertion order,
    so iterating the counter visits domains in first-appearance order —
    no separate order list is required.
    """
    counts = {}
    for link in url_list:
        host = link.split("//")[1].split("/")[0]
        counts[host] = counts.get(host, 0) + 1
    winner = max(counts.values())
    for host in counts:
        if counts[host] == winner:
            return host
def extract_unique_paths(url_list: list) -> int:
    """Number of distinct paths ("" for a URL without a path)."""
    def to_path(link):
        # everything after the scheme separator "//"
        remainder = link.split("//")[1]
        head, sep, tail = remainder.partition("/")
        # sep is "" when no "/" follows the domain → empty path
        return "/" + tail if sep else ""

    return len({to_path(link) for link in url_list})
def longest_url(url_list: list) -> str:
    """Return the longest URL string.

    max() scans left to right and only replaces the running best on a
    strictly larger key, so equal-length ties keep the first occurrence —
    exactly the manual-loop behaviour.
    """
    return max(url_list, key=len)
from collections import Counter
def count_secure_links(url_list: list) -> int:
    """Return the number of URLs starting with "https://".

    Fix: the original materialized a throwaway list just to take its
    length (len(list(filter(...)))); summing a generator counts the
    matches without building any intermediate collection.
    """
    return sum(1 for link in url_list if link.startswith("https://"))
def most_common_domain(url_list: list) -> str:
    """Most frequent domain, ties broken by first appearance."""
    hosts = [link.split("//")[1].split("/")[0] for link in url_list]
    freq = Counter(hosts)
    peak = max(freq.values())
    # first host (in original order) reaching the peak count wins
    for host in hosts:
        if freq[host] == peak:
            return host
def extract_unique_paths(url_list: list) -> int:
    """Count unique paths; URLs without a path map to ""."""
    paths = set()
    for link in url_list:
        remainder = link.split("//")[1]
        if "/" in remainder:
            paths.add("/" + remainder.split("/", 1)[1])
        else:
            paths.add("")
    return len(paths)
def longest_url(url_list: list) -> str:
    """Return the longest URL; equal-length ties keep the earliest URL,
    because max() only replaces the running best on a strictly larger key.

    Fix: `key=lambda u: len(u)` needlessly wrapped the builtin —
    pass `key=len` directly (clearer and avoids a per-item Python call).
    """
    return max(url_list, key=len)
Key takeaways¶
split("//")[1].split("/")[0] extracts the domain
Split on // to skip the protocol, then split on / to separate the domain from the path. Taking index [0] gives just the domain regardless of whether a path exists.
next(d for d in domains if ...) for first-occurrence tie-breaking
Iterating over domains in original order and returning the first match with max frequency correctly handles ties - no sorting or secondary key needed.
set() deduplicates paths automatically
Adding all paths to a set removes duplicates. len(set(paths)) counts unique paths in one step - no need to check for membership manually.