S3Q1 · URL Link Analytics¶
⚡ Quick Reference
Four functions on a list of URL strings:
def count_secure_links(url_list):
    """Return how many URLs in url_list use the secure https scheme."""
    # bool is an int subclass, so each True contributes 1 to the sum.
    return sum(link.startswith("https://") for link in url_list)
def most_common_domain(url_list):
    """Return the most frequent domain; ties go to the domain whose
    first occurrence is earliest in url_list."""
    from collections import Counter

    hosts = [link.split("//")[1].split("/")[0] for link in url_list]
    counts = Counter(hosts)
    top = max(counts.values())
    # hosts keeps the original order, so the first hit wins ties.
    return next(h for h in hosts if counts[h] == top)
def extract_unique_paths(url_list):
    """Count the distinct paths across url_list; a URL with no path
    contributes the empty string."""
    def path_of(link):
        tail = link.split("//")[1]
        if "/" not in tail:
            return ""
        return "/" + tail.split("/", 1)[1]

    return len({path_of(link) for link in url_list})
def longest_url(url_list):
    """Return the longest URL; on equal lengths the earliest wins."""
    winner = url_list[0]
    for candidate in url_list[1:]:
        # strictly greater → ties keep the earlier winner
        if len(candidate) > len(winner):
            winner = candidate
    return winner
Key rules:
- Domain = part after // and before first /
- Path = everything from the first / after the domain (empty string if none)
- most_common_domain ties → first occurring domain
- longest_url ties → first occurring URL (max preserves first on tie)
Template Code¶
def count_secure_links(url_list: list) -> int:
    '''Returns number of URLs starting with "https://".

    url_list: list of URL strings, e.g. "https://example.com/page1".
    Returns 0 for an empty list.
    '''
    ...
def most_common_domain(url_list: list) -> str:
    '''Returns the most frequently occurring domain.
    Ties resolved by first occurrence in url_list.

    Domain = the part of the URL after "//" and before the first
    "/" that follows it, e.g. "example.com".
    '''
    ...
def extract_unique_paths(url_list: list) -> int:
    '''Returns the count of unique paths across all URLs.
    URLs without a path contribute an empty string.

    Path = "/" plus everything after the first "/" following the
    domain, e.g. "/page1"; "" when the URL has no such "/".
    '''
    ...
def longest_url(url_list: list) -> str:
    '''Returns the longest URL string.
    Ties resolved by first occurrence in url_list.

    Length is measured with len() over the full URL string,
    scheme included.
    '''
    ...
Problem Statement¶
Problem
Implement four URL analytics functions on a list of URL strings.
Sample data: url_list = ["https://example.com/page1", "http://test.com/home", "https://example.com/page2"]
Expected outputs:
count_secure_links(url_list) → 2
most_common_domain(url_list) → "example.com"
extract_unique_paths(url_list) → 3
longest_url(url_list) → "https://example.com/page1"
Parsing a URL¶
"https://example.com/page1"
↑ ↑
after "//" first "/" after domain
split("//")[1] → "example.com/page1"
.split("/")[0] → "example.com" ← domain
.split("/", 1)[1] → "page1" ← path (without leading /)
Full path (with leading /): "/" + "page1" = "/page1"
URLs without a path like "https://example.com":
- split("//")[1] = "example.com"
- No / after domain → path = ""
Function 1 - count_secure_links¶
def count_secure_links(url_list: list) -> int:
    """Count the links that use the secure https scheme."""
    secure = [link for link in url_list if link.startswith("https://")]
    return len(secure)
From sample: "https://example.com/page1" ✅, "http://test.com/home" ❌, "https://example.com/page2" ✅ → 2 ✓
Function 2 - most_common_domain¶
def most_common_domain(url_list: list) -> str:
    """Return the most frequent domain (first occurrence wins ties)."""
    from collections import Counter

    freq = Counter(link.split("//")[1].split("/")[0] for link in url_list)
    # Counter (a dict) preserves first-encounter order, and max() only
    # replaces the current best on a strictly larger count, so the first
    # encountered domain among the tied maxima is returned.
    return max(freq, key=freq.get)
From sample: ["example.com", "test.com", "example.com"] → example.com count=2 → "example.com" ✓
Why not use Counter.most_common()?
Counter.most_common(1) returns the most frequent element, and since Python 3.7 elements with equal counts are ordered in the order first encountered — but that guarantee is subtle and version-dependent. Iterating over domains in original order and returning the first with max count makes the first-occurrence tie-break explicit and obviously correct.
Function 3 - extract_unique_paths¶
def extract_unique_paths(url_list: list) -> int:
    """Count distinct paths; URLs without a path contribute ""."""
    seen = set()
    for link in url_list:
        tail = link.split("//")[1]
        # partition gives an empty separator when there is no "/"
        _, slash, rest = tail.partition("/")
        seen.add(("/" + rest) if slash else "")
    return len(seen)
From sample:
- "https://example.com/page1" → /page1
- "http://test.com/home" → /home
- "https://example.com/page2" → /page2
All three are distinct → 3 ✓
Function 4 - longest_url¶
def longest_url(url_list: list) -> str:
    return max(url_list, key=len)
max() scans left to right and only updates on strictly longer — ties return the first occurrence.
From sample: lengths = 26, 22, 26 → first with length 26 = "https://example.com/page1" ✓
Complete solution approaches¶
from collections import Counter
def count_secure_links(url_list: list) -> int:
    """Count URLs whose scheme is https (prefix "https://")."""
    # startswith returns bool; bools sum as 0/1.
    return sum(map(lambda link: link.startswith("https://"), url_list))
def most_common_domain(url_list: list) -> str:
    """Most frequent domain; ties resolved by first appearance."""
    hosts = [link.split("//")[1].split("/")[0] for link in url_list]
    counts = Counter(hosts)
    target = max(counts.values())
    # scan in original order so the first tied domain is returned
    for host in hosts:
        if counts[host] == target:
            return host
def extract_unique_paths(url_list: list) -> int:
    """Count distinct paths across url_list ("" when there is no path)."""
    unique = {
        ("/" + tail.split("/", 1)[1]) if "/" in tail else ""
        for tail in (link.split("//")[1] for link in url_list)
    }
    return len(unique)
def longest_url(url_list: list) -> str:
    """Return the longest URL; on equal lengths the first one wins."""
    best = url_list[0]
    for link in url_list:
        # strict comparison keeps the earliest URL on ties
        if len(link) > len(best):
            best = link
    return best
def count_secure_links(url_list: list) -> int:
    """Count the URLs that begin with the secure "https://" prefix."""
    return sum(1 for link in url_list if link.startswith("https://"))
def most_common_domain(url_list: list) -> str:
    """Most common domain; ties resolved by first appearance.

    Uses a plain dict as the counter.  Dicts preserve insertion order,
    so iterating the counter visits domains in first-appearance order —
    no separate order list is required.
    """
    counts = {}
    for link in url_list:
        host = link.split("//")[1].split("/")[0]
        counts[host] = counts.get(host, 0) + 1
    winner = max(counts.values())
    for host in counts:
        if counts[host] == winner:
            return host
def extract_unique_paths(url_list: list) -> int:
    """Number of distinct paths ("" for a URL without a path)."""
    def to_path(link):
        # everything after the scheme separator "//"
        remainder = link.split("//")[1]
        head, sep, tail = remainder.partition("/")
        # sep is "" when no "/" follows the domain → empty path
        return "/" + tail if sep else ""

    return len({to_path(link) for link in url_list})
def longest_url(url_list: list) -> str:
    """Return the longest URL string.

    max() scans left to right and only replaces the running best on a
    strictly larger key, so equal-length ties keep the first occurrence —
    exactly the manual-loop behaviour.
    """
    return max(url_list, key=len)
from collections import Counter
def count_secure_links(url_list: list) -> int:
    """Return the number of URLs starting with "https://".

    Fix: the original materialized a throwaway list just to take its
    length (len(list(filter(...)))); summing a generator counts the
    matches without building any intermediate collection.
    """
    return sum(1 for link in url_list if link.startswith("https://"))
def most_common_domain(url_list: list) -> str:
    """Most frequent domain, ties broken by first appearance."""
    hosts = [link.split("//")[1].split("/")[0] for link in url_list]
    freq = Counter(hosts)
    peak = max(freq.values())
    # first host (in original order) reaching the peak count wins
    for host in hosts:
        if freq[host] == peak:
            return host
def extract_unique_paths(url_list: list) -> int:
    """Count unique paths; URLs without a path map to ""."""
    paths = set()
    for link in url_list:
        remainder = link.split("//")[1]
        if "/" in remainder:
            paths.add("/" + remainder.split("/", 1)[1])
        else:
            paths.add("")
    return len(paths)
def longest_url(url_list: list) -> str:
    """Return the longest URL; equal-length ties keep the earliest URL,
    because max() only replaces the running best on a strictly larger key.

    Fix: `key=lambda u: len(u)` needlessly wrapped the builtin —
    pass `key=len` directly (clearer and avoids a per-item Python call).
    """
    return max(url_list, key=len)
Key takeaways¶
split("//")[1].split("/")[0] extracts the domain
Split on // to skip the protocol, then split on / to separate the domain from the path. Taking index [0] gives just the domain regardless of whether a path exists.
next(d for d in domains if ...) for first-occurrence tie-breaking
Iterating over domains in original order and returning the first match with max frequency correctly handles ties - no sorting or secondary key needed.
set() deduplicates paths automatically
Adding all paths to a set removes duplicates. len(set(paths)) counts unique paths in one step - no need to check for membership manually.