Skip to content

S3Q1 · Batsman Performance Analysis

⚡ Quick Reference

Five functions on cricket career data (dict of year → list of runs):

def overall_run_stats(batsman_data: dict) -> dict:
    """Return min, max, total and round()-ed average over every run of every year."""
    flat = []
    for yearly in batsman_data.values():
        flat.extend(yearly)
    total = sum(flat)
    return dict(min=min(flat), max=max(flat), total=total,
                average=round(total / len(flat)))

def century_rate(runs: list) -> int:
    """Return the percentage of scores in ``runs`` that are >= 100, passed through round()."""
    hundreds = len([score for score in runs if score >= 100])
    return round(hundreds / len(runs) * 100)

def average_yearly_century_rate(batsman_data: dict) -> int:
    """Return the round()-ed mean of century_rate() taken over each year's runs."""
    yearly = list(map(century_rate, batsman_data.values()))
    return round(sum(yearly) / len(yearly))

def years_with_more_than_average_yearly_century_rate(batsman_data: dict) -> set:
    """Return the set of years whose century rate is strictly above the yearly average rate."""
    threshold = average_yearly_century_rate(batsman_data)
    above = set()
    for year in batsman_data:
        if century_rate(batsman_data[year]) > threshold:
            above.add(year)
    return above

def year_with_most_average_runs(batsman_data: dict) -> int:
    """Return the year with the highest mean runs; the smallest year wins a tie."""
    def rank(year):
        scores = batsman_data[year]
        # Negated mean first so min() favours the largest mean; year breaks ties.
        return (-sum(scores) / len(scores), year)

    return min(batsman_data, key=rank)

Key rules:
- Century = runs >= 100
- century_rate = (centuries / matches) * 100, rounded to nearest int
- average_yearly_century_rate = mean of per-year century rates
- years_with_more_than_average_yearly_century_rate uses > (strict), not >=
- year_with_most_average_runs — ties are broken by the earliest year


Problem Statement

Problem

Given batsman_data: dict where keys are years and values are lists of runs scored in each match that year, implement five analysis functions.

Sample data:

# Sample input: year -> list of runs scored in each match that year.
data = {
    2016: [88, 66, 130, 122, 117, 95, 86],
    2017: [149, 66, 110],
    2018: [157, 84],
    2019: [148, 127, 71, 117],
    2020: [91, 156, 80, 135, 152, 109]
}

Function 1 — overall_run_stats

Flatten all runs across all years into one list, then compute stats.

Call
overall_run_stats(data)
Output
{'min': 66, 'max': 157, 'total': 2456, 'average': 112}
all_runs = [r for runs in batsman_data.values() for r in runs]
# flattens: [[88,66,...], [149,66,110], ...] → [88, 66, 130, ...]

The nested comprehension for runs in batsman_data.values() for r in runs flattens the 2D structure into a single list. Then min, max, sum, and round(sum/len) give the four stats.


Function 2 — century_rate

Call
century_rate(data[2016])
Output
43

2016 runs: [88, 66, 130, 122, 117, 95, 86]. Centuries (≥100): 130, 122, 117 → 3 out of 7 matches. 3/7 * 100 = 42.857… → round(42.857) = 43

def century_rate(runs: list) -> int:
    """Return (centuries / matches) * 100 passed through round(); a century is a score >= 100."""
    centuries = sum(1 for score in runs if score >= 100)
    matches = len(runs)
    share = centuries / matches
    return round(share * 100)

Function 3 — average_yearly_century_rate

Call
average_yearly_century_rate(data)
Output
60

Compute century_rate for each year, then average:

Year | Centuries | Matches | Rate
2016 | 3 centuries | 7 | 43
2017 | 2 centuries | 3 | 67
2018 | 1 century | 2 | 50
2019 | 3 centuries | 4 | 75
2020 | 4 centuries | 6 | 67

Average = (43 + 67 + 50 + 75 + 67) / 5 = 302 / 5 = 60.4 → round(60.4) = 60


Function 4 — years_with_more_than_average_yearly_century_rate

Call
years_with_more_than_average_yearly_century_rate(data)
Output
{2017, 2019, 2020}

Average = 60. Years with rate strictly greater than 60:
- 2016: 43 ❌
- 2017: 67 ✅
- 2018: 50 ❌
- 2019: 75 ✅
- 2020: 67 ✅

Result: {2017, 2019, 2020}


Function 5 — year_with_most_average_runs

Call
year_with_most_average_runs(data)
Output
2018
Year Runs Average
2016 704 100.57
2017 325 108.33
2018 241 120.5 ← highest
2019 463 115.75
2020 723 120.5

2018 and 2020 tie at 120.5 — return the earliest: 2018

The tie-breaking trick: take the minimum over the key (-average, year) — negating the average makes the highest average the smallest key, and the year then picks the earliest year among ties.


Complete solution approaches

def overall_run_stats(batsman_data: dict) -> dict:
    """Flatten every year's runs into one list and summarise it.

    Returns a dict with keys "min", "max", "total" and "average"
    (the mean passed through round()).
    """
    scores = [score for season in batsman_data.values() for score in season]
    lowest, highest, total = min(scores), max(scores), sum(scores)
    return {
        "min": lowest,
        "max": highest,
        "total": total,
        "average": round(total / len(scores)),
    }

def century_rate(runs: list) -> int:
    """Return the round()-ed percentage of scores in ``runs`` that reach 100 or more."""
    # Each comparison yields a bool, and summing bools counts the True ones.
    centuries = sum(score >= 100 for score in runs)
    return round(centuries / len(runs) * 100)

def average_yearly_century_rate(batsman_data: dict) -> int:
    """Average the per-year century_rate() values and pass the mean through round()."""
    rate_sum = sum(century_rate(season) for season in batsman_data.values())
    return round(rate_sum / len(batsman_data))

def years_with_more_than_average_yearly_century_rate(batsman_data: dict) -> set:
    """Return the years whose century rate is strictly greater than the average yearly rate."""
    cutoff = average_yearly_century_rate(batsman_data)
    return {
        year
        for year in batsman_data
        if century_rate(batsman_data[year]) > cutoff
    }

def year_with_most_average_runs(batsman_data: dict) -> int:
    """Return the year with the highest mean runs per match, preferring the smallest year on ties."""
    means = {year: sum(scores) / len(scores) for year, scores in batsman_data.items()}
    # Tuple key: the negated mean ranks the highest mean first, then the year breaks ties.
    return min(means, key=lambda year: (-means[year], year))
def overall_run_stats(batsman_data: dict) -> dict:
    """Collect every run from every year, then report min, max, total and round()-ed average."""
    collected = []
    for season in batsman_data.values():
        collected += season
    run_sum = sum(collected)
    stats = {}
    stats["min"] = min(collected)
    stats["max"] = max(collected)
    stats["total"] = run_sum
    stats["average"] = round(run_sum / len(collected))
    return stats

def century_rate(runs: list) -> int:
    """Count the scores of 100 or more and return them as a round()-ed percentage of all matches."""
    hundreds = 0
    for score in runs:
        if score >= 100:
            hundreds += 1
    return round(hundreds / len(runs) * 100)

def average_yearly_century_rate(batsman_data: dict) -> int:
    """Sum century_rate() over every year and return the round()-ed mean."""
    rate_sum = 0
    seasons = 0
    for season_runs in batsman_data.values():
        rate_sum = rate_sum + century_rate(season_runs)
        seasons = seasons + 1
    return round(rate_sum / seasons)

def years_with_more_than_average_yearly_century_rate(batsman_data: dict) -> set:
    """Collect the years whose century rate is strictly above the average yearly century rate."""
    cutoff = average_yearly_century_rate(batsman_data)
    selected = set()
    for year, season_runs in batsman_data.items():
        if not century_rate(season_runs) > cutoff:
            continue
        selected.add(year)
    return selected

def year_with_most_average_runs(batsman_data: dict) -> int:
    """Return the year with the highest mean runs per match.

    Ties on the mean are broken in favour of the smallest (earliest) year.
    Returns None when ``batsman_data`` is empty.
    """
    best_year = None
    best_avg = None
    for year, runs in batsman_data.items():
        avg = sum(runs) / len(runs)
        # "best_year is None" marks the first year seen. A numeric sentinel
        # such as -1 would reject every year whose mean is below it and would
        # compare a year against None when a mean equals it.
        if (best_year is None
                or avg > best_avg
                or (avg == best_avg and year < best_year)):
            best_avg = avg
            best_year = year
    return best_year
def overall_run_stats(batsman_data: dict) -> dict:
    """Return min, max, total and round()-ed average over all runs in all years.

    The per-year run lists are flattened with itertools.chain.from_iterable;
    the earlier identity map() over an already-built list did no work.
    """
    from itertools import chain  # function-scope import keeps the snippet self-contained

    all_runs = list(chain.from_iterable(batsman_data.values()))
    total = sum(all_runs)
    return {"min": min(all_runs), "max": max(all_runs),
            "total": total, "average": round(total / len(all_runs))}

def century_rate(runs: list) -> int:
    """Return the round()-ed percentage of matches in ``runs`` with a score of 100 or more."""
    centuries = list(filter(lambda r: r >= 100, runs))
    return round(len(centuries) / len(runs) * 100)

def average_yearly_century_rate(batsman_data: dict) -> int:
    """Map century_rate() over every year's runs and return the round()-ed mean of the results."""
    rate_sum = sum(map(lambda runs: century_rate(runs), batsman_data.values()))
    return round(rate_sum / len(batsman_data))

def years_with_more_than_average_yearly_century_rate(batsman_data: dict) -> set:
    """Filter the years down to those whose century rate is strictly above the yearly average."""
    cutoff = average_yearly_century_rate(batsman_data)
    # Iterating a dict yields its keys, so the lambda receives each year.
    above = filter(lambda y: century_rate(batsman_data[y]) > cutoff, batsman_data)
    return set(above)

def year_with_most_average_runs(batsman_data: dict) -> int:
    """Return the year with the highest mean runs; ties go to the smallest year."""
    def avg_runs(y):
        season = batsman_data[y]
        return sum(season) / len(season)

    # min() over (-mean, year): largest mean first, then earliest year.
    return min(batsman_data, key=lambda y: (-avg_runs(y), y))

filter(lambda r: r >= 100, runs) selects centuries. map(lambda runs: century_rate(runs), ...) applies century rate to all years. filter(lambda y: ..., batsman_data) iterates over keys checking the condition.


Key takeaways

01

Nested comprehension to flatten

[r for runs in data.values() for r in runs] flattens a dict of lists into one list. Read as: "for each year's runs, for each run in those runs".

02

Tie-breaking with tuple key

min(data, key=lambda y: (-avg(y), y)) picks the year with the highest average (the average is negated, so the largest average gives the smallest key), then the earliest year for ties. Tuple comparison in Python is lexicographic — the first element takes priority.

03

Reuse functions — don't recompute

average_yearly_century_rate calls century_rate, and years_with_more_than_average_yearly_century_rate calls both. Build on your own functions — it reduces repetition and makes each function testable independently.