g.get_repo("pola-rs/polars")
# NOTE(review): the statement above starts before the visible source; `repo`
# used below is presumably bound by it -- confirm against the full file.
releases = repo.get_releases()

# Collect one record per GitHub release: tag, publication date (day and month
# granularity) and the raw release-note text.
data = []
for release in tqdm(releases):
    version = release.tag_name
    date = release.published_at
    data.append(
        {
            "Version": version,
            # `published_at` can be None (e.g. an unpublished draft); keep the
            # row with nulls rather than failing on `None.strftime`, so the
            # row count stays aligned with the per-release counts below.
            "Date": date.strftime("%Y-%m-%d") if date is not None else None,
            "Month": date.strftime("%Y-%m") if date is not None else None,
            "Description": release.body,
        }
    )

df = pl.DataFrame(data)
release_notes = df["Description"].to_list()

# A section is a "##" heading line followed by everything up to the next "##".
pattern = r"##\s*([^\n]+)\s*((?:(?!##).|\n)*)"
section_re = re.compile(pattern)  # compiled once instead of per release

# For every release, map each section heading (emoji removed, whitespace
# stripped) to the number of "- " occurrences in that section's text, which
# serves as the count of bullet entries.
dict_list = []
for note in release_notes:
    dic = {}
    # A release without a description has body None; treat it as empty text
    # so it contributes an empty dict instead of raising TypeError.
    matches = section_re.findall(note or "")
    for heading, section_body in matches:
        dic[remove_emoji(heading).strip()] = section_body.count("- ")
    dict_list.append(dic)

# One column per heading seen; one row per release, in the same order as `df`.
release_cnt_df = pl.from_dicts(dict_list)

# NOTE(review): no "Language" column is created in the visible code (it would
# only exist if a release-note heading were literally "Language"); confirm
# where it is meant to come from, otherwise this filter cannot resolve it.
df = (
    pl.concat([df, release_cnt_df], how="horizontal")
    .filter(pl.col("Language") == "py")
    .drop("Description")
)