Github orgs analysis
Section
orgs_structs =
Sanbase.Repo.all(Sanbase.Project.GithubOrganization) |> Sanbase.Repo.preload(:project)
orgs = orgs_structs |> Enum.map(& &1.organization)
orgs = ["caffeinepub"] ++ orgs
org_to_slug =
Map.new(orgs_structs, fn s -> {s.organization, s.project.slug} end)
|> Map.put("caffeinepub", "caffeinepub")
sql = """
SELECT
owner,
count(*) AS all,
countIf(actor like '%[bot]') AS bots,
floor(bots / all, 2) AS bot_ratio
FROM github_v2
WHERE owner IN ({{orgs}}) and dt >= now() - interval 7 day
GROUP BY owner
ORDER BY bot_ratio DESC
"""
params = %{orgs: orgs}
query_struct = Sanbase.Clickhouse.Query.new(sql, params)
{:ok, result} = Sanbase.ClickhouseRepo.query_transform(query_struct, & &1)
data = Enum.filter(result, fn [_org, total, _, _] -> total > 30 end)
|> Enum.take(150)
|> Enum.map(fn [owner, all, bot, ratio] ->
[org_to_slug[owner], owner, all, bot, ratio]
end)
NimbleCSV.RFC4180.dump_to_iodata(data)
|> IO.puts()