# RSS experiments
Mix.install([
{:explorer, "~> 0.10.1"},
{:fast_rss, "~> 0.5.0"},
{:req, "~> 0.5.10"},
{:bumblebee, "~> 0.6.0"},
{:exla, ">= 0.0.0"},
{:nx, "~> 0.9.0"}
])
## Section
alias Req
defmodule Montage.ImageEmbedding do
  @moduledoc """
  Image embeddings backed by the CLIP vision model.

  `serving/0` builds the `Nx.Serving`; `predict/1` sends an image to a serving
  process addressed by this module's name.
  """

  @model_repo "openai/clip-vit-base-patch32"

  @doc """
  Loads the CLIP vision model and its featurizer from the Hugging Face repo
  `#{@model_repo}` and returns an image-embedding serving.

  The serving is configured with the EXLA compiler, the `:l2_norm` embedding
  processor and the `:embedding` output attribute.

  Raises a `MatchError` if the model or the featurizer cannot be loaded.
  """
  def serving() do
    repo = {:hf, @model_repo}

    {:ok, model_info} =
      Bumblebee.load_model(repo,
        module: Bumblebee.Vision.ClipVision,
        architecture: :for_embedding
      )

    {:ok, featurizer} = Bumblebee.load_featurizer(repo)

    serving_opts = [
      defn_options: [compiler: EXLA],
      embedding_processor: :l2_norm,
      output_attribute: :embedding
    ]

    Bumblebee.Vision.image_embedding(model_info, featurizer, serving_opts)
  end

  @doc """
  Runs `image` through the serving addressed as `#{inspect(__MODULE__)}` via
  `Nx.Serving.batched_run/2` and returns its result.
  """
  def predict(image), do: Nx.Serving.batched_run(__MODULE__, image)
end
defmodule DeathDetector do
  @moduledoc """
  Experiments for detecting the death of a famous person from RSS feeds.
  """

  @typedoc "An RSS feed URL. Just a string for now; may be expanded later."
  @type rss_feed() :: String.t()

  @model_id "facebook/bart-large-mnli"
  @label_positive "mentions death of a famous person"
  @label_negative "does not mention death of person by proper name"

  @doc """
  Placeholder: takes a list of RSS feeds and currently always returns `[]`.
  """
  @spec recent_deaths(list(rss_feed())) :: list(String.t())
  def recent_deaths(_rss_feeds) do
    []
  end

  @doc """
  Downloads `url` and parses the response body as RSS.

  Returns whatever `FastRSS.parse_rss/1` returns for a 200 response, and
  `{:error, {:http_status, status}}` for any other status. Transport-level
  failures still raise, because the request is made with `Req.get!/1`.
  """
  @spec get_rss_body(String.t()) :: {:ok, map()} | {:error, term()}
  def get_rss_body(url) do
    case Req.get!(url) do
      %{status: 200, body: body} -> FastRSS.parse_rss(body)
      %{status: status} -> {:error, {:http_status, status}}
    end
  end

  @doc """
  Returns `{true, description}` for every feed item whose description contains
  `keyword` (default `"Ukraine"`).

  Items are read from the `"items"` key (string keys, as used on the parsed feed
  elsewhere in this file); the atom key `:items` is accepted as a fallback.
  Items without a string `"description"` are skipped, and a map without items
  yields `[]`.
  """
  @spec process_parsed_rss(map(), String.t()) :: [{true, String.t()}]
  def process_parsed_rss(rss_map, keyword \\ "Ukraine") do
    for %{"description" => description} <- feed_items(rss_map),
        is_binary(description),
        String.contains?(description, keyword) do
      {true, description}
    end
  end

  @doc """
  Classifies `description` with a zero-shot classifier and returns `true` when
  the score of the label #{inspect(@label_positive)} is at least `threshold`.

  The model and tokenizer are loaded on every call.
  """
  @spec detect_death(String.t(), number()) :: boolean()
  def detect_death(description, threshold \\ 0.8) do
    {:ok, bart} = Bumblebee.load_model({:hf, @model_id})
    {:ok, tokenizer_info} = Bumblebee.load_tokenizer({:hf, @model_id})

    labels = [@label_positive, @label_negative]
    serving = Bumblebee.Text.zero_shot_classification(bart, tokenizer_info, labels)

    %{predictions: predictions} = Nx.Serving.run(serving, description)

    # Look the label up by name rather than by position in the predictions list.
    %{score: score} = Enum.find(predictions, &(&1.label == @label_positive))

    score >= threshold
  end

  # Extracts the item list from a parsed feed, tolerating string or atom keys.
  defp feed_items(%{"items" => items}) when is_list(items), do: items
  defp feed_items(%{items: items}) when is_list(items), do: items
  defp feed_items(_rss_map), do: []
end
# Fetch the raw XML of the NYT World feed.
url = "https://rss.nytimes.com/services/xml/rss/nyt/World.xml"
resp = Req.get!(url)
body = resp.body

# Make EXLA the default Nx backend before any model is run.
Nx.global_default_backend(EXLA.Backend)

# Smoke test of the classifier on a headline that reports no death.
headline = "Donald Trump launches a new war against the cats"
DeathDetector.detect_death(headline)
# Zero-shot classification experiment with hand-written labels.
model_id = "facebook/bart-large-mnli"
{:ok, bart} = Bumblebee.load_model({:hf, model_id})
{:ok, tokenizer_info} = Bumblebee.load_tokenizer({:hf, model_id})

positive_label = "mentions death of a famous"
labels = [positive_label, "does not mention death of famous"]
zero_shot_serving = Bumblebee.Text.zero_shot_classification(bart, tokenizer_info, labels)

samples = [
  "Pope Francis died this morning",
  "The president of Venezuela has passed away",
  "Accident kill 100 passagers on board of train"
]

# Keep every result instead of overwriting `output` after each run.
outputs = Enum.map(samples, &Nx.Serving.run(zero_shot_serving, &1))

# `output` stays bound to the result for the last sample, as before.
output = List.last(outputs)

# Look the score up by the label actually passed to the serving. The previous
# pattern matched "mentions death of a person", which is not one of `labels`,
# so it could never match.
%{score: score_for_label1} = Enum.find(output.predictions, &(&1.label == positive_label))
score_for_label1
# Fetch the raw XML of the NYT World feed.
url = "https://rss.nytimes.com/services/xml/rss/nyt/World.xml"
resp = Req.get!(url)
body = Map.get(resp, :body)

alias FastRSS
{:ok, map_of_rss} = FastRSS.parse_rss(body)

# Explore the parsed structure: top-level keys, then the first item.
Map.keys(map_of_rss)
items = map_of_rss["items"]
first_item = Enum.at(items, 0)
Map.keys(first_item)
first_item["link"]

# Keep the descriptions that mention "Ukraine". Items without a string
# description are skipped instead of crashing `String.contains?/2`.
for %{"description" => description} <- items,
    is_binary(description),
    String.contains?(description, "Ukraine") do
  {true, description}
end