Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

RSS experiments

rss_experiments.livemd

RSS experiments

Mix.install([
  {:explorer, "~> 0.10.1"},
  {:fast_rss, "~> 0.5.0"},
  {:req, "~> 0.5.10"},
  {:bumblebee, "~> 0.6.0"},
  {:exla, ">= 0.0.0"},
  {:nx, "~> 0.9.0"}
])

Section

alias Req
defmodule Montage.ImageEmbedding do
  @moduledoc """
  Image-embedding serving built on the CLIP vision model from Hugging Face.
  """

  @model_repo "openai/clip-vit-base-patch32"

  @doc """
  Loads the CLIP vision model and its featurizer from `@model_repo` and builds
  an image-embedding serving from them.

  The serving is compiled with EXLA, L2-normalises the embedding and reads it
  from the model's `:embedding` output.
  """
  def serving do
    repo = {:hf, @model_repo}

    {:ok, model_info} =
      Bumblebee.load_model(repo,
        module: Bumblebee.Vision.ClipVision,
        architecture: :for_embedding
      )

    {:ok, featurizer} = Bumblebee.load_featurizer(repo)

    embedding_opts = [
      defn_options: [compiler: EXLA],
      embedding_processor: :l2_norm,
      output_attribute: :embedding
    ]

    Bumblebee.Vision.image_embedding(model_info, featurizer, embedding_opts)
  end

  @doc """
  Runs `image` through the serving process registered under this module's name.
  """
  def predict(image), do: Nx.Serving.batched_run(__MODULE__, image)
end
defmodule DeathDetector do
  @moduledoc """
  Detects mentions of a famous person's death in RSS feed content.

  Feeds are fetched with `Req`, parsed with `FastRSS`, and a piece of text can
  be scored with a zero-shot classifier built through `Bumblebee`.
  """

  # For now a feed is just a string; the type may be expanded later.
  @type rss_feed() :: String.t()

  @model_id "facebook/bart-large-mnli"
  @label_positive "mentions death of a famous person"
  @label_negative "does not mention death of person by proper name"

  @doc """
  Returns the recent deaths found in `rss_feeds`.

  Not implemented yet: the argument is ignored and an empty list is returned.
  """
  @spec recent_deaths(list(rss_feed())) :: list(String.t())
  def recent_deaths(_rss_feeds) do
    []
  end

  @doc """
  Fetches `url` and parses the response body as RSS.

  Returns whatever `FastRSS.parse_rss/1` returns for a 200 response
  (`{:ok, parsed_feed}` or `{:error, reason}`). For any other status it prints
  "RSS feed unreachable" and returns `{:error, {:http_status, status}}`.

  `Req.get!/1` raises when the request itself fails.
  """
  @spec get_rss_body(String.t()) :: {:ok, map()} | {:error, term()}
  def get_rss_body(url) do
    case Req.get!(url) do
      %{status: 200, body: body} ->
        FastRSS.parse_rss(body)

      %{status: status} ->
        IO.puts("RSS feed unreachable")
        {:error, {:http_status, status}}
    end
  end

  @doc """
  Selects the items of a parsed feed whose description contains `keyword`.

  `rss_map` is the map produced by `FastRSS.parse_rss/1`; its items are read
  from the `"items"` key (an `:items` atom key is accepted as a fallback).
  Items without a string `"description"` are skipped.

  Returns a list of `{true, description}` tuples in feed order.
  """
  @spec process_parsed_rss(map(), String.t()) :: list({true, String.t()})
  def process_parsed_rss(rss_map, keyword \\ "Ukraine") do
    items = Map.get(rss_map, "items") || Map.get(rss_map, :items) || []

    # The pattern and the guard drop items with a missing or nil description,
    # which would otherwise make String.contains?/2 raise.
    for %{"description" => description} <- items,
        is_binary(description),
        String.contains?(description, keyword) do
      {true, description}
    end
  end

  @doc """
  Tells whether `description` mentions the death of a famous person.

  Runs zero-shot classification with two labels and returns `true` when the
  score of the positive label is at least `threshold` (default `0.8`).

  NOTE: the model and tokenizer are loaded on every call; build the serving
  once and reuse it if this is called repeatedly.
  """
  @spec detect_death(String.t(), float()) :: boolean()
  def detect_death(description, threshold \\ 0.8) do
    {:ok, bart} = Bumblebee.load_model({:hf, @model_id})
    {:ok, tokenizer_info} = Bumblebee.load_tokenizer({:hf, @model_id})
    labels = [@label_positive, @label_negative]
    serving = Bumblebee.Text.zero_shot_classification(bart, tokenizer_info, labels)

    output = Nx.Serving.run(serving, description)
    # Kept so the raw predictions remain visible in the notebook output.
    IO.inspect(output)

    %{predictions: preds} = output

    # Look the positive label up by name instead of relying on its position
    # in the predictions list.
    score =
      preds
      |> Enum.find(fn pred -> Map.get(pred, :label) == @label_positive end)
      |> Map.get(:score)

    score >= threshold
  end
end

# Fetch the NYT World feed.
url = "https://rss.nytimes.com/services/xml/rss/nyt/World.xml"
resp = Req.get!(url)
body = Map.get(resp, :body)

# Run Nx on the EXLA backend, then try the module-level detector.
Nx.global_default_backend(EXLA.Backend)
DeathDetector.detect_death("Donald Trump launches a new war against the cats")

# Build a standalone zero-shot serving to experiment with other labels.
model_id = "facebook/bart-large-mnli"
{:ok, bart} = Bumblebee.load_model({:hf, model_id})
{:ok, tokenizer_info} = Bumblebee.load_tokenizer({:hf, model_id})
label_positive = "mentions death of a famous"
labels = [label_positive, "does not mention death of famous"]
zero_shot_serving = Bumblebee.Text.zero_shot_classification(bart, tokenizer_info, labels)

output = Nx.Serving.run(zero_shot_serving, "Pope Francis died this morning")

output = Nx.Serving.run(zero_shot_serving, "The president of Venezuela has passed away")

output = Nx.Serving.run(zero_shot_serving, "Accident kill 100 passagers on board of train")

# Find the positive label's prediction by name: the label must be one of
# `labels`, and it is not guaranteed to be the first prediction.
%{score: score_for_label1} =
  Enum.find(output.predictions, fn pred -> pred.label == label_positive end)

score_for_label1
# Fetch and parse the NYT World feed.
url = "https://rss.nytimes.com/services/xml/rss/nyt/World.xml"
resp = Req.get!(url)
body = Map.get(resp, :body)
alias FastRSS
{:ok, map_of_rss} = FastRSS.parse_rss(body)

# Explore the shape of the parsed feed and of its first item.
Map.keys(map_of_rss)
map_of_rss["items"]

Enum.at(map_of_rss["items"], 0)
Map.keys(Enum.at(map_of_rss["items"], 0))
Enum.at(map_of_rss["items"], 0)["link"]

# Keep the items whose description mentions "Ukraine", as {true, description}.
# The guard skips items whose description is not a string, which would
# otherwise make String.contains?/2 raise.
for %{"description" => description} <- map_of_rss["items"],
    is_binary(description),
    String.contains?(description, "Ukraine") do
  {true, description}
end