# Scratchpad
# Timex date parsing
Mix.install([
{:spider_man, "~> 0.3"},
{:floki, "~> 0.31"},
{:timex, "~> 3.0"}
])
# Day + abbreviated month only. The format has no year directive, so the year is supplied
# by hand when building the Date.
# NOTE(review): presumably date.year is 0 when no year was parsed, making this 2021 — confirm.
{:ok, date} = Timex.parse("10 Aug", "{0D} {Mshort}")
{:ok, full_date} = Date.new(2021 + date.year, date.month, date.day)
full_date |> Date.to_string()

# Input and format that do not agree. These results are left unmatched on purpose:
# asserting {:ok, _} here would raise a MatchError and stop the rest of the script.
# Format expects a year the input does not have.
Timex.parse("10 Aug", "{0D} {Mshort} {YYYY}")
# Input carries a year the format does not consume.
Timex.parse("10 Aug 2019", "{0D} {Mshort}")

# Input and format agree, so the parsed value can be matched and printed.
{:ok, date} = Timex.parse("10 Aug 2019", "{0D} {Mshort} {YYYY}")
date |> Date.to_string()
date = "10 Aug 2019"
# date = "10 Aug"

# Year assumed for inputs that carry only a day and a month.
default_year = 2021

# Normalise `date` to an ISO-8601 date string.
#
# The fully qualified format is tried first. Any parse error then falls back to the
# day + month format, instead of depending on the exact wording of one Timex error
# message. Input that matches neither format still fails loudly with a MatchError.
full_date =
  case Timex.parse(date, "{0D} {Mshort} {YYYY}") do
    {:ok, parsed} ->
      Date.to_string(parsed)

    {:error, _reason} ->
      {:ok, parsed} = Timex.parse(date, "{0D} {Mshort}")
      # The short format has no year, so it is supplied here rather than read from `parsed`.
      {:ok, with_year} = Date.new(default_year, parsed.month, parsed.day)
      Date.to_string(with_year)
  end

full_date
require Logger
# sub_title = "BrainCheck - Austin, TX"
sub_title = "Peek - US - Remote"

# A sub title reads "<company> - <location>[ - <extra> ...]". Only the first two segments
# are used; the rest is matched but deliberately ignored (underscore-prefixed so the
# compiler does not warn about an unused variable).
[company, location | _others] = String.split(sub_title, " - ")

Logger.info("company: #{company}")
Logger.info("location: #{location}")
# Company parsing
company_parts = "Peek"
# company_parts = "(Remote) Peek"

# Split an optional "(Remote) " prefix off the company name.
#
# With the prefix, `workplace` is the literal "(Remote)" and `company` is the remainder.
# Without it the regex does not match (Regex.run/3 returns nil), so the whole input is the
# company name and the workplace defaults to "Onsite". The fallback must use
# `company_parts`: `company` is not bound yet inside this expression.
[workplace, company] =
  case Regex.run(~r/(\(Remote\)) (.*)/, company_parts, capture: :all_but_first) do
    [workplace, company] -> [workplace, company]
    nil -> ["Onsite", company_parts]
  end
# Elixir Companies - Parse Information Section
import SpiderMan.Utils
require Logger
require Floki
# Sample input for the parsing code that follows: a run of company entries, each typically
# listing an industry, site/blog/GitHub labels, a location, an "Add a job" line and a
# free-text description.
# NOTE(review): this sample contains no HTML tags or ids, while the code that consumes it
# queries CSS selectors such as "#company-index" - confirm the markup was not stripped.
body = """
Technology Consulting
100starlings.com/
GitHub
Remote
Add a job
We are a cooperative of like minded professionals skilled at solving problemsand creating excellent products for our clients.We have experience with Elixir, Ruby, JavaScript, AWS, Mobile development and more.Our fully remote team is currently split between London, Berlin, and Hungary.We also have a retreat every year and a training budget.
Technology Consulting
10pines.com/
blog.10pines.com/
GitHub
Buenos Aires, Argentina
Add a job
We craft elegant software using Elixir / Phoenix among other modern technologies. Organizers and sponsors of Erlang/Elixir local conference and organizers of local Elixir meetup.
Gaming
22cans.com/
GitHub
Guildford, Surrey, United Kingdom
Add a job
22cans is an independent game studio founded by the industry legend and creator of God Game genre, Peter Molyneux. Creators of Curiosity and Godus.
Education Technology
2u.com
GitHub
Brooklyn, NY, USA
Add a job
2U powers the world’s best digital higher education. When students win, universities win, and by coming together around one shared goal—delivering great student outcomes—we are transforming the future of higher education for the better. We use Elixir, Phoenix, and Absinthe to develop high performance data pipelines, and abstractions on difficult-to-integrate third party services.
Technology Consulting
361.de
Stuttgart, Germany
Add a job
We are a digital agency based in Stuttgart, Germany. We create web and mobile apps using Elixir, Phoenix, React and React Native.
Computer Software
361.de
GitHub
Stuttgart
Add a job
We are a digital agency based in Stuttgart, Germany. We create web and mobile apps using Elixir, Phoenix, React and React Native.
Financial Services
500.co
500.co/blog/
GitHub
San Francisco, CA
Add a job
500 Startups is a venture capital firm on a mission to discover and back the world’s most talented entrepreneurs, help them create successful companies at scale, and build thriving global ecosystems. It is one of the most active venture capital firms in the world.
Mobile App & Game Development
9to5.software/
9to5.software/nieuws
Delft, The Netherlands
Add a job
9to5 is a software development company based in Delft, The Netherlands. Our company develops tailored native apps for backend, iOS, Android and web.
9to5 differentiates itself from other developers by offering all the required expertises in-house. From concept and building the API and backend, to designing the user interface and beta testing your app. Also, 9to5 can support you and help you optimise your app after launch.
By offering this development process entirely in-house less mistakes are made and the process is accelerated. This method is supported by our broad expertise on the area of design and development technics.
Customers are directly in contact with developers, without any sales or project managers in between. This direct contact allows the developers to actively think along with the customers to help bring the ideas to life.
The final result is a quick delivery of a high-end and user friendly app, that meets all the requirements of the customer.
Communication
ably.io/
blog.ably.io
GitHub
London, United Kingdom
Add a job
Ably is a realtime data stream network PaaS. Elixir is used for some of the backend realtime services, such as the realtime protocol adapter layer (https://www.ably.io/adapters)
Computer Software/Engineering
activesphere.com
activesphere.com/blog.html
GitHub
Bangalore, India
Add a job
We are a boutique software consulting company. We have built and scaled complex web-apps, visualizations and high-throughput messaging systems.
Enterprise Software
adjust.com/
big-elephants.com
GitHub
Berlin, Germany
Add a job
Mobile user attribution and advanced app analytics, combined with store stats.
Computer Software
adobe.com/
GitHub
San Jose, CA, USA
Add a job
Adobe provides digital media and digital marketing solutions. Adobe is using Elixir to build a combined client/cloud application for collaborative photography workflow.
Marketing/Sales
adroll.com
GitHub
San Francisco, CA, USA
Add a job
AdRoll is a marketing platform enabling brands of all sizes to create personalized ad campaigns based on their own website data. They use Elixir in their high-volume data processing pipeline.
Advance India Projects Limited
Printing
aipl.com/
Gurugram, Haryana
Add a job
Advance India Projects Limited (AIPL) is a leading real estate development company in India. With various residential and commercial spaces located in the prime locations of the city, one can easily buy a villa or shop in Amritsar. With premium properties like Dreamcity, Dreamcity NXT, and Celebration Mall, Advance India Projects Limited is one of the most sought-after real estate developers in Punjab.
Technology Consulting
agilion.com
GitHub
Burlington, VT, USA
Add a job
Software development and consulting.
Business Intelligence
aircloak.com/
GitHub
Berlin, Gemany and San Francisco, CA, USA
Add a job
Aircloak’s first-in-class real-time database anonymization solution provides instant privacy compliance and enables high-quality analytics for any data set and any use case. Aircloak’s from Berlin. Saša Jurić, the author of Elixir in Action is software engineer at Aircloak.
"""
{:ok, document} = Floki.parse_fragment(body)

# Collect the company cards: the direct <div> children of the #company-index container.
#
# Floki.find/2 returns a list, so the container is pattern-matched rather than taken with
# hd/1, which raises on an empty list. When the container is missing the problem is logged
# and there are simply no companies to process.
companies =
  case Floki.find(document, "#company-index") do
    [container | _rest] ->
      container
      |> Floki.children(include_text: false)
      |> Enum.filter(&match?({"div", _, _}, &1))

    [] ->
      Logger.warning("no #company-index container found in document")
      []
  end
# Parse every company card into a map of its fields, logging each field along the way.
#
# Field sources:
#   industry    - first line of the info paragraph
#   location    - last line of the info paragraph once the trailing "Add a job" text is dropped
#   website_url - first link in the info paragraph
#   github_url  - first remaining link that contains github.com
#   blog_url    - first remaining link that is not a GitHub link (optional)
# The last link in the info paragraph (starting with `/en/jobs/new?`) is discarded.
#
# Each card yields a map, so `items` holds the parsed data instead of the `:ok` returned by
# the final Logger call.
items =
  Enum.map(companies, fn company ->
    # NOTE(review): assumes every card has a title link; hd/1 raises otherwise.
    link = Floki.attribute(company, ".company .title a", "href") |> hd()
    name = Floki.find(company, ".company .title a") |> Floki.text() |> String.trim()

    # Enum.drop/2 with -1 removes the trailing "Add a job" line and, unlike
    # reverse/tl/reverse, does not raise when the list is empty.
    info_lines =
      company
      |> Floki.find(".company .company-info p")
      |> Floki.text()
      |> String.trim()
      |> String.split("\n")
      |> Enum.drop(-1)

    industry = info_lines |> List.first("") |> String.trim()
    location = info_lines |> List.last("") |> String.trim()

    # Same trimming for the links: the last one is the "Add a job" url.
    links =
      company
      |> Floki.attribute(".company .company-info p a", "href")
      |> Enum.drop(-1)

    website_url = List.first(links)
    remaining_links = Enum.drop(links, 1)

    # Literal substring check; the earlier ~r/github.com/ let "." match any character.
    github_link? = &String.contains?(&1, "github.com")
    github_url = Enum.find(remaining_links, github_link?)
    blog_url = Enum.find(remaining_links, &(not github_link?.(&1)))

    # \s+ already covers tabs and newlines, so one pass collapses all whitespace runs.
    description =
      company
      |> Floki.find(".company .company-description p")
      |> Floki.text()
      |> String.trim()
      |> String.replace(~r/\s+/, " ")

    # Drop the first character of the link. The explicit //1 step avoids the
    # deprecated negative-stepping form of 1..-1.
    slug = String.slice(link, 1..-1//1)
    page_number = 1

    Logger.info("name: #{name}")
    Logger.info("industry: #{industry}")
    Logger.info("location: #{location}")
    Logger.info("website_url: #{website_url}")
    Logger.info("github_url: #{github_url}")
    Logger.info("blog_url: #{blog_url}")
    Logger.info("link: #{slug}")
    Logger.info("description: #{description}")
    Logger.info("page_number: #{page_number}")

    %{
      name: name,
      industry: industry,
      location: location,
      website_url: website_url,
      github_url: github_url,
      blog_url: blog_url,
      link: slug,
      description: description,
      page_number: page_number
    }
  end)
# Both http and https GitHub urls must be recognised. The dot is escaped so it only matches
# a literal "." rather than any character.
String.match?("https://github.com/w0rd-driven", ~r/github\.com/)
String.match?("http://github.com/w0rd-driven", ~r/github\.com/)
# Removing the final element of a list: a negative count makes Enum.drop/2 drop from the end.
info = [46, 238, 64, 30, 105, 136, 98, 75, 23, 157, 11, 20]
Enum.drop(info, -1)