Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Amazon Transcribe

livebooks/aws/transcribe.livemd

Amazon Transcribe

Mix.install([
  {:aws, "~> 0.13"},
  {:hackney, "~> 1.20"},
  {:req, "~> 0.4"},
  {:kino, "~> 0.14"}
])

クライアントの準備

# Input widgets for the AWS credentials and region. The two key fields use
# password inputs so the values are masked in the notebook.
access_key_id_input = Kino.Input.password("ACCESS_KEY_ID")
secret_access_key_input = Kino.Input.password("SECRET_ACCESS_KEY")
region_input = Kino.Input.text("REGION")

# Lay the three inputs out side by side in a single row.
Kino.Layout.grid(
  [access_key_id_input, secret_access_key_input, region_input],
  columns: 3
)
# Read the current input values (in the same order as before) and build the
# AWS client that every later section uses.
access_key_id = Kino.Input.read(access_key_id_input)
secret_access_key = Kino.Input.read(secret_access_key_input)
region = Kino.Input.read(region_input)

client = AWS.Client.create(access_key_id, secret_access_key, region)

バケット選択

# Fetch the account's bucket listing. Matching on the `{:ok, body, response}`
# tuple makes a failed request raise a MatchError that shows the error term,
# rather than an unrelated error further down.
{:ok, %{"ListAllMyBucketsResult" => bucket_listing}, _response} =
  AWS.S3.list_buckets(client)

# The decoded XML appears to yield a single map (not a list) when exactly one
# <Bucket> element is present, and no nested "Bucket" entry when there are
# none. Normalise both cases to a list of bucket maps so the table below
# always receives rows.
buckets =
  case bucket_listing["Buckets"] do
    %{"Bucket" => bucket_or_buckets} -> List.wrap(bucket_or_buckets)
    _no_buckets -> []
  end

Kino.DataTable.new(buckets)
s3_bucket_input = Kino.Input.text("S3_BUCKET")

音声ファイルアップロード

# Audio recorder/uploader widget; the recording is requested in WAV format.
audio_input = Kino.Input.audio("Audio", format: :wav)
# Resolve the recorded audio to a path on the local file system.
audio_value = Kino.Input.read(audio_input)
audio_file_path = Kino.Input.file_path(Map.get(audio_value, :file_ref))
# Destination bucket (taken from the S3_BUCKET input) and a fixed object key.
bucket_name = Kino.Input.read(s3_bucket_input)
s3_key = "audio_for_transcribe/input.wav"
body = File.read!(audio_file_path)
# Base64-encoded MD5 digest of the payload, sent as the ContentMD5 value.
md5 = Base.encode64(:crypto.hash(:md5, body))

# Upload the audio file to S3.
AWS.S3.put_object(client, bucket_name, s3_key, %{
  "Body" => body,
  "ContentMD5" => md5
})

音声認識ジョブの開始

# Start a Japanese (ja-JP) transcription job for the audio uploaded to S3.
# Matching on the success tuple makes a rejected request (for example, a job
# with this name already existing) fail here with the service's error term
# visible, instead of a confusing error on the next line.
{:ok, %{"TranscriptionJob" => transcription_job}, _response} =
  AWS.Transcribe.start_transcription_job(client, %{
    "TranscriptionJobName" => "sample-transcription-job",
    "Media" => %{
      "MediaFileUri" => "s3://#{bucket_name}/#{s3_key}"
    },
    "LanguageCode" => "ja-JP"
  })

# A freshly started job is normally still queued or in progress and has no
# "Transcript" entry yet, so fetching it only once leaves `result` unusable
# for the next section. Poll until the job reaches a terminal status.
poll_interval_ms = 5_000
# Upper bound on polling (about 10 minutes) so the cell cannot hang forever.
max_polls = 120

# `result` is the latest "TranscriptionJob" map: the finished (COMPLETED or
# FAILED) job, or the last observed state if the poll limit was reached.
result =
  Enum.reduce_while(1..max_polls, nil, fn _attempt, _previous ->
    {:ok, %{"TranscriptionJob" => job}, _response} =
      AWS.Transcribe.get_transcription_job(client, %{
        "TranscriptionJobName" => transcription_job["TranscriptionJobName"]
      })

    if job["TranscriptionJobStatus"] in ["COMPLETED", "FAILED"] do
      {:halt, job}
    else
      Process.sleep(poll_interval_ms)
      {:cont, job}
    end
  end)

音声認識結果の取得

# Location of the transcript file, taken from the job description.
transcript_file_uri = Map.get(Map.get(result, "Transcript"), "TranscriptFileUri")
# Download the transcript and keep the response body.
transcript_json = Req.get!(transcript_file_uri).body
# Full text of the first transcript.
Map.get(Enum.at(transcript_json["results"]["transcripts"], 0), "transcript")
# One table row per recognised item, flattened to its first alternative.
rows =
  for item <- transcript_json["results"]["items"] do
    best = Enum.at(item["alternatives"], 0)

    item
    |> Map.merge(%{
      "content" => best["content"],
      "confidence" => best["confidence"],
      # Always set the timing keys (nil when the item has none) so every row
      # has the same columns.
      "start_time" => Map.get(item, "start_time", nil),
      "end_time" => Map.get(item, "end_time", nil)
    })
    |> Map.delete("alternatives")
  end

Kino.DataTable.new(rows)

音声認識ジョブの削除

# Delete the transcription job by the name the service returned for it.
AWS.Transcribe.delete_transcription_job(client, %{
  "TranscriptionJobName" => transcription_job["TranscriptionJobName"]
})