Innopolis University DevOps Playground
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Follow My Reading
Manage
Activity
Members
Labels
Plan
Issues
6
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Anton Kudryavtsev
Follow My Reading
Commits
41dc26f1
Verified
Commit
41dc26f1
authored
1 year ago
by
Anton Kudryavtsev
Browse files
Options
Downloads
Patches
Plain Diff
feat: extracting audio by given text
parent
5f664fbc
No related branches found
No related tags found
1 merge request
!46
Feature/split audio into segments by given array of phrases
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
api/v1/audio.py
+54
-0
54 additions, 0 deletions
api/v1/audio.py
api/v1/models.py
+16
-0
16 additions, 0 deletions
api/v1/models.py
core/plugins/base.py
+10
-0
10 additions, 0 deletions
core/plugins/base.py
core/task_system.py
+73
-2
73 additions, 2 deletions
core/task_system.py
with
153 additions
and
2 deletions
api/v1/audio.py
+
54
−
0
View file @
41dc26f1
...
...
@@ -8,6 +8,9 @@ from pydantic.error_wrappers import ValidationError
from
config
import
get_config
from
core.plugins.no_mem
import
get_audio_plugins
from
core
import
task_system
from
huey.api
import
Result
from
.auth
import
get_current_active_user
from
.models
import
(
...
...
@@ -17,6 +20,7 @@ from .models import (
ModelsDataReponse
,
TaskCreateResponse
,
UploadFileResponse
,
AudioExtractPhrasesRequest
,
)
from
.task_utils
import
_get_job_result
,
_get_job_status
,
create_audio_task
...
...
@@ -241,3 +245,53 @@ async def get_response(task_id: UUID) -> AudioProcessingResponse:
status_code
=
status
.
HTTP_422_UNPROCESSABLE_ENTITY
,
detail
=
"
There is no such audio processing task
"
,
)
from
error
@router.post(
    "/extract",
    response_model=TaskCreateResponse,
    status_code=200,
    summary="""
    The endpoint `/extract` extracts the specified phrases from the given audio
    file using the specified audio model
    """,
    responses={
        200: {"description": "Task was successfully created and scheduled"},
        404: {
            "description": "The specified file or model was not found.",
            "content": {
                "application/json": {
                    "example": {
                        "detail": "No such audio file available",
                    }
                }
            },
        },
    },
)
async def extract_text_from_audio(
    request: AudioExtractPhrasesRequest,
) -> TaskCreateResponse:
    """
    Schedule a background task that extracts the requested phrases from an audio file.

    Parameters:
    - **audio_file**: an uuid of file to process
    - **audio_model**: an audio processing model name (check '_/models_' for available models)
    - **phrases**: the list of phrases to look for in the audio

    Responses:
    - 200, the task was created; the response carries its `task_id`
    - 404, No such audio file available
    - 404, No such audio model available
    """
    audio_plugin_info = get_audio_plugins().get(request.audio_model)
    audio_file_path = config.storage.audio_dir / str(request.audio_file)

    # The model is validated before the file, so an unknown model is reported
    # first when both are missing.
    if audio_plugin_info is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No such audio model available",
        )

    if not audio_file_path.exists():
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="No such audio file available"
        )

    # Calling the task function only enqueues it; the returned handle is used
    # here solely for its id.
    job: Result = task_system.extact_phrases_from_audio(
        audio_plugin_info.class_name, audio_file_path.as_posix(), request.phrases
    )  # type: ignore

    return TaskCreateResponse(task_id=UUID(job.id))
This diff is collapsed.
Click to expand it.
api/v1/models.py
+
16
−
0
View file @
41dc26f1
...
...
@@ -112,3 +112,19 @@ class AudioTextComparisonResultsResponse(BaseModel):
class
MultipleTasksStatusResponse
(
BaseModel
):
data
:
List
[
TaskStatusResponse
]
# Request body of the `/extract` endpoint: which audio file to process, with
# which model, and which phrases to look for.
class AudioExtractPhrasesRequest(BaseModel):
    # uuid of the audio file to process
    audio_file: UUID
    # name of the audio processing model to use
    audio_model: str
    # phrases to search for in the audio
    phrases: List[str]
# API-level result for a single requested phrase.
# NOTE(review): presumably mirrors core.plugins.base.AudioPhrase, with
# AudioChunk as the API counterpart of AudioSegment — confirm.
class AudioPhrase(BaseModel):
    # audio chunk for the phrase; may be None
    # (presumably when the phrase was not found — confirm)
    audio_segment: AudioChunk | None
    # whether the phrase was found
    found: bool
    # the phrase this entry refers to
    phrase: str
# API-level response wrapping the per-phrase extraction results.
class AudioExtractPhrasesResponse(BaseModel):
    # one AudioPhrase entry per phrase
    # (presumably in request order — confirm against the task implementation)
    data: List[AudioPhrase]
This diff is collapsed.
Click to expand it.
core/plugins/base.py
+
10
−
0
View file @
41dc26f1
...
...
@@ -71,6 +71,16 @@ class AudioToTextComparisonResponse(BaseModel):
errors
:
List
[
TextDiff
]
# Result of searching for one phrase in processed audio, as produced by
# core.task_system._extact_phrases_from_audio.
class AudioPhrase(BaseModel):
    # joined segment covering the phrase, or None when nothing matched
    audio_segment: AudioSegment | None
    # True exactly when audio_segment is not None
    found: bool
    # the phrase that was searched for
    phrase: str
# Result of a phrase-extraction task: one AudioPhrase per requested phrase,
# in the order the phrases were given.
class AudioExtractPhrasesResponse(BaseModel):
    data: List[AudioPhrase]
@runtime_checkable
class
BasePlugin
(
Protocol
):
"""
...
...
This diff is collapsed.
Click to expand it.
core/task_system.py
+
73
−
2
View file @
41dc26f1
import
logging
from
typing
import
Any
,
Dict
from
typing
import
Any
,
Dict
,
Tuple
,
List
from
huey
import
RedisHuey
from
typing
import
List
...
...
@@ -17,10 +17,14 @@ from core.plugins.base import (
AudioToImageComparisonResponse
,
AudioToTextComparisonResponse
,
TextDiff
,
AudioPhrase
,
AudioExtractPhrasesResponse
,
AudioProcessingFunction
,
)
from
core.plugins.loader
import
PluginInfo
from
core.processing.audio_split
import
split_audio
from
core.processing.text
import
match_phrases
from
core.processing.text
import
match_phrases
,
find_phrases
scheduler
=
RedisHuey
()
...
...
@@ -232,3 +236,70 @@ def _get_image_plugins() -> Dict[str, PluginInfo]:
loaded into the worker image plugins.
"""
return
IMAGE_PLUGINS
def _extact_phrases_from_audio(
    audio_class: str, audio_path: str, phrases: List[str]
) -> AudioExtractPhrasesResponse:
    """
    Find each requested phrase in the audio and cut out the matching part.

    The audio is processed once with the given plugin, every phrase is
    searched for in the recognized segment texts, and the segments matching a
    phrase are joined into a single segment that is then split out of the
    source audio.

    Parameters:
    - audio_class: class name of the audio plugin used for processing
    - audio_path: path to the audio file
    - phrases: phrases to search for

    Returns a response with one `AudioPhrase` per requested phrase, in the
    same order; phrases that were not found have `found=False` and no
    segment.
    """
    from copy import deepcopy

    # extract text from audio
    audio_processing_result = _audio_process(
        audio_class, AudioProcessingFunction, audio_path
    )
    audio_segments = audio_processing_result.segments
    extracted_phrases = [s.text for s in audio_segments]

    # one entry per requested phrase; None marks a phrase that was not found
    audio_chunks: List[AudioSegment | None] = []

    # search each phrase
    for search_phrase in phrases:
        segment_indexes = find_phrases(extracted_phrases, search_phrase)
        if len(segment_indexes) == 0:
            audio_chunks.append(None)
            continue

        # Join the matched segments into a *copy* of the first one. Mutating
        # the recognized segment itself would leak the joined text/end/file
        # into later phrases that match the same segment, and two phrases
        # starting at the same segment would share a single object.
        joined_segment = deepcopy(audio_segments[segment_indexes[0]])
        joined_segment.text = " ".join(
            audio_segments[index].text for index in segment_indexes
        )
        joined_segment.end = audio_segments[segment_indexes[-1]].end
        audio_chunks.append(joined_segment)

    # split the source audio by the intervals of the phrases that were found
    found_chunks = [chunk for chunk in audio_chunks if chunk is not None]
    found_intervals: List[Tuple[float, float]] = [
        (chunk.start, chunk.end) for chunk in found_chunks
    ]
    files = split_audio(audio_path, found_intervals)

    # assign the split files; relies on split_audio returning one file per
    # interval, in the order the intervals were passed
    for position, chunk in enumerate(found_chunks):
        chunk.file = files[position]

    data: List[AudioPhrase] = [
        AudioPhrase(audio_segment=chunk, found=chunk is not None, phrase=phrase)
        for phrase, chunk in zip(phrases, audio_chunks)
    ]

    return AudioExtractPhrasesResponse(data=data)
@scheduler.task()
def extact_phrases_from_audio(
    audio_class: str, audio_path: str, phrases: List[str]
) -> AudioExtractPhrasesResponse:
    """
    Scheduler task entry point for phrase extraction.

    Delegates to `_extact_phrases_from_audio` with the same arguments and
    returns its result unchanged.
    """
    extraction_result = _extact_phrases_from_audio(
        audio_class=audio_class,
        audio_path=audio_path,
        phrases=phrases,
    )
    return extraction_result
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment