Module heyvi.system
Expand source code Browse git
import os
import vipy
import gc
import torch
import heyvi.recognition
import heyvi.detection
import heyvi.version
import heyvi.label
import contextlib
import itertools
import pycollector.version
from heyvi.util import timestamp
class YoutubeLive():
    """Write video frames to a Youtube Live RTMP endpoint.

    >>> Y = heyvi.system.YoutubeLive(encoder='480p')
    >>> v = heyvi.sensor.rtsp()
    >>> Y(v)

    The object is also a context manager that yields a frame writer:

    >>> with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s:
    >>>     s(im)  # write a single frame to the live stream

    Args:
        streamkey [str]: The youtube live key (https://support.google.com/youtube/answer/9854503?hl=en), or set as environment variable VIPY_YOUTUBE_STREAMKEY
        url [str]: The RTMP ingest url, the stream key is appended to it as '<url>/<streamkey>'
        fps [float]: The framerate in frames per second of the output stream.
        encoder [str]['480p', '720p', '360p']: The encoder settings for the youtube live stream

    Raises:
        AssertionError: if no stream key is available, the encoder name is unknown, or the resulting url is rejected by vipy.util.isurl
    """

    def __init__(self, streamkey=None, url='rtmp://a.rtmp.youtube.com/live2', fps=30, encoder='480p'):
        assert streamkey is not None or 'VIPY_YOUTUBE_STREAMKEY' in os.environ
        if streamkey is None:
            streamkey = os.environ['VIPY_YOUTUBE_STREAMKEY']

        # Recommended encoder settings, see:
        # https://support.google.com/youtube/answer/2853702?hl=en#zippy=%2Cp
        self._encoder_recommended = {
            '720p': {'width': 1280, 'height': 720, 'bitrate': '4000k'},
            '480p': {'width': 854, 'height': 480, 'bitrate': '1000k'},
            '360p': {'width': 640, 'height': 360, 'bitrate': '1000k'},
        }
        assert encoder in self._encoder_recommended
        self._encoder = self._encoder_recommended[encoder]

        self._url = '%s/%s' % (url, streamkey)
        assert vipy.util.isurl(self._url)
        self._vo = vipy.video.Scene(url=self._url, framerate=fps)

    def __repr__(self):
        (url, fps) = (str(self._vo.url()), self._vo.framerate())
        return '<heyvi.system.YoutubeLive: url=%s, framerate=%2.1f>' % (url, fps)

    def __enter__(self):
        """Open the writable stream and return a function that writes one frame to it."""
        settings = self._encoder
        (height, width) = (settings['height'], settings['width'])
        self._vs = self._vo.stream(write=True, bitrate=settings['bitrate'])

        def _write(im):
            # Frames that do not already match the encoder dimensions are
            # quietly (anisotropically) resized to the stream dimensions.
            if im.shape() == (height, width):
                return self._vs.write(im.rgb())
            return self._vs.write(im.rgb().resize(height=height, width=width))

        return _write

    def __exit__(self, type, value, tb):
        """Close the stream opened by __enter__."""
        self._vs.__exit__(type, value, tb)

    def __call__(self, vi, verbose=True):
        """Write every frame of the video `vi` to the live stream, returns this object."""
        assert isinstance(vi, vipy.video.Scene)
        (height, width) = (self._encoder['height'], self._encoder['width'])
        fps = self._vo.framerate()
        with self as write:
            for (k, im) in enumerate(vi.framerate(fps).resize(height=height, width=width)):
                if verbose:
                    print('[heyvi.system.YoutubeLive][%s][%d]: %s' % (timestamp(), k, im), end='\r')
                write(im)  # write frame to live stream
        return self
class Recorder():
    """Record a livestream to an output video file

    This will record an input stream to the provided outfile

    >>> v = vipy.video.Scene(url='rtsp://...', framerate=30)
    >>> R = Recorder('/tmp/out.mp4', fps=5)
    >>> R(v, seconds=60*60)

    To buffer to memory, you do not need this recorder, use (for small durations):

    >>> v = v.duration(seconds=3).load().saveas('/tmp/out.mp4')

    This will record three seconds from the provided RTSP stream and save in the usual way to the output file

    To record frame by frame:

    >>> v = vipy.video.RandomScene()
    >>> with Recorder('out.mp4') as r:
    >>>     for im in v:
    >>>         r(im.annotate().rgb())  # write individual frames from video v

    Args:
        outfile [str]: The output video file, must satisfy vipy.util.isvideo
        fps [float]: The framerate of the output video
        overwrite [bool]: Passed to the output stream when used as a context manager.

    Note:
        Calling the recorder directly (`R(v)`) always opens the output stream with overwrite=True,
        the `overwrite` constructor argument only applies to the `with Recorder(...)` form.
    """

    def __init__(self, outfile, fps=30, overwrite=False):
        assert vipy.util.isvideo(outfile)
        self._vo = vipy.video.Scene(filename=outfile, framerate=fps)
        self._overwrite = overwrite

    def __enter__(self):
        """Open the writable output stream and return a function that writes one frame as RGB."""
        self._vs = self._vo.stream(write=True, overwrite=self._overwrite)

        def _write(im):
            return self._vs.write(im.rgb())

        return _write

    def __exit__(self, type, value, tb):
        """Close the stream opened by __enter__."""
        self._vs.__exit__(type, value, tb)

    def __repr__(self):
        description = str(self._vo)
        return '<heyvi.system.Recorder: %s>' % description

    def __call__(self, vi, seconds=None, verbose=True):
        """Record the video `vi` to the output file, optionally limited to `seconds`, returns the output video."""
        assert isinstance(vi, vipy.video.Scene)
        if seconds is not None:
            vi = vi.clone().duration(seconds=seconds)  # clone so that the duration limit is not applied to the caller's video
        vi = vi.framerate(self._vo.framerate())
        with self._vo.stream(overwrite=True) as out:
            for (k, im) in enumerate(vi.stream()):
                if verbose:
                    print('[heyvi.system.Recorder][%s][%d]: %s' % (timestamp(), k, im), end='\r')
                out.write(im)
        return self._vo
class Tracker():
    """heyvi.system.Tracker() class.  Run a video object tracker on a video or live stream.

    To run on a livestream:

    ```python
    v = heyvi.sensor.rtsp()
    T = heyvi.system.Tracker()
    with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s:
        T(v, frame_callback=lambda im: s(im.pixelize().annotate(fontsize=15, timestamp=heyvi.util.timestamp(), timestampoffset=(6,10))))
    ```

    To run on an input file as a batch:

    ```python
    v = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
    T = heyvi.system.Tracker()
    v_tracked = T(v)
    v_tracked.annotate('annotation.mp4')
    ```

    To stream tracks computed per frame

    ```python
    vi = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
    for (f,vo) in enumerate(T.stream(vi)):
        print(vo)  # tracking result at frame f
    ```

    To stream tracks computed per frame, along with pixels for current frame

    ```python
    vi = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
    for (f,(im,vo)) in enumerate(zip(vi, T.stream(vi))):
        print(vo)  # tracking result at frame f
        print(im)  # `vipy.image.Image` with pixels available as im.numpy()
    ```

    To stream tracks computed per frame, along with the most recent video clip of length 16:

    ```python
    vi = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
    for (f,(vc,vo)) in enumerate(zip(vi.stream().clip(16), T.stream(vi))):
        print(vo)  # tracking result at frame f
        print(vc)  # `vipy.video.Scene` with pixels for clips of length 16
    ```

    For additional use cases for streaming batches, clips, frames, delays see the [vipy documentation](https://visym.github.io/vipy)

    Args:
        verbose [bool]: If true, print the tracking result for each frame when calling the tracker

    Returns:
        `vipy.video.Scene` objects with tracks corresponding to objects in `heyvi.detection.MultiscaleVideoTracker.classlist`.  Object tracks are "person", "vehicle", "bicycle".

    """

    def __init__(self, verbose=False):
        assert vipy.version.is_at_least('1.11.11')
        assert heyvi.version.is_at_least('0.0.5')
        assert torch.cuda.device_count() >= 4  # the tracker below is configured for GPUs 0-3

        # merge truck/bus/car to vehicle
        objects = ['person', ('car', 'vehicle'), ('truck', 'vehicle'), ('bus', 'vehicle'), 'bicycle']
        self._tracker = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=objects, overlapfrac=0, gate=64, detbatchsize=None)
        self._verbose = verbose

    def __call__(self, vi, frame_callback=None):
        """Batch tracking of input video file

        Args:
            vi [vipy.video.Scene]: The input video
            frame_callback [callable]: If provided, called as frame_callback(im) for every delayed frame `im` that is not None

        Returns:
            The input video object `vi`
        """
        assert isinstance(vi, vipy.video.Scene)
        delayed_frames = vi.stream(buffered=True).frame(delay=5)
        tracked = self._tracker(vi, stride=3, buffered=vi.islive())
        has_callback = callable(frame_callback)
        for (k, (im, v)) in enumerate(zip(delayed_frames, tracked)):
            if has_callback and im is not None:
                frame_callback(im)
            if self._verbose and v is not None:
                # trailing spaces pad the line since it is rewritten in place with '\r'
                print('[heyvi.system.Tracker][%s][%d]: %s' % (timestamp(), k, str(v)+' '*100), end='\r')
        return vi

    def stream(self, vi):
        """Tracking iterator of input video, yields the tracker output for each frame"""
        delayed_frames = vi.stream(buffered=True).frame(delay=5)
        tracked = self._tracker(vi, stride=3, buffered=vi.islive())
        # The frame stream is iterated in lockstep with the tracker, as in __call__
        for (_, v) in zip(delayed_frames, tracked):
            yield v
class Actev21():
    """heyvi.system.Actev21() class

    Real time activity detection for the 37 MEVA (https://mevadata.org) activity classes

    >>> v = heyvi.sensor.rtsp().framerate(5)
    >>> S = heyvi.system.Actev21()
    >>> with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s:
    >>>     S(v, frame_callback=lambda im, imraw, v: s(im), minconf=0.2)

    Raises:
        AssertionError: if the installed vipy/heyvi versions are too old, or fewer than four CUDA devices are available
    """

    def __init__(self):
        assert vipy.version.is_at_least('1.11.11')
        assert heyvi.version.is_at_least('0.0.5')
        assert torch.cuda.device_count() >= 4  # the tracker and detector in __call__ are configured for GPUs 0-3

        # Activity model checkpoint, downloaded into the vipy cache (set VIPY_CACHE env) if not already present
        self._activitymodel = vipy.downloader.downloadif('https://dl.dropboxusercontent.com/s/ntvjg352b0fwnah/mlfl_v5_epoch_41-step_59279.ckpt',
                                                         vipy.util.tocache('mlfl_v5_epoch_41-step_59279.ckpt'),
                                                         sha1='c4457e5b2e4fa1462d552070c47cac9eb2833e47')

        # Frame annotator used for frame_callback: captions + timestamp overlay, returned as RGB
        self._annotator = lambda im, f=vipy.image.mutator_show_trackindex_verbonly(confidence=True): f(im).annotate(timestamp=heyvi.util.timestamp(), timestampoffset=(6,10), fontsize=15).rgb()

    def __call__(self, vi, vs=None, minconf=0.04, verbose=True, frame_callback=None, livestream=False):
        """Run tracking and activity detection on the input video.

        Args:
            vi [vipy.video.Scene]: The input video, processed at mindim=960 and 5Hz
            vs [vipy.video.Stream]: Optional stream, validated to have framerate 5 but not otherwise used by this method
            minconf [float]: Passed to the activity detector as `minprob`
            verbose [bool]: If true, print the current result for each frame
            frame_callback [callable]: If provided, called as frame_callback(annotated_frame, frame, video) for every delayed frame that is not None
            livestream [bool]: If true, use the live stream delay and finalization period even if vi.islive() is false

        Returns:
            The last video yielded by the detector with background activities removed, at the source framerate and source minimum dimension

        Note:
            The garbage collector is disabled while streaming and is always re-enabled on exit, including when an exception is raised.
        """
        assert isinstance(vi, vipy.video.Scene)
        assert vs is None or (isinstance(vs, vipy.video.Stream) and vs.framerate() == 5)

        islive = vi.islive()
        delayed = islive or livestream
        livedelay = 2*15*5 if delayed else 5  # live stream delay (must be >= 2x finalized period)

        # merge truck/bus/car to vehicle, no motorcycles
        objects = ['person', ('car', 'vehicle'), ('truck', 'vehicle'), ('bus', 'vehicle'), 'bicycle']
        track = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=objects, overlapfrac=6, gate=64, detbatchsize=None)
        activities = list(heyvi.label.pip_plus_meva_to_meva.items())
        detect = heyvi.recognition.Actev21_AD(gpus=[0,1,2,3], batchsize=64, modelfile=self._activitymodel, stride=3, activities=activities)  # stride should match tracker stride 4->3

        gc.disable()
        try:
            (srcdim, srcfps) = (vi.mindim(), vi.framerate())
            vi = vi.mindim(960).framerate(5)

            frames = vi.stream(buffered=True).frame(delay=livedelay)
            detections = detect(track(vi, stride=3, buffered=islive),
                                mirror=False, trackconf=0.2, minprob=minconf, maxdets=105, avgdets=70, throttle=True, activityiou=0.1, buffered=islive,
                                finalized=(livedelay//2) if delayed else True)

            for (f, (im, vd)) in enumerate(zip(frames, detections)):
                vi = vd  # keep the most recent detector output, it is post-processed after the loop
                if callable(frame_callback) and im is not None:
                    frame_callback(self._annotator(im.clone()), im, vd)
                if verbose:
                    print('[heyvi.system.Actev21][%s][%d]: %s' % (timestamp(), f, vd), end='\r')

            vi.activityfilter(lambda a: a.category() not in ['person', 'person_walks', 'vehicle', 'car_moves'])  # remove background activities
            vo = vi.framerate(srcfps)  # upsample tracks/activities back to source framerate
            vo = vo.mindim(srcdim)  # upscale tracks back to source resolution
        finally:
            gc.enable()  # never leave the collector disabled if streaming or detection raises
        return vo

    def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512):
        """Write an annotation visualization of the video `v` to `outfile`.

        Args:
            v: The video to annotate, resized to `mindim` and filtered to activities with confidence >= minconf
            outfile [str]: The output video file
            minconf [float]: The minimum activity confidence to show
            trackonly [bool]: If true, use the track-only mutator (takes precedence over nounonly)
            nounonly [bool]: If true, use the noun-only mutator with no captions
            mindim [int]: The minimum dimension of the annotation video

        Returns:
            The result of v.annotate(...)
        """
        filtered = v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf))
        if trackonly:
            mutator = vipy.image.mutator_show_trackonly()
        elif nounonly:
            mutator = vipy.image.mutator_show_nounonly(nocaption=True)
        else:
            mutator = vipy.image.mutator_show_trackindex_verbonly(confidence=True)

        # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution
        return filtered.annotate(mutator=mutator, timestamp=True, fontsize=6, outfile=outfile)
class CAP():
    """heyvi.system.CAP() class

    Real time activity detection for the 512 CAP (https://visym.github.io/cap) activity classes

    Args:
        modelfile [str]: The activity model file, passed to heyvi.recognition.CAP_AD
        labelset [str]: The label set name, passed to heyvi.recognition.CAP_AD
        verbose [bool]: If true, print the current result for each frame

    Raises:
        AssertionError: if the installed vipy/heyvi/pycollector versions are too old, or fewer than four CUDA devices are available
    """

    def __init__(self, modelfile, labelset='cap', verbose=True):
        assert vipy.version.is_at_least('1.12.4')
        assert heyvi.version.is_at_least('0.2.26')
        assert pycollector.version.is_at_least('0.4.2')
        assert torch.cuda.device_count() >= 4  # the tracker and detector below are configured for GPUs 0-3

        self._unitnorm = False
        self._verbose = verbose
        self._activitymodel = modelfile  # local testing only
        self._labelset = labelset

        # Frame annotator used for frame_callback: captions + timestamp overlay, returned as RGB
        self._annotator = lambda im, f=vipy.image.mutator_show_trackindex_verbonly(confidence=True): f(im).annotate(timestamp=heyvi.util.timestamp(), timestampoffset=(6,10), fontsize=15).rgb()

        # merge truck/bus/car to vehicle, no motorcycles
        self._objects = ['person', ('car', 'vehicle'), ('truck', 'vehicle'), ('bus', 'vehicle'), 'bicycle']
        self._tracker = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=self._objects, overlapfrac=6, gate=64, detbatchsize=None)
        self._ad = heyvi.recognition.CAP_AD(gpus=[0,1,2,3], batchsize=64, modelfile=self._activitymodel, stride=3, unitnorm=self._unitnorm, labelset=self._labelset, verbose=self._verbose)  # stride should match tracker stride 4->3

    def __call__(self, vi, minconf=0.04, frame_callback=None, livestream=False, mintracklen=None, finalized=True):
        """Run tracking and activity detection on the input video.

        Args:
            vi [vipy.video.Scene]: The input video, processed at mindim=960 and 5Hz
            minconf [float]: Passed to the activity detector as `minprob`
            frame_callback [callable]: If provided, called as frame_callback(annotated_frame, frame, video) for every delayed frame that is not None
            livestream [bool]: If true, use the live stream delay and finalization period even if vi.islive() is false
            mintracklen: Passed through to the activity detector
            finalized: Passed through to the activity detector for non-live input, ignored for live input

        Returns:
            A clone of the last video yielded by the detector with background activities removed, at the source framerate and source minimum dimension

        Note:
            The garbage collector is disabled while streaming and is always re-enabled on exit, including when an exception is raised.
        """
        assert isinstance(vi, vipy.video.Scene)

        islive = vi.islive()
        delayed = islive or livestream
        livedelay = 2*15*5 if delayed else 5  # live stream delay (must be >= 2x finalized period)
        (srcdim, srcfps, detect, track) = (vi.mindim(), vi.framerate(), self._ad, self._tracker)

        gc.disable()
        try:
            vi = vi.mindim(960).framerate(5)

            frames = vi.stream(buffered=True).frame(delay=livedelay)
            detections = detect(track(vi, stride=3, buffered=islive),
                                mirror=False, trackconf=0.2, minprob=minconf, maxdets=105, avgdets=70, throttle=True, activityiou=0.1, buffered=islive,
                                finalized=(livedelay//2) if delayed else finalized, mintracklen=mintracklen)

            for (f, (im, vd)) in enumerate(zip(frames, detections)):
                vi = vd  # keep the most recent detector output, it is post-processed after the loop
                if callable(frame_callback) and im is not None:
                    frame_callback(self._annotator(im.clone()), im, vd)
                if self._verbose:
                    print('[heyvi.system.CAP][%s][%d]: %s' % (heyvi.util.timestamp(), f, vd), end='\r')

            vi.activityfilter(lambda a: a.category() not in ['person', 'person_walks', 'vehicle', 'car_moves'])  # remove background activities
            vo = vi.framerate(srcfps).clone()  # upsample tracks/activities back to source framerate
            vo = vo.mindim(srcdim)  # upscale tracks back to source resolution
        finally:
            gc.enable()  # never leave the collector disabled if streaming or detection raises
        return vo

    def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512):
        """Write an annotation visualization of the video `v` to `outfile`.

        Args:
            v: The video to annotate, resized to `mindim` and filtered to activities with confidence >= minconf
            outfile [str]: The output video file
            minconf [float]: The minimum activity confidence to show
            trackonly [bool]: If true, use the track-only mutator (takes precedence over nounonly)
            nounonly [bool]: If true, use the noun-only mutator with no captions
            mindim [int]: The minimum dimension of the annotation video

        Returns:
            The result of v.annotate(...)
        """
        filtered = v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf))
        if trackonly:
            mutator = vipy.image.mutator_show_trackonly()
        elif nounonly:
            mutator = vipy.image.mutator_show_nounonly(nocaption=True)
        else:
            mutator = vipy.image.mutator_show_trackindex_verbonly(confidence=True)

        # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution
        return filtered.annotate(mutator=mutator, timestamp=True, fontsize=6, outfile=outfile)

    def detect(self, vi, minconf=0.01):
        """Activity detection on a cleared clone of the input video, with finalized=False"""
        assert isinstance(vi, vipy.video.Scene)
        return self.__call__(vi.clone().clear(), minconf=minconf, finalized=False)

    def classify(self, vi, minconf=0.001, topk=5, repeat=1):
        """Activity classification of the input video, keeping at most one activity per category.

        Args:
            vi [vipy.video.Scene]: The input video, a cleared clone at 5Hz is processed
            minconf [float]: The minimum confidence passed to the detector
            topk [int]: Keep the `topk` highest confidence activities after deduplication per category.  None keeps all of them, zero keeps none.
            repeat [int]: If positive, the video is loaded and its frame list is repeated this many times (repeat to achieve minimums)

        Returns:
            The detected video restricted to the selected activities, with activities ordered by decreasing confidence
        """
        assert isinstance(vi, vipy.video.Scene)
        v = vi.clone().clear().framerate(5)
        if repeat > 0:
            v = v.load().fromframes([vj for k in range(repeat) for vj in v.framelist()], copy=True)  # repeat to achieve minimums
        vo = self.__call__(v, minconf=minconf, finalized=False)

        ascending = sorted(vo.activitylist(), key=lambda a: a.confidence())

        # Later (higher confidence) entries overwrite earlier ones: keep highest confidence activity id deduped per class
        aid = set({a.category(): a.id() for a in ascending}.values())
        if topk is None:
            ai = aid
        else:
            ranked = [a.id() for a in ascending if a.id() in aid]
            # Slice from an explicit start index: ranked[-topk:] would return everything for topk=0
            ai = set(ranked[max(0, len(ranked) - topk):])  # top-k unique activity id

        filtered = vo.flush().activityfilter(lambda a: a.id() in ai)
        by_confidence = sorted(vo.activities().values(), key=lambda a: a.confidence(), reverse=True)
        return filtered.activities({a.id(): a for a in by_confidence})  # reordered for primary_activity() highest confidence
Classes
class Actev21
-
heyvi.system.Actev21() class
Real time activity detection for the 37 MEVA (https://mevadata.org) activity classes
>>> v = heyvi.sensor.rtsp().framerate(5) >>> S = heyvi.system.Actev21() >>> with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s: >>> S(v, frame_callback=lambda im, imraw, v: s(im), minconf=0.2)
Expand source code Browse git
class Actev21(): """heyvi.system.Actev21() class Real time activity detection for the 37 MEVA (https://mevadata.org) activity classes >>> v = heyvi.sensor.rtsp().framerate(5) >>> S = heyvi.system.Actev21() >>> with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s: >>> S(v, frame_callback=lambda im, imraw, v: s(im), minconf=0.2) """ def __init__(self): assert vipy.version.is_at_least('1.11.11') assert heyvi.version.is_at_least('0.0.5') assert torch.cuda.device_count() >= 4 self._activitymodel = vipy.downloader.downloadif('https://dl.dropboxusercontent.com/s/ntvjg352b0fwnah/mlfl_v5_epoch_41-step_59279.ckpt', vipy.util.tocache('mlfl_v5_epoch_41-step_59279.ckpt'), # set VIPY_CACHE env sha1='c4457e5b2e4fa1462d552070c47cac9eb2833e47') self._annotator = lambda im, f=vipy.image.mutator_show_trackindex_verbonly(confidence=True): f(im).annotate(timestamp=heyvi.util.timestamp(), timestampoffset=(6,10), fontsize=15).rgb() def __call__(self, vi, vs=None, minconf=0.04, verbose=True, frame_callback=None, livestream=False): assert isinstance(vi, vipy.video.Scene) assert vs is None or (isinstance(vs, vipy.video.Stream) and vs.framerate() == 5) livedelay = 2*15*5 if vi.islive() or livestream else 5 objects = ['person', ('car','vehicle'), ('truck','vehicle'), ('bus', 'vehicle'), 'bicycle'] # merge truck/bus/car to vehicle, no motorcycles track = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=objects, overlapfrac=6, gate=64, detbatchsize=None) activities = list(heyvi.label.pip_plus_meva_to_meva.items()) detect = heyvi.recognition.Actev21_AD(gpus=[0,1,2,3], batchsize=64, modelfile=self._activitymodel, stride=3, activities=activities) # stride should match tracker stride 4->3 gc.disable() (srcdim, srcfps) = (vi.mindim(), vi.framerate()) vs = vs if vs is not None else contextlib.nullcontext() vi = vi.mindim(960).framerate(5) for (f, (im,vi)) in enumerate(zip(vi.stream(buffered=True).frame(delay=livedelay), # live 
stream delay (must be >= 2x finalized period) detect(track(vi, stride=3, buffered=vi.islive()), mirror=False, trackconf=0.2, minprob=minconf, maxdets=105, avgdets=70, throttle=True, activityiou=0.1, buffered=vi.islive(), finalized=(livedelay//2) if vi.islive() or livestream else True))): if callable(frame_callback) and im is not None: frame_callback(self._annotator(im.clone()), im, vi) if verbose: print('[heyvi.system.Actev21][%s][%d]: %s' % (timestamp(), f, vi), end='\r') vi.activityfilter(lambda a: a.category() not in ['person', 'person_walks', 'vehicle', 'car_moves']) # remove background activities vo = vi.framerate(srcfps) # upsample tracks/activities back to source framerate vo = vo.mindim(srcdim) # upscale tracks back to source resolution gc.enable() return vo def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512): return (v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf)) .annotate(mutator=vipy.image.mutator_show_trackindex_verbonly(confidence=True) if (not trackonly and not nounonly) else (vipy.image.mutator_show_trackonly() if trackonly else vipy.image.mutator_show_nounonly(nocaption=True)), timestamp=True, fontsize=6, outfile=outfile)) # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution
Methods
def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512)
-
Expand source code Browse git
def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512): return (v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf)) .annotate(mutator=vipy.image.mutator_show_trackindex_verbonly(confidence=True) if (not trackonly and not nounonly) else (vipy.image.mutator_show_trackonly() if trackonly else vipy.image.mutator_show_nounonly(nocaption=True)), timestamp=True, fontsize=6, outfile=outfile)) # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution
class CAP (modelfile, labelset='cap', verbose=True)
-
heyvi.system.CAP() class
Real time activity detection for the 512 CAP (https://visym.github.io/cap) activity classes
Expand source code Browse git
class CAP(): """heyvi.system.CAP() class Real time activity detection for the 512 CAP (https://visym.github.io.cap) activity classes """ def __init__(self, modelfile, labelset='cap', verbose=True): assert vipy.version.is_at_least('1.12.4') assert heyvi.version.is_at_least('0.2.26') assert pycollector.version.is_at_least('0.4.2') assert torch.cuda.device_count() >= 4 self._unitnorm = False self._verbose = verbose self._activitymodel = modelfile # local testing only self._labelset = labelset self._annotator = lambda im, f=vipy.image.mutator_show_trackindex_verbonly(confidence=True): f(im).annotate(timestamp=heyvi.util.timestamp(), timestampoffset=(6,10), fontsize=15).rgb() self._objects = ['person', ('car','vehicle'), ('truck','vehicle'), ('bus', 'vehicle'), 'bicycle'] # merge truck/bus/car to vehicle, no motorcycles self._tracker = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=self._objects, overlapfrac=6, gate=64, detbatchsize=None) self._ad = heyvi.recognition.CAP_AD(gpus=[0,1,2,3], batchsize=64, modelfile=self._activitymodel, stride=3, unitnorm=self._unitnorm, labelset=self._labelset, verbose=self._verbose) # stride should match tracker stride 4->3 def __call__(self, vi, minconf=0.04, frame_callback=None, livestream=False, mintracklen=None, finalized=True): assert isinstance(vi, vipy.video.Scene) (srcdim, srcfps, detect, track, livedelay) = (vi.mindim(), vi.framerate(), self._ad, self._tracker, 2*15*5 if vi.islive() or livestream else 5) gc.disable() vi = vi.mindim(960).framerate(5) for (f, (im,vi)) in enumerate(zip(vi.stream(buffered=True).frame(delay=livedelay), # live stream delay (must be >= 2x finalized period) detect(track(vi, stride=3, buffered=vi.islive()), mirror=False, trackconf=0.2, minprob=minconf, maxdets=105, avgdets=70, throttle=True, activityiou=0.1, buffered=vi.islive(), finalized=(livedelay//2) if vi.islive() or livestream else finalized, mintracklen=mintracklen))): if 
callable(frame_callback) and im is not None: frame_callback(self._annotator(im.clone()), im, vi) if self._verbose: print('[heyvi.system.CAP][%s][%d]: %s' % (heyvi.util.timestamp(), f, vi), end='\r') vi.activityfilter(lambda a: a.category() not in ['person', 'person_walks', 'vehicle', 'car_moves']) # remove background activities vo = vi.framerate(srcfps).clone() # upsample tracks/activities back to source framerate vo = vo.mindim(srcdim) # upscale tracks back to source resolution gc.enable() return vo def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512): return (v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf)) .annotate(mutator=vipy.image.mutator_show_trackindex_verbonly(confidence=True) if (not trackonly and not nounonly) else (vipy.image.mutator_show_trackonly() if trackonly else vipy.image.mutator_show_nounonly(nocaption=True)), timestamp=True, fontsize=6, outfile=outfile)) # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution def detect(self, vi, minconf=0.01): assert isinstance(vi, vipy.video.Scene) return self.__call__(vi.clone().clear(), minconf=minconf, finalized=False) def classify(self, vi, minconf=0.001, topk=5, repeat=1): assert isinstance(vi, vipy.video.Scene) v = vi.clone().clear().framerate(5) v = v.load().fromframes([vj for k in range(repeat) for vj in v.framelist()], copy=True) if repeat>0 else v # repeat to achieve minimums vo = self.__call__(v, minconf=minconf, finalized=False) aid = set({a.category():a.id() for a in sorted(vo.activitylist(), key=lambda a: a.confidence())}.values()) # keep highest confidence activity id deduped per class ai = set([a.id() for a in sorted(vo.activitylist(), key=lambda a: a.confidence()) if a.id() in aid][-topk:]) if topk is not None else aid # top-k unique activity id return vo.flush().activityfilter(lambda a: a.id() in ai).activities({a.id():a for a in sorted(vo.activities().values(), key=lambda a: a.confidence(), 
reverse=True)}) # reordered for primary_activity() highest confidence
Methods
def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512)
-
Expand source code Browse git
def annotate(self, v, outfile, minconf=0.1, trackonly=False, nounonly=False, mindim=512): return (v.mindim(mindim).activityfilter(lambda a: a.confidence() >= float(minconf)) .annotate(mutator=vipy.image.mutator_show_trackindex_verbonly(confidence=True) if (not trackonly and not nounonly) else (vipy.image.mutator_show_trackonly() if trackonly else vipy.image.mutator_show_nounonly(nocaption=True)), timestamp=True, fontsize=6, outfile=outfile)) # colored boxes by track id, activity captions with confidence, 5Hz, 512x(-1) resolution
def classify(self, vi, minconf=0.001, topk=5, repeat=1)
-
Expand source code Browse git
def classify(self, vi, minconf=0.001, topk=5, repeat=1): assert isinstance(vi, vipy.video.Scene) v = vi.clone().clear().framerate(5) v = v.load().fromframes([vj for k in range(repeat) for vj in v.framelist()], copy=True) if repeat>0 else v # repeat to achieve minimums vo = self.__call__(v, minconf=minconf, finalized=False) aid = set({a.category():a.id() for a in sorted(vo.activitylist(), key=lambda a: a.confidence())}.values()) # keep highest confidence activity id deduped per class ai = set([a.id() for a in sorted(vo.activitylist(), key=lambda a: a.confidence()) if a.id() in aid][-topk:]) if topk is not None else aid # top-k unique activity id return vo.flush().activityfilter(lambda a: a.id() in ai).activities({a.id():a for a in sorted(vo.activities().values(), key=lambda a: a.confidence(), reverse=True)}) # reordered for primary_activity() highest confidence
def detect(self, vi, minconf=0.01)
-
Expand source code Browse git
def detect(self, vi, minconf=0.01): assert isinstance(vi, vipy.video.Scene) return self.__call__(vi.clone().clear(), minconf=minconf, finalized=False)
class Recorder (outfile, fps=30, overwrite=False)
-
Record a livestream to an output video file
This will record an out streaming to the provided outfile
>>> v = vipy.video.Scene(url='rtsp://...', framerate=30)
>>> R = Recorder('/tmp/out.mp4', fps=5)
>>> R(v, seconds=60*60)
To buffer to memory, you do not need this recorder, use (for small durations):
>>> v = v.duration(seconds=3).load().saveas('/tmp/out.mp4')
This will record three seconds from the provided RTSP stream and save in the usual way to the output file
To record frame by frame:
>>> v = vipy.video.RandomScene() >>> with Recorder('out.mp4') as r: >>> for im in v: >>> r(im.annotate().rgb()) # write individual frames from video v
Expand source code Browse git
class Recorder(): """Record a livestream to an output video file This will record an out streaming to the provided outfile >>> v = vipy.video.Scene(url='rtsp://...', framerate=30) >>> R = Recorder('/tmp/out.mp4', framerate=5) >>> R(v, seconds=60*60) To buffer to memory, you do not need this recorder, use (for small durations): >>> v = v.duration(seconds=3).load().saveas('/tmp/out.mp4') This will record three seconds from the provided RTSP stream and save in the usual way to the output file To record frame by frame: >>> v = vipy.video.RandomScene() >>> with Recorder('out.mp4') as r: >>> for im in v: >>> r(im.annotate().rgb()) # write individual frames from video v """ def __init__(self, outfile, fps=30, overwrite=False): assert vipy.util.isvideo(outfile) self._vo = vipy.video.Scene(filename=outfile, framerate=fps) self._overwrite = overwrite def __enter__(self): self._vs = self._vo.stream(write=True, overwrite=self._overwrite) return lambda im: self._vs.write(im.rgb()) def __exit__(self, type, value, tb): self._vs.__exit__(type, value, tb) def __repr__(self): return '<heyvi.system.Recorder: %s>' % str(self._vo) def __call__(self, vi, seconds=None, verbose=True): assert isinstance(vi, vipy.video.Scene) vi = vi if seconds is None else vi.clone().duration(seconds=seconds) vi = vi.framerate(self._vo.framerate()) with self._vo.stream(overwrite=True) as s: for (k,im) in enumerate(vi.stream()): if verbose: print('[heyvi.system.Recorder][%s][%d]: %s' % (timestamp(), k, im), end='\r') s.write(im) return self._vo
class Tracker (verbose=False)
-
heyvi.system.Tracker() class. Run a video object tracker on a video or live stream.
To run on a livestream:
v = heyvi.sensor.rtsp()
T = heyvi.system.Tracker()
with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s:
    T(v, frame_callback=lambda im: s(im.pixelize().annotate(fontsize=15, timestamp=heyvi.util.timestamp(), timestampoffset=(6,10))))
To run on an input file as a batch:
v = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
T = heyvi.system.Tracker()
v_tracked = T(v)
v_tracked.annotate('annotation.mp4')
To stream tracks computed per frame
vi = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
for (f,vo) in enumerate(T.stream(vi)):
    print(vo)  # tracking result at frame f
To stream tracks computed per frame, along with pixels for current frame
vi = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
for (f,(im,vo)) in enumerate(zip(vi, T.stream(vi))):
    print(vo)  # tracking result at frame f
    print(im)  # `vipy.image.Image` with pixels available as im.numpy()
To stream tracks computed per frame, along with the most recent video clip of length 16:
vi = vipy.video.Scene(filename='/path/to/infile.mp4', framerate=5)
for (f,(vc,vo)) in enumerate(zip(vi.stream().clip(16), T.stream(vi))):
    print(vo)  # tracking result at frame f
    print(vc)  # `vipy.video.Scene` with pixels for clips of length 16
For additional use cases for streaming batches, clips, frames, delays see the vipy documentation
Returns
`vipy.video.Scene` objects with tracks corresponding to objects in `heyvi.detection.MultiscaleVideoTracker.classlist`. Object tracks are "person", "vehicle", "bicycle".
Expand source code Browse git
class Tracker(): """heyvi.system.Tracker() class. Run a video object tracker on a video or live stream. To run on a livestream: ```python v = heyvi.sensor.rtsp() T = heyvi.system.Tracker() with heyvi.system.YoutubeLive(fps=5, encoder='480p') as s: T(v, frame_callback=lambda im: s(im.pixelize().annotate(fontsize=15, timestamp=heyvi.util.timestamp(), timestampoffset=(6,10))), minconf=0.5) ``` To run on an input file as a batch: ```python v = vipy.video.Scene(filename=/path/to/infile.mp4', framerate=5) T = heyvi.system.Tracker() v_tracked = T(v) v_tracked.annotate('annotation.mp4') ``` To stream tracks computed per frame ```python vi = vipy.video.Scene(filename=/path/to/infile.mp4', framerate=5) for (f,vo) in enumerate(T.stream(vi)): print(vo) # tracking result at frame f ``` To stream tracks computed per frame, along with pixels for current frame ```python vi = vipy.video.Scene(filename=/path/to/infile.mp4', framerate=5) for (f,(im,vo)) in enumerate(zip(vi, T.stream(vi))) print(vo) # tracking result at frame f print(im) # `vipy.image.Image` with pixels available as im.numpy() ``` To stream tracks computed per frame, along with the most recent video clip of length 16: ```python vi = vipy.video.Scene(filename=/path/to/infile.mp4', framerate=5) for (f,(vc,vo)) in enumerate(zip(vi.stream().clip(16), T.stream(vi))) print(vo) # tracking result at frame f print(vc) # `vipy.video.Scene` with pixels for clips of length 16 ``` For additional use cases for streaming batches, clips, frames, delays see the [vipy documentation](https://visym.github.io/vipy) Returns: `vipy.video.Scene` objects with tracks corresponding to objects in `heyvi.detection.MultiscaleVideoTracker.classlist`. Object tracks are "person", "vehicle", "bicycle". 
""" def __init__(self, verbose=False): assert vipy.version.is_at_least('1.11.11') assert heyvi.version.is_at_least('0.0.5') assert torch.cuda.device_count() >= 4 objects = ['person', ('car','vehicle'), ('truck','vehicle'), ('bus', 'vehicle'), 'bicycle'] # merge truck/bus/car to vehicle self._tracker = heyvi.detection.MultiscaleVideoTracker(gpu=[0,1,2,3], batchsize=9, minconf=0.05, trackconf=0.2, maxhistory=5, objects=objects, overlapfrac=0, gate=64, detbatchsize=None) self._verbose = verbose def __call__(self, vi, frame_callback=None): """Batch tracking of input video file""" assert isinstance(vi, vipy.video.Scene) for (k, (im,v)) in enumerate(zip(vi.stream(buffered=True).frame(delay=5), self._tracker(vi, stride=3, buffered=vi.islive()))): if callable(frame_callback) and im is not None: frame_callback(im) if self._verbose and v is not None: print('[heyvi.system.Tracker][%s][%d]: %s' % (timestamp(), k, str(v)+' '*100), end='\r') return vi def stream(self, vi): """Tracking iterator of input video""" for (k, (im,v)) in enumerate(zip(vi.stream(buffered=True).frame(delay=5), self._tracker(vi, stride=3, buffered=vi.islive()))): yield v
Methods
def stream(self, vi)
-
Tracking iterator of input video
Expand source code Browse git
def stream(self, vi): """Tracking iterator of input video""" for (k, (im,v)) in enumerate(zip(vi.stream(buffered=True).frame(delay=5), self._tracker(vi, stride=3, buffered=vi.islive()))): yield v
class YoutubeLive (streamkey=None, url='rtmp://a.rtmp.youtube.com/live2', fps=30, encoder='480p')
-
Youtube Live stream.
>>> Y = heyvi.system.YoutubeLive(encoder='480p') >>> v = heyvi.sensor.rtsp() >>> Y(v)
Args
encoder [str]['480p', '720p', '360p']: The encoder settings for the youtube live stream
fps [float]: The framerate in frames per second of the output stream.
streamkey [str]: The youtube live key (https://support.google.com/youtube/answer/9854503?hl=en), or set as environment variable VIPY_YOUTUBE_STREAMKEY
Expand source code Browse git
class YoutubeLive(): """Youtube Live stream. >>> Y = heyvi.system.YoutubeLive(encoder='480p') >>> v = heyvi.sensor.rtsp() >>> Y(v) Args: encoder [str]['480p, '720p', '360p']: The encoder settings for the youtube live stream fps [float]: The framerate in frames per second of the output stream. streamkey [str]: The youtube live key (https://support.google.com/youtube/answer/9854503?hl=en), or set as envronment variable VIPY_YOUTUBE_STREAMKEY """ def __init__(self, streamkey=None, url='rtmp://a.rtmp.youtube.com/live2', fps=30, encoder='480p'): assert streamkey is not None or 'VIPY_YOUTUBE_STREAMKEY' in os.environ streamkey = streamkey if streamkey is not None else os.environ['VIPY_YOUTUBE_STREAMKEY'] # https://support.google.com/youtube/answer/2853702?hl=en#zippy=%2Cp self._encoder_recommended = {'720p':{'width':1280, 'height':720, 'bitrate': '4000k'}, '480p':{'width':854, 'height':480, 'bitrate': '1000k'}, '360p':{'width':640, 'height':360, 'bitrate': '1000k'}} assert encoder in self._encoder_recommended self._encoder = self._encoder_recommended[encoder] self._url = '%s/%s' % (url, streamkey) assert vipy.util.isurl(self._url) self._vo = vipy.video.Scene(url=self._url, framerate=fps) def __repr__(self): return '<heyvi.system.YoutubeLive: url=%s, framerate=%2.1f>' % (str(self._vo.url()), self._vo.framerate()) def __enter__(self): (h,w,br) = (self._encoder['height'], self._encoder['width'], self._encoder['bitrate']) self._vs = self._vo.stream(write=True, bitrate=br) return lambda im: self._vs.write(im.rgb() if im.shape() == (h,w) else im.rgb().resize(height=h, width=w)) # quiet anisotropic resize to stream dimensions def __exit__(self, type, value, tb): self._vs.__exit__(type, value, tb) def __call__(self, vi, verbose=True): assert isinstance(vi, vipy.video.Scene) (h,w,fps) = (self._encoder['height'], self._encoder['width'], self._vo.framerate()) with self as s: for (k,im) in enumerate(vi.framerate(fps).resize(height=h, width=w)): if verbose: 
print('[heyvi.system.YoutubeLive][%s][%d]: %s' % (timestamp(), k,im), end='\r') s(im) # write frame to live stream return self