bdvdrip/backup-disc.sh

734 lines
32 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/bin/bash
# Purpose: _Fully_ rip a DVD or BD to MKV (DVD only for now).
#
# LICENCE: GNU GPL v3 or later
#
# Dependencies (needed in PATH):
# — isosize (part of util-linux) : get sector size and count of the disc (unless: -n)
# — dd, ddrescue : copy image of disc (unless: -n)
# — HandBrakeCLI (part of HandBrake) : get disc metadata
# — ffmpeg, ffprobe (part of FFMpeg) : extract single frames and rip to MKV
# — spumux (part of dvdauthor) : generate missing VOBSUB streams to align all tracks
# — identify, montage, convert (part of ImageMagick): get extracted frame metadata, create visual timelines
# — czkawka_cli (part of Czkawka) : compare timelines and detect duplicate chapters
# — mkvmerge (part of MKVToolNix) : get MKV metadata and assemble chapters into a final MKV
# — sed, gawk, grep, sort, uniq, head, cut, tr, wc : data manipulation
# — dirname, tee, ln, mktemp, cat, touch, mv, rm : file manipulation
# — jq : for parsing JSON and extracting metadata
# — bc : arbitrary precision calculator
# — date : conversion between timestamps and hh:mm:ss.nnnnnnnnn
#
# Options:
# [-h : show this help ]
# [-? : display metadata and exit (implies: -n -v quiet) ]
# [-l lang1,lang2,… : all languages to retain for audio and subtitles, with preferred first; default: fra,eng]
# [-v debug|info|quiet : verbosity; default: info ]
# [-t temp_path : location with at least 2×disc size, to store temporary files; default: /tmp/bdvdrip-xxx]
# [-i input_path : path of the disc device or ISO; default: /dev/sr0 ]
# [-o output_path : path of target video file to write (`.mkv` extension); default: ./output.mkv ]
# [-n : do Not dump the input_path to an ISO file; symlink instead ]
# [-g MAX_WxMAX_H : maximum geometry of the encoded video (after border removal); default: 1280x720 (720p) ]
#
# IMPORTANT: https://unix.stackexchange.com/a/493393
#
# Technical references:
# — http://mod16.org/hurfdurf/?p=8
# — https://www.reddit.com/r/mkvtoolnix/comments/11nbfy0/tutorial_mediumlinked_tiny_segmented_mkvs_with/
# — https://www.reddit.com/r/handbrake/comments/bhqxve/handbrake_settings_explained/
trap 'exit 100' SIGINT
# read command-line arguments
# Parse the options documented in the header of this file and store them in
# the global variables read by the rest of the script:
#   NO_DD, DEBUG, SHOW, ALL_LANGS, BUILD, DVD, TARGET, MAXW, MAXH
# Arguments: the script's own command line ("$@").
# Exits 0 after printing the help (-h), exits 1 on an unknown option.
function parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -h) sed -nr '2,/^$/s/^# ?//p' "$0"; exit 0 ;;
      -\?) NO_DD=true; DEBUG=quiet; SHOW=true ;;
      -l) ALL_LANGS="$2"; shift ;;
      -v) DEBUG="$2"; shift ;;
      -t) BUILD="$2"; shift ;;
      -i) DVD="$2"; shift ;;
      -o) TARGET="$2"; shift ;;
      -n) NO_DD=true ;;
      -g)
        IFS=x read -r MAXW MAXH <<<"$2"
        # Round each dimension down to a multiple of 16. Only numeric values
        # are rounded; anything else is left untouched for later validation.
        if [[ "$MAXW" =~ ^[0-9]+$ ]]; then MAXW=$((10#$MAXW/16*16)); fi
        # NB: this must be its own statement. Written as "MAXH=… shift", the
        # assignment is only a temporary environment for the `shift` builtin
        # and the requested height is lost.
        if [[ "$MAXH" =~ ^[0-9]+$ ]]; then MAXH=$((10#$MAXH/16*16)); fi
        shift ;;
      *) { echo "Unknown option: $1"; sed -n '2,/^$/s/^..//p' "$0"; } >&2; exit 1 ;;
    esac
    shift
  done
}
parse_args "$@"
# apply defaults, then validate every path before doing any work
ALL_LANGS=${ALL_LANGS:-fra,eng}
# the first language of the list is the preferred one
DEFAULT_LANG=${ALL_LANGS%%,*}
# KEEP_ALL is set only when a temp path was given with -t: such a directory is
# never deleted by this script, whereas a mktemp-created one is removed at the end
KEEP_ALL=${BUILD:+true}
BUILD="${BUILD:-$(mktemp -d /tmp/bdvdrip-XXXXXX)}"
[ -d "$BUILD" ] && [ -w "$BUILD" ] || { echo "$BUILD is not a writeable directory" >&2; exit 1; }
DVD="${DVD:-/dev/sr0}"
# an unusable input is tolerated when $BUILD/.iso already exists (resumed run)
[ -r "$DVD" ] && ! [ -d "$DVD" ] || [ -e "$BUILD/.iso" ] || { echo "$DVD is not a useable readable input" >&2; exit 1; }
TARGET="${TARGET:-./output.mkv}"
[ -d "$(dirname "$TARGET")" ] && [ -w "$(dirname "$TARGET")" ] || { echo "$(dirname "$TARGET") is not a writeable directory" >&2; exit 1; }
# fall back to 720p for a missing, malformed or zero dimension
# (a zero maximum would later be used as a divisor in the geometry computation)
if ! [[ "$MAXW" =~ ^[1-9][0-9]*$ ]]; then MAXW=1280; fi
if ! [[ "$MAXH" =~ ^[1-9][0-9]*$ ]]; then MAXH=720; fi
# fd 3 duplicates the script's stderr; log_and_run writes its own messages there
exec 3>&2
# Run a command, announcing it (and any failure) on fd 3 unless DEBUG is "quiet".
# Usage: log_and_run [-o] STEP_LABEL COMMAND [ARGS…]
#   -o          : the caller wants the command's stdout (it is passed through to
#                 this function's stdout, so it can be piped or redirected)
#   STEP_LABEL  : human-readable description printed before the command line
# Globals: DEBUG (read); fd 3 must be open.
# Returns: the exit code of COMMAND (1 if the internal fifo cannot be created).
function log_and_run() {
  # everything is local so that no helper variable leaks into the script
  local getOutput output step fifo pid excode
  if [ "$1" == '-o' ]; then
    getOutput=1; shift
  fi
  step="$1"; shift
  if [ "$DEBUG" != quiet ]; then
    printf '\n>> %s\n>> %s\n' "$step" "${*@Q}" >&3
  fi
  case "$getOutput.$DEBUG" in
    1.debug)
      # pass stdout/stderr through, and copy both to fd 3 as they come
      "$@" 1> >(tee >(cat >&3)) 2> >(tee >(cat >&3) >&2) ;;
    1.*)
      # pass stdout/stderr through, and also collect both via a fifo so that
      # they can be shown in the error report below
      fifo="$(mktemp -u "${TMPDIR:-/tmp}/bdvdrip-fifo.XXXXXX")" && mkfifo -m 600 "$fifo" || {
        echo "log_and_run: cannot create fifo" >&2; return 1; }
      "$@" 1> >(tee -a "$fifo") 2> >(tee -a "$fifo" >&2) & pid=$!
      output="$(cat "$fifo")"
      rm -f "$fifo"
      wait "$pid" ;;
    .debug)
      # caller does not need the output: send everything to fd 3
      "$@" 1>&3 2>&3 ;;
    .*)
      # caller does not need the output: keep it only for the error report
      output="$("$@" 2>&1)" ;;
  esac
  excode=$?
  if [ $excode -ne 0 ] && [ "$DEBUG" != quiet ]; then
    printf '%s>>> ERROR: EXIT CODE %s\n\n' "${output:+"$output"$'\n'}" "$excode" >&3
  fi
  return $excode
}
# copy DVD
# Nothing is done when $BUILD/.iso is already present as a regular file (e.g. from a previous run).
if ! [ -f "$BUILD/.iso" ]; then
if [ -n "$NO_DD" ]; then
# no dump requested: $BUILD/.iso becomes a symlink to the input;
# RM_ISO is set only in "show metadata" mode, where that symlink is removed before exiting
RM_ISO=${SHOW:+true}
ln -s "$DVD" "$BUILD/.iso"
else
# the sed expression reduces the isosize output to two values, read as sector count then sector size
read -r sectcount sectsize < <(LANG=C isosize -x "$DVD" | sed -r 's/.*: (.*), .*: (.*)$/\1 \2/')
# first try a plain dd of exactly that many sectors; if it fails, fall back to two ddrescue passes
# sharing the same mapfile
log_and_run 'Dump disc to ISO image' \
dd bs=$sectsize count=$sectcount if="$DVD" of="$BUILD/.iso" || {
log_and_run 'Dump disc to ISO image (slower because of media errors)' \
ddrescue -n -b$sectsize "$DVD" "$BUILD/.iso" "$BUILD/.iso.mapfile"
log_and_run 'Try to recover damaged sectors if needed' \
ddrescue -d -r1 -b$sectsize "$DVD" "$BUILD/.iso" "$BUILD/.iso.mapfile"
}
fi
fi
# fetch metadata
# $BUILD/.json caches the HandBrakeCLI scan; it is reused as-is when already present.
# The sed script inserts a "{" line at the top, then deletes everything from the first line up to and
# including the line containing "JSON Title Set:", so that only the JSON body remains.
if ! [ -f "$BUILD/.json" ]; then
log_and_run -o 'Read disc metadata (tracks, chapters…)' \
HandBrakeCLI --json -t 0 --min-duration 5 -i "$BUILD/.iso" 2>/dev/null \
| sed $'1i \\\n{\n1,/JSON Title Set:/d' >"$BUILD/.json"
fi
# ALL_META is a flat, tab-separated text view of the JSON, one line per item. For each title:
#   "TITLE <n> (<h>:<m>:<s>)", then one "V-<n>" line, "A-<n>" lines, "S-<n>" lines and "C-<n>" lines,
#   each group being preceded by a tab-indented legend line describing its columns.
# Data lines start with the tag, then an empty column, then the values listed in the legend.
ALL_META="$(
log_and_run -o 'Parse metadata' \
jq -r ".TitleList[] | .Index as \$tnum | \
\"TITLE \(\$tnum) (\(.Duration.Hours):\(.Duration.Minutes):\(.Duration.Seconds))\", \
\"\tVideo (index, codec, geometry, frame-rate, bit-depth, chroma-subsampling, pixel aspect ratio, top:bottom:left:right borders, is interlaced)\", \
\"V-\(\$tnum)\t\t0\t\(.VideoCodec)\t\(.Geometry.Width)x\(.Geometry.Height)\t\(.FrameRate.Num/.FrameRate.Den)\t\(.Color.BitDepth)\t\(.Color.ChromaSubsampling)\t\(.Geometry.PAR.Num)/\(.Geometry.PAR.Den)\t\(.Crop[0]):\(.Crop[1]):\(.Crop[2]):\(.Crop[3])\t\(.InterlaceDetected)\", \
\"\tAudio (index, language, codec, channels, sample-rate, bit/s, is default, is commentary, is secondary, for visually impaired, description)\", \
(.AudioList[] | \
\"A-\(\$tnum)\t\t\(.TrackNumber)\t\(.LanguageCode)\t\(.CodecName)\t\(.ChannelCount)\t\(.SampleRate)\t\(.BitRate)\t\(.Attributes.Default)\t\(.Attributes.Commentary or .Attributes.AltCommentary)\t\(.Attributes.Secondary or .Attributes.AltCommentary)\t\(.Attributes.VisuallyImpaired)\t\(.Description)\"), \
\"\tSubtitles (index, language, codec, is default, is commentary, is forced, for hearing impaired, description)\", \
(.SubtitleList[] | \
\"S-\(\$tnum)\t\t\(.TrackNumber)\t\(.LanguageCode)\t\(.SourceName):\(.Format)\t\(.Attributes.Default)\t\(.Attributes.Commentary)\t\(.Attributes.Forced)\t\(.Attributes.ClosedCaption)\t\(.Language)\"), \
\"\tChapters (index, duration, name)\", \
(.ChapterList | to_entries[] | \
\"C-\(\$tnum)\t\t\(.key+1)\t\(.value.Duration.Hours):\(.value.Duration.Minutes):\(.value.Duration.Seconds)\t\(.value.Name)\")" \
<"$BUILD/.json"
)"
# show information then exit, if requested
if [ -n "$SHOW" ]; then
echo "$ALL_META"
# clean up: remove the whole temp directory unless it was supplied by the user (KEEP_ALL),
# in which case only the symlink created for this run (RM_ISO) is removed
[ -z "$KEEP_ALL" ] && rm -rf "$BUILD" || { [ -n "$RM_ISO" ] && rm -f "$BUILD/.iso"; }
exit 0
fi
# parse general information
# TITLE_LIST: title numbers, taken from the "TITLE <n> (…)" lines of ALL_META
declare TITLE_LIST=$(sed -rn 's/^TITLE (.*) .*/\1/p' <<<"$ALL_META")
# VFORMAT: "ntsc" as soon as one line has a 5th column (the geometry on V- lines) ending in "x480";
# left empty otherwise (later uses default to pal through ${VFORMAT:-pal})
declare VFORMAT=$(awk -F$'\t' '$5~"x480$"{print "ntsc";exit}' <<<"$ALL_META")
# VSECONDS[title]         : title duration in seconds
# CHAPTERS[title]         : chapter numbers of the title (one per line)
# CHNAMES[title.chapter]  : chapter name
# INTERLV[title]          : last column of the V- line (the "is interlaced" value)
declare -A VSECONDS
declare -A CHAPTERS
declare -A CHNAMES
declare -A INTERLV
while read -r tnum h m s; do VSECONDS[$tnum]=$((3600*h+60*m+s)); done < <(
sed -nr 's/^TITLE (.*) \((.*):(.*):(.*)\)/\1 \2 \3 \4/p' <<<"$ALL_META")
for tnum in $TITLE_LIST; do CHAPTERS[$tnum]=$(
sed -rn "s/^C-$tnum\\t\\t([^\\t]*)\\t.*/\\1/p" <<<"$ALL_META"); done
while read -r tnum chnum name; do CHNAMES[$tnum.$chnum]="$name"; done < <(
sed -nr 's/^C-([0-9]+)\t\t([0-9]+)\t[0-9:]*\t(.*)/\1 \2 \3/p' <<<"$ALL_META")
while read -r tnum interl; do INTERLV[$tnum]="$interl"; done < <(
sed -nr 's/^V-([0-9]+)\t.*\t([^\t]*)$/\1 \2/p' <<<"$ALL_META")
# compute final geometry
# scale_round KEEP STRETCH PAR_NUM PAR_DEN SCALE_NUM SCALE_DEN
#   KEEP                : source dimension that is not corrected by the pixel aspect ratio
#   STRETCH             : source dimension that is multiplied by PAR_NUM/PAR_DEN
#   SCALE_NUM/SCALE_DEN : resizing factor applied to both dimensions
# Both results are brought to a multiple of 16 (4 is added before flooring, so a
# remainder of 12 or more rounds up). All arithmetic is integer arithmetic.
# Prints on one line, without trailing newline:
#   "<KEEP mapped back to source> <STRETCH mapped back to source>
#    <final KEEP> <final STRETCH> <SCALE_NUM> <SCALE_DEN>"
function scale_round() {
  local keep_src=$1 stretch_src=$2
  local par_n=$3 par_d=$4
  local sc_n=$5 sc_d=$6
  local keep_out stretch_out keep_back stretch_back
  # final sizes, multiples of 16
  keep_out=$(( (keep_src+4)*sc_n/sc_d/16*16 ))
  stretch_out=$(( (stretch_src*par_n/par_d+4)*sc_n/sc_d/16*16 ))
  # source sizes that correspond to those final sizes
  keep_back=$(( keep_out*sc_d/sc_n ))
  stretch_back=$(( stretch_out*sc_d*par_d/sc_n/par_n ))
  printf '%d %d %d %d %d %d' \
    "$keep_back" "$stretch_back" "$keep_out" "$stretch_out" "$sc_n" "$sc_d"
}
# Per-title geometry tables filled by the loop below:
#   SRC_RAW[title]   : "<raw width> <raw height> <PAR num> <PAR den>"
#   SRC_LTRB[title]  : "<left> <top> <right> <bottom>" borders to crop
#   SRC_WH2WH[title] : "<src width> <src height> <final width> <final height> <scale num> <scale den>"
#   DAR_COUNT["<final width> <final height>"] : number of chapters using that final geometry
declare -A SRC_RAW SRC_LTRB SRC_WH2WH DAR_COUNT
# → compute optimal crop and sizes, to target a pixel aspect ratio of 1:1, with width and height being multiples of 16
for tnum in $TITLE_LIST; do
# extract "<width> <height> <PAR num> <PAR den> <top> <bottom> <left> <right>" from the V- line of the title
# (the sed `T` command skips every line on which the first substitution did not apply)
read -r raww rawh parn pard cropt cropb cropl cropr < <(sed -nr "
s/^V-$tnum\t(\t[^\t]*){2}\t([0-9]+)x([0-9]+)/\2 \3/; T
s#(\t[^\t]*){3}\t([0-9]+)/([0-9]+)\t([0-9]+):([0-9]+):([0-9]+):([0-9]+)\t.*# \2 \3 \4 \5 \6 \7#p
" <<<"$ALL_META")
cropw=$((raww-cropl-cropr))
croph=$((rawh-cropt-cropb))
# the dimension to stretch depends on the PAR; when the result exceeds MAXW×MAXH, it is computed again
# with a scale factor based on whichever dimension overflows the most
if [ $parn -gt $pard ]; then # stretch horizontally
read -r srch srcw finalh finalw scalen scaled < <(scale_round $croph $cropw $parn $pard 1 1)
if [ $finalw -gt $MAXW ] || [ $finalh -gt $MAXH ]; then
if [ $((100*finalw/MAXW)) -gt $((100*finalh/MAXH)) ]; then
read -r srch srcw finalh finalw scalen scaled < <(scale_round $croph $cropw $parn $pard $MAXW $finalw)
else
read -r srch srcw finalh finalw scalen scaled < <(scale_round $croph $cropw $parn $pard $MAXH $finalh)
fi
fi
else # stretch vertically
read -r srcw srch finalw finalh scalen scaled < <(scale_round $cropw $croph $pard $parn 1 1)
if [ $finalw -gt $MAXW ] || [ $finalh -gt $MAXH ]; then
if [ $((100*finalw/MAXW)) -gt $((100*finalh/MAXH)) ]; then
read -r srcw srch finalw finalh scalen scaled < <(scale_round $cropw $croph $pard $parn $MAXW $finalw)
else
read -r srcw srch finalw finalh scalen scaled < <(scale_round $cropw $croph $pard $parn $MAXH $finalh)
fi
fi
fi
# never use a source area larger than the raw picture
[ $srcw -le $raww ] || srcw=$raww
[ $srch -le $rawh ] || srch=$rawh
SRC_LTRB[$tnum]="$cropl $cropt $cropr $cropb"
SRC_WH2WH[$tnum]="$srcw $srch $finalw $finalh $scalen $scaled"
SRC_RAW[$tnum]="$raww $rawh $parn $pard"
# add the number of chapters of this title to the counter of its final geometry
DAR_COUNT["$finalw $finalh"]=$(awk -vN=${DAR_COUNT["$finalw $finalh"]:-0} "/C-$tnum\t/{N++};END{print N}" <<<"$ALL_META")
done
# → identify the width and height most often used in all chapters
# (ties are resolved in favour of the larger width, then the larger height)
read -r maxw maxh x < <(
  for wh in "${!DAR_COUNT[@]}"; do printf '%s %d\n' "$wh" ${DAR_COUNT[$wh]}; done | sort -k3,3nr -k1,1nr -k2,2nr | head -n 1)
unset DAR_COUNT
# → rescale each title according to the chosen width and height
for tnum in $TITLE_LIST; do
  read -r sw sh fw fh scn scd <<<"${SRC_WH2WH[$tnum]}"
  read -r rw rh parn pard <<<"${SRC_RAW[$tnum]}"
  # already OK ⇒ skip
  # (fits in the chosen geometry and touches it on at least one dimension)
  if { [ $fw -eq $maxw ] || [ $fh -eq $maxh ]; } && [ $fw -le $maxw ] && [ $fh -le $maxh ]; then
    continue
  fi
  # else scale final geometry
  # first make the width match; if the height then overflows, make the height match instead
  if [ $parn -gt $pard ]; then
    read -r newsrch newsrcw newh neww newscalen newscaled < <(scale_round $sh $sw $parn $pard $((scn*maxw)) $((scd*fw)))
    if [ $newh -gt $maxh ]; then
      read -r newsrch newsrcw newh neww newscalen newscaled < <(scale_round $sh $sw $parn $pard $((scn*maxh)) $((scd*fh)))
    fi
  else
    read -r newsrcw newsrch neww newh newscalen newscaled < <(scale_round $sw $sh $pard $parn $((scn*maxw)) $((scd*fw)))
    if [ $newh -gt $maxh ]; then
      # same PAR orientation as the call just above (pard/parn): in this branch the
      # height is the stretched dimension, as in the first geometry computation
      read -r newsrcw newsrch neww newh newscalen newscaled < <(scale_round $sw $sh $pard $parn $((scn*maxh)) $((scd*fh)))
    fi
  fi
  SRC_WH2WH[$tnum]="$newsrcw $newsrch $neww $newh $newscalen $newscaled"
done
unset SRC_RAW
# compute final audio streams
# SRC_AUDIO[title]  : one line per retained audio stream of the title (columns described below)
# CATEG_COUNT[key]  : for each "<codec> <Hz> <bit/s> <lang> <channels> <commentary> <visu.impaired> <priority>"
#                     key, the number of chapters of the titles having such a stream
declare -A SRC_AUDIO CATEG_COUNT
# → categorize all audio streams
for tnum in $TITLE_LIST; do
# reorder the columns of the A- lines of this title: technical info first, then category
allaudio="$(awk -F$'\t' -vT=$tnum '$1=="A-" T{print T, $3, $5, $7, $8, $11, $4, $6, $10, $12}' <<<"$ALL_META")"
# (technical info:) <1:title number> <2:stream number> <3:codec> <4:Hz> <5:bit/s> <6:secondary?>
# (category:) <7:lang> <8:channels count> <9:commentary?> <10:visu.impaired?> <11:priority>
# Only the languages of ALL_LANGS are kept, in that order. The priority is a counter restarting at 1
# each time the (channels, commentary, visu.impaired) triple changes in the sorted list.
SRC_AUDIO[$tnum]="$(
for lng in ${ALL_LANGS//,/ }; do
awk -vL=$lng '$7==L{print}' <<<"$allaudio" \
| sort -k9,9 -k10,10 -k6,6 -k8,8nr -k2,2n \
| awk '($8 $9 $10)!=ref{ref=($8 $9 $10);c=0};{c++; print $0, c}'
done
)"
# add the number of chapters of this title to the counter of each stream category found in it
while read -r x x cod freq bps x lng cnt cmt blind prio; do
x="$cod $freq $bps $lng $cnt $cmt $blind $prio"
CATEG_COUNT["$x"]=$(awk -vN=${CATEG_COUNT["$x"]:-0} "/C-$tnum\t/{N++};END{print N}" <<<"$ALL_META")
done <<<"${SRC_AUDIO[$tnum]}"
done
# → identify for each category the audio settings most often used in all chapters, with preferred language on top
# expo maps a priority (1–9) to the superscript appended to the track name; priority 1 gets no suffix.
# Every key must be followed by its own value and a space, otherwise key 4 swallows the rest of the line.
declare -A expo=(["1"]='' ["2"]=² ["3"]=³ ["4"]=⁴ ["5"]=⁵ ["6"]=⁶ ["7"]=⁷ ["8"]=⁸ ["9"]=⁹)
# all SRC_AUDIO entries of all titles, joined with newlines
allaudio="$(export IFS=$'\n'; echo "${SRC_AUDIO[*]}")"
# (technical info:) <1:stream number> <2:MKV codec> <3:Hz> <4:bit/s>
# (category:) <5:lang> <6:channels count> <7:commentary?> <8:visu.impaired?> <9:priority> <10:name>
MKV_AUDIO="$(
for lng in ${ALL_LANGS//,/ }; do
while read -r x x cod freq bps x lng cnt cmt blind prio; do
# prefix each candidate with its chapter count, and build its display name
x="$cod $freq $bps $lng $cnt $cmt $blind $prio"
name="${lng^^}${expo[$prio]} ${cnt}🕩"
[ "$cmt" == true ] && name+=" 🗩"
[ "$blind" == true ] && name+=" 🙈"
echo "${CATEG_COUNT["$x"]} $cod $freq $bps $lng $cnt $cmt $blind $prio $name"
done < <(awk -vL=$lng '$7==L{print}' <<<"$allaudio") \
| sort -k7,7 -k8,8 -k6,6nr -k9,9n -k1,1nr \
| uniq -f5
done \
| nl -nln -s' ' -w1 | cut -d' ' -f1,3-
)"
# (uniq -f5 ignores the count and technical columns, so one line is kept per category: the first one,
#  i.e. the one with the highest chapter count; nl then numbers the streams and cut drops the count)
unset expo allaudio
unset CATEG_COUNT
# Second pass over MKV_AUDIO: add the FFmpeg view (stream index, codec name, channel layout) of each
# final stream, probed on one source stream having exactly the same characteristics.
# (technical info:) <1:stream number> <2:MKV codec> <3:FFMpeg index> <4:FFMpeg codec> <5:FFMpeg layout> <6:Hz> <7:bit/s>
# (category:) <8:lang> <9:channels count> <10:commentary?> <11:visu.impaired?> <12:priority> <13:name>
MKV_AUDIO="$(
while read -r num cod freq bps lng count cmt blind prio name; do
# take example on an existing stream
# (awk prefixes each SRC_AUDIO line with its rank within its title; sed keeps the rank and the title
#  number of the first line matching the wanted characteristics, then quits)
read -r ffnum t2 < <(export IFS=$'\n'; echo "${SRC_AUDIO[*]}" \
| awk '$1!=T{T=$1;C=0};{print ++C, $0}' | sed -nr "s|^([^ ]+) ([^ ]+) [^ ]+ $cod $freq $bps [^ ]+ $lng $count $cmt $blind $prio\$|\\1 \\2|p;T;q")
read -r ffcod ffchanlay < <(ffprobe -hide_banner -output_format json -show_streams -select_streams a -f dvdvideo -title $t2 "$BUILD/.iso" 2>/dev/null \
| jq -r --argjson I $ffnum '.streams[] | select(.index == $I) | "\(.codec_name) \(.channel_layout)"')
echo "$num $cod $ffnum $ffcod $ffchanlay $freq $bps $lng $count $cmt $blind $prio $name"
done <<<"$MKV_AUDIO"
)"
# compute final subtitle streams
# SRC_SUB[title]: one line per retained subtitle stream of the title (columns described below)
declare -A SRC_SUB
# → categorize all subtitle streams
for tnum in $TITLE_LIST; do
# reorder the columns of the S- lines of this title: technical info first, then category
allsubs="$(awk -F$'\t' -vT=$tnum '$1=="S-" T{print $3, $8, $5, $4, $7, $9}' <<<"$ALL_META")"
# (technical info:) <1:stream number> <2:forced?>
# (category:) <3:codec> <4:lang> <5:commentary?> <6:hear.impaired?> <7:priority>
# Only the languages of ALL_LANGS are kept, in that order. The priority is a counter restarting at 1
# each time the (codec, commentary, hear.impaired) triple changes in the sorted list.
SRC_SUB[$tnum]="$(
for lng in ${ALL_LANGS//,/ }; do
awk -vL=$lng '$4==L{print}' <<<"$allsubs" | sort -k5,5 -k6,6 -k2,2r -k3,3 -k1,1n | awk '($3 $5 $6)!=ref{ref=($3 $5 $6);c=0};{c++; print $0, c}'
done
)"
done
# → identify final subtitle streams, with preferred language on top
# expo maps a priority (1–9) to the superscript appended to the track name; priority 1 gets no suffix.
# Every key must be followed by its own value and a space, otherwise key 4 swallows the rest of the line.
declare -A expo=(["1"]='' ["2"]=² ["3"]=³ ["4"]=⁴ ["5"]=⁵ ["6"]=⁶ ["7"]=⁷ ["8"]=⁸ ["9"]=⁹)
# all SRC_SUB entries of all titles, joined with newlines
allsubs="$(export IFS=$'\n'; echo "${SRC_SUB[*]}")"
# (technical info:) <1:stream number>
# (category:) <2:MKV codec> <3:lang> <4:commentary?> <5:hear.impaired?> <6:priority> <7:name>
MKV_SUB="$(
for lng in ${ALL_LANGS//,/ }; do
while read -r x x cod lng cmt deaf prio; do
# build the display name of the category
name="${lng^^}${expo[$prio]}"
[ "$cmt" == true ] && name+=" 🗩"
[ "$deaf" == true ] && name+=" 🙉"
echo "$cod $lng $cmt $deaf $prio $name"
done < <(awk -vL=$lng '$4==L{print}' <<<"$allsubs") \
| sort -k3,3 -k4,4 -k1,1 -k5,5n \
| uniq
done \
| nl -nln -s' ' -w1
)"
# (one line is kept per distinct category, then nl numbers the final streams)
unset expo
# generate per-chapter timeline snapshots
# For each candidate chapter, one frame is extracted every 5 seconds, the frames are assembled in a
# square grid ($BUILD/.tsnap/t.T.ch.C.png), and the grid is annotated with the audio and subtitle
# categories of the title ($BUILD/t.T.ch.C.png). Skipped when annotated timelines already exist.
if [ -z "$(ls "$BUILD/"t.*.ch.*.png 2>/dev/null)" ]; then
[ -d "$BUILD/.tsnap" ] || mkdir "$BUILD/.tsnap"
while read -r tnum chnum x; do
# chapter duration in seconds, from its C- line
seconds=$(awk -F$'\t' -vT=$tnum -vC=$chnum '$3==C && $1=="C-" T{FS=":"; $0=$4; print 3600*$1+60*$2+$3}' <<<"$ALL_META")
# category columns of each audio / subtitle stream of the title, comma-joined (one array item per stream)
audio=( $(awk -vOFS=, '{print $7, $8, $9, $10, $11}' <<<"${SRC_AUDIO[$tnum]}") )
subs=( $(awk -vOFS=, '{print $3, $4, $5, $6, $7}' <<<"${SRC_SUB[$tnum]}") )
montagecmd=(montage -background black)
snapsec=0
snapcount=0
while [ $snapsec -le $seconds ]; do
target="$BUILD/.tsnap/t.$tnum.ch.$chnum.s.$snapsec.png"
log_and_run "Make snapshot @${snapsec}s of track $tnum chapter $chnum" \
ffmpeg -hide_banner -f dvdvideo -trim false -title $tnum -chapter_start $chnum -chapter_end $chnum -i "$BUILD/.iso" \
-ss ${snapsec}s -vf scale=w=256:h=144:force_original_aspect_ratio=decrease -vframes 1 "$target%1d.png" </dev/null
# ffmpeg was given a numbered output pattern: rename the produced file to the plain target name
mv "$target"*.png "$target"
snapsec=$((snapsec+5))
snapcount=$((snapcount+1))
montagecmd+=("$target")
done
# NOTE(review): the cell size is read from the snapshot taken at 5s, which is not produced for a
# chapter shorter than 5 seconds (only the one at 0s is) — confirm whether such chapters can reach here
cellsize=$(identify "$BUILD/.tsnap/t.$tnum.ch.$chnum.s.5.png" </dev/null | sed -nr 's/.* ([0-9]+x[0-9]+(\+[0-9]+){2}).*/\1/p')
# smallest square grid able to hold all snapshots
gridsize=1
while [ $((gridsize*gridsize)) -lt $snapcount ]; do gridsize=$((gridsize+1)); done
log_and_run "Make timeline snapshot of track $tnum chapter $chnum" \
"${montagecmd[@]}" -tile ${gridsize}x${gridsize} -geometry $cellsize -trim "$BUILD/.tsnap/t.$tnum.ch.$chnum.png" </dev/null
log_and_run "Annotate timeline snapshot of track $tnum chapter $chnum with audio and subtitles metadata" \
convert -fill red -gravity SouthEast -annotate +0+0 \
"$(export IFS=+; echo "${audio[*]}")|$(export IFS=+; echo "${subs[*]}")" \
"$BUILD/.tsnap/t.$tnum.ch.$chnum.png" "$BUILD/t.$tnum.ch.$chnum.png" </dev/null
unset audio subs
done < <(
# restrict to chapters having common durations
jq -r '.TitleList[] | .Index as $tnum | .ChapterList | to_entries | .[] | .key as $chnum | .value.Duration | "\($tnum) \(1+$chnum) \(.Hours).\(.Minutes).\(.Seconds).\(.Ticks)"' <"$BUILD/.json" \
| sort -k3,3V | uniq -f2 -D # keep only duplicates in the duration column
)
fi
# detect duplicates
# islongfilm TITLE
# Prints (does not return) the exit status of the test "title lasts at least
# 4200 seconds": 0 when it does, a non-zero status otherwise.
# Printed this way so that the value can be used directly as a numeric sort key.
function islongfilm() {
  local status
  test ${VSECONDS[$1]} -ge 4200 # [70min…]
  status=$?
  printf '%s\n' "$status"
}
# isepisode TITLE
# Prints (does not return) the exit status of the test "title lasts at least
# 900 seconds": 0 when it does, a non-zero status otherwise.
# Printed this way so that the value can be used directly as a numeric sort key.
function isepisode() {
  local status
  test ${VSECONDS[$1]} -ge 900 # [15min…]
  status=$?
  printf '%s\n' "$status"
}
# STREAMS_IDS[title] : signature of the audio and subtitle categories of the title; two chapters are
#                      only considered duplicates when their titles have the same signature
# SUBST[title.chapter] : "title.chapter" of the chapter to use instead of this (duplicate) one
declare -A STREAMS_IDS
declare -A SUBST
for tnum in $TITLE_LIST; do
strid=
# one ",<lang><priority>x<channels>(<commentary>/<visu.impaired>)" item per audio stream
while read -r x x x x x x lng cnt cmt blind prio; do
strid+=",${lng}${prio}x${cnt}(${cmt}/${blind})"
done <<<"${SRC_AUDIO[$tnum]}"
# one ",<lang><priority>:<codec>(<commentary>/<hear.impaired>)" item per subtitle stream
while read -r x x cod lng cmt deaf prio; do
strid+=",${lng}${prio}:${cod}(${cmt}/${deaf})"
done <<<"${SRC_SUB[$tnum]}"
STREAMS_IDS[$tnum]="$strid"
done
# $BUILD/.duplicates caches the result: one "<kept title> <kept chapter> <dropped title> <dropped chapter>"
# line per pair of chapters found to be duplicates; it is reused as-is when already present.
if ! [ -f "$BUILD/.duplicates" ]; then
# czkawka compares the annotated timelines; sed keeps the title and chapter numbers of each reported
# file, and the loop consumes these lines two at a time (i.e. as pairs)
log_and_run -o "Detect duplicates among all chapters of all titles" \
czkawka_cli image -f "$BUILD/.raw_dedup" -s High -m 1 -R -d "$BUILD/" \
| sed -rn 's#^"?/.*/t\.([0-9]+)\.ch\.([0-9]+)\.png.*#\1 \2#p' \
| while read -r tnum1 chnum1; read -r tnum2 chnum2; do
if [ "${STREAMS_IDS[$tnum1]}" == "${STREAMS_IDS[$tnum2]}" ]; then
# long film first, else episode first, else lower title number first, else lower chapter number first
# (both orderings of the pair are printed, sorted, and only the best one is kept)
printf '%d %d %d %d %d %d\n%d %d %d %d %d %d\n' \
$(islongfilm $tnum1) $(isepisode $tnum1) $tnum1 $tnum2 $chnum1 $chnum2 \
$(islongfilm $tnum2) $(isepisode $tnum2) $tnum2 $tnum1 $chnum2 $chnum1 \
| sort -k1,1n -k2,2n -k3,3n -k4,4n -k5,5n | head -n 1 | awk '{print $3, $5, $4, $6}'
fi
done >"$BUILD/.duplicates"
fi
# load the substitutions: the dropped chapter points to the kept one
while read -r t1 ch1 t2 ch2; do
SUBST[$t2.$ch2]="$t1.$ch1"
done <"$BUILD/.duplicates"
# rip chapters and fill-in missing streams
# Each non-duplicate chapter is encoded to $BUILD/t.T.ch.C.mkv with exactly the streams listed in
# MKV_AUDIO and MKV_SUB, in that order; a stream the title does not have is replaced by a generated
# silent / empty one, so that all chapter files share the same track layout.
# USED_A[title] / USED_S[title]: numbers of the final audio / subtitle streams really present in the title.
declare -A USED_A USED_S
for tnum in $TITLE_LIST; do
read -r sw sh fw fh scn scd <<<"${SRC_WH2WH[$tnum]}"
read -r cropl cropt cropr cropb <<<"${SRC_LTRB[$tnum]}"
# firstch is non-empty only while handling the first non-duplicate chapter of the title,
# so that USED_A / USED_S are filled only once per title
firstch=true
while read -r chnum chtimefloor; do
# duplicate chapters are not ripped
[ -z "${SUBST[$tnum.$chnum]}" ] || continue
# blank[file] = ffmpeg input number of each generated filler file (input 0 being the disc)
declare -A blank=()
# video stream
ffcmd=(
ffmpeg -hide_banner -f dvdvideo -preindex 1 -trim false
-title $tnum -chapter_start $chnum -chapter_end $chnum -i "$BUILD/.iso"
)
ffmap=(-map_chapters -1 -map 0:V:0)
ffenc=()
# filter chain: optional deinterlacing, crop, optional scaling, optional padding to the common geometry
# (each item is added with a leading comma; the first comma is dropped when the chain is used)
filter=
if [ "${INTERLV[$tnum]}" == true ]; then
filter+=",bwdif"
fi
filter+=",crop=x=$cropl:y=$cropt:w=$((sw/2*2)):h=$((sh/2*2)):exact=1"
if [ $sw -ne $fw ] || [ $sh -ne $fh ]; then
ffenc+=(-sws_flags lanczos+accurate_rnd)
filter+=",scale=w=$fw:h=$fh:force_original_aspect_ratio=disable,setsar=1/1"
fi
if [ $fw -ne $maxw ] || [ $fh -ne $maxh ]; then
filter+=",pad=w=$maxw:h=$maxh:x=-1:y=-1"
fi
ffenc+=(-filter:v:0 "${filter:1}" -enc_time_base:v:0 demux)
ffenc+=(-c:v:0 libx265 -x265-params:v:0 profile=main10:preset=slower:crf=18:sar=1:videoformat=${VFORMAT:-pal}:rc-lookahead=120:bframes=12:ref=6:subme=7:aq-mode=3)
# audio streams
while read -r mkvnum mkvcod ffnum ffcod ffchanlay mkvfreq mkvrate lng count cmt blind prio name; do if [ -n "$mkvnum" ]; then
# look for a stream of the same category in this title
read -r x num cod freq rate x < <(
awk -vL=$lng -vC=$count -vM=$cmt -vB=$blind -vP=$prio '$7==L && $8==C && $9==M && $10==B && $11==P{print}' <<<"${SRC_AUDIO[$tnum]}")
if [ -z "$num" ]; then
# no such stream in this title
# (a silent stream of the chapter duration is generated once per distinct set of characteristics)
f="$BUILD/tmp.empty_a.$ffchanlay.$mkvfreq.$mkvrate.$chtimefloor.$ffcod"
if ! [ -f "$f" ] && ! [ -f "$BUILD/t.$tnum.ch.$chnum.mkv" ]; then
log_and_run "For title $tnum chapter $chnum, create missing audio stream for ${chtimefloor}s of ${mkvnum}: ${name}" \
ffmpeg -hide_banner -strict -2 -lavfi anullsrc=channel_layout="${ffchanlay}":sample_rate=${mkvfreq}:duration=${chtimefloor} -c:a ${ffcod} -b:a ${mkvrate} "$f" </dev/null
fi
if [ -z "${blank["$f"]}" ]; then
ffcmd+=(-i "$f")
blank["$f"]=$((${#blank[*]}+1))
fi
ffmap+=(-map ${blank["$f"]}:a:0)
ffenc+=(-c:a:$((mkvnum-1)) copy)
elif [ "$cod,$freq,$rate" == "$mkvcod,$mkvfreq,$mkvrate" ]; then
# exact match in this title
[ -n "$firstch" ] && USED_A[$tnum]="${USED_A[$tnum]} $mkvnum"
ffmap+=(-map 0:a:$((num-1)))
ffenc+=(-c:a:$((mkvnum-1)) copy -map_metadata:s:a:$((mkvnum-1)) 0:s:a:$((num-1)))
else
# stream has different characteristics
# (it is re-encoded to the codec, sample rate and bit rate chosen for the final stream)
[ -n "$firstch" ] && USED_A[$tnum]="${USED_A[$tnum]} $mkvnum"
ffmap+=(-map 0:a:$((num-1)))
ffenc+=(-c:a:$((mkvnum-1)) $ffcod -ar:a:$((mkvnum-1)) $mkvfreq -b:a:$((mkvnum-1)) $mkvrate -map_metadata:s:a:$((mkvnum-1)) 0:s:a:$((num-1)))
fi
ffenc+=(-metadata:s:a:$((mkvnum-1)) language=$lng -metadata:s:a:$((mkvnum-1)) title="$mkvnum: $name")
fi; done <<<"$MKV_AUDIO"
# subtitle streams
while read -r mkvnum mkvcod lng cmt deaf prio name; do if [ -n "$mkvnum" ]; then
# look for a stream of the same category in this title
read -r num x < <(
awk -vC=$mkvcod -vL=$lng -vM=$cmt -vD=$deaf -vP=$prio '$3==C && $4==L && $5==M && $6==D && $7==P{print}' <<<"${SRC_SUB[$tnum]}")
if [ -n "$num" ]; then
# exact match in this title
[ -n "$firstch" ] && USED_S[$tnum]="${USED_S[$tnum]} $mkvnum"
ffmap+=(-map 0:s:$((num-1)))
ffenc+=(-c:s:$((mkvnum-1)) copy -map_metadata:s:s:$((mkvnum-1)) 0:s:s:$((num-1)))
elif [ "$mkvcod" != 'VOBSUB:bitmap' ]; then
# unsupported type!
printf 'ERROR: UNSUPPORTED MISSING SUBTITLE STREAM FOR TITLE %d CHAPTER %d OF TYPE %s\n' $tnum $chnum "$mkvcod" >&2
exit 1
else
# no such stream in this title
# (an empty subtitle stream of the chapter duration is generated with spumux from a transparent pixel)
f="$BUILD/tmp.empty_s.${VFORMAT:-pal}.$chtimefloor.mpeg2"
if ! [ -f "$f" ] && ! [ -f "$BUILD/t.$tnum.ch.$chnum.mkv" ]; then
if ! [ -f "$BUILD/tmp.empty_pixel.png" ]; then
log_and_run "Generate a transparent pixel for use as an empty subtitle stream" \
convert -size 1x1 'xc:rgba(0,0,0,0)' "$BUILD/tmp.empty_pixel.png"
fi
log_and_run -o "Generate XML description for ${chtimefloor}s of empty subtitle" \
echo "<subpictures format=\"$([ "$VFORMAT" == ntsc ] && echo NTSC || echo PAL)\"><stream><spu start=\"00:00:00.00\" end=\"$(date -u -d@${chtimefloor} +%T.00)\" image=\"$BUILD/tmp.empty_pixel.png\"/></stream></subpictures>" >"$BUILD/tmp.empty_s.xml"
log_and_run -o "For title $tnum chapter $chnum, create missing subtitle stream for ${chtimefloor}s of ${mkvnum}: ${name}" \
spumux -m dvd --nomux --nodvdauthor-data "$BUILD/tmp.empty_s.xml" </dev/null >"$f"
fi
if [ -z "${blank["$f"]}" ]; then
ffcmd+=(-i "$f")
blank["$f"]=$((${#blank[*]}+1))
fi
ffmap+=(-map ${blank["$f"]}:s:0)
ffenc+=(-c:s:$((mkvnum-1)) copy)
fi
ffenc+=(-metadata:s:s:$((mkvnum-1)) language=$lng -metadata:s:s:$((mkvnum-1)) title="$mkvnum: $name")
fi; done <<<"$MKV_SUB"
firstch=
# the chapter is encoded only when its file does not exist yet (resumed run)
if ! [ -f "$BUILD/t.$tnum.ch.$chnum.mkv" ]; then
log_and_run "Adapt title $tnum chapter $chnum to final MKV streams" \
"${ffcmd[@]}" "${ffmap[@]}" "${ffenc[@]}" "$BUILD/t.$tnum.ch.$chnum.mkv" </dev/null
fi
unset blank ffcmd ffmap ffenc
rm -f "$BUILD/tmp.empty_s.xml" "$BUILD/tmp.empty_pixel.png"
# input of the loop: "<chapter number> <chapter duration in seconds>" for each chapter of the title
done < <(awk -F$'\t' -vT=$tnum '$1=="C-" T{bFS=FS; n=$3; FS=":"; $0=$4; print n, 3600*$1+60*$2+$3; FS=bFS}' <<<"$ALL_META")
done
# A title whose USED_A / USED_S is still empty inherits the values of the title holding the chapter
# that finally replaces one of its duplicate chapters (substitution chains are followed to their end).
for substid in "${!SUBST[@]}"; do
tch=$substid
while [ -n "${SUBST[$tch]}" ]; do tch=${SUBST[$tch]}; done
[ -n "${USED_A[${substid%.*}]}" ] || USED_A[${substid%.*}]="${USED_A[${tch%.*}]}"
[ -n "${USED_S[${substid%.*}]}" ] || USED_S[${substid%.*}]="${USED_S[${tch%.*}]}"
done
unset firstch INTERLV SRC_LTRB SRC_WH2WH
# assemble source tracks into end-result tracks
# $BUILD/.titlelist holds one line per edition of the final file: "<label><TAB><space-separated titles>".
# It is reused as-is when already present.
if ! [ -f "$BUILD/.titlelist" ]; then
# one "<islongfilm> <isepisode> <title> <number of ripped chapters>" line per title
# (0 means "yes" for the first two columns); titles without any ripped chapter are dropped
tcategs="$(
for tnum in $TITLE_LIST; do
printf '%d %d %d %d\n' $(islongfilm $tnum) $(isepisode $tnum) $tnum $(ls -1 "$BUILD"/t.$tnum.ch.*.mkv | wc -l)
done \
| grep -v ' 0$'
)"
{
# one edition per long film
awk '$1==0 {printf("🎥 %d\t%d\n",$3,$3)}' <<<"$tcategs"
# one edition gathering all episodes (not long film, but at least 15 min)
awk '$1==1 && $2==0 {L=L " " $3}; END {if(L!="")printf("📹%s\t%s\n",gensub(" ","","g",L),gensub(" ","",1,L))}' <<<"$tcategs"
# one edition gathering all shorter titles
awk '$1==1 && $2==1 {L=L " " $3}; END {if(L!="")printf("🖼%s\t%s\n",gensub(" ","","g",L),gensub(" ","",1,L))}' <<<"$tcategs"
# one edition gathering all titles
awk '{L=L " " $3}; END {if(L!="")printf("…🎞🎞🎞…\t%s\n",gensub(" ","",1,L))}' <<<"$tcategs"
# then one edition per title of each group having more than one title, sorted by title number
{
if [ $(grep '^1 0 ' <<<"$tcategs" | wc -l) -gt 1 ]; then
awk '$1==1 && $2==0 {printf("🎞 %d\t%d\n",$3,$3)}' <<<"$tcategs"
fi
if [ $(grep '^1 1 ' <<<"$tcategs" | wc -l) -gt 1 ]; then
awk '$1==1 && $2==1 {printf("🎞 %d\t%d\n",$3,$3)}' <<<"$tcategs"
fi
} | sort -t$'\t' -k2,2n
} >"$BUILD/.titlelist"
unset tcategs
fi
# fetch chapter metadata
# mkvmerge writes its JSON identification of each chapter file next to it (t.T.ch.C.json);
# an existing JSON file is reused as-is
for mkv in "$BUILD/"t.*.ch.*.mkv; do
if ! [ -f "${mkv%mkv}json" ]; then
log_and_run "Read metadata (length, ids…) for $mkv" \
mkvmerge -F json -i "$mkv" -r "${mkv%mkv}json"
fi
done
# compute chapters timestamps, UIDs, and substitutions + prepare merging
declare -A TIMESTAMPS
declare -A CHAPTERS_IDS
declare -A UIDS
declare -a MKVMRGARGS=()
# Append one "--default-track-flag <track>:<0|1>" option per final audio then
# subtitle stream to MKVMRGARGS (tracks are numbered from 1, in MKV_AUDIO then
# MKV_SUB order). At most ONE track gets the default flag:
#   - the first audio stream in DEFAULT_LANG that is neither a commentary nor
#     for visually impaired people;
#   - failing that, the first subtitle stream in one of ALL_LANGS that is
#     neither a commentary nor for hearing impaired people.
# Globals: MKV_AUDIO, MKV_SUB, DEFAULT_LANG, ALL_LANGS (read); MKVMRGARGS (appended).
function set_default_track_flags() {
  local default= strnum=0
  local x lng cmt blind deaf
  while read -r x x x x x x x lng x cmt blind x; do
    [ -n "$lng" ] || continue
    strnum=$((strnum+1))
    if [ "$default$cmt$blind${lng,,}" == "falsefalse${DEFAULT_LANG,,}" ]; then
      default=found
      MKVMRGARGS+=(--default-track-flag $strnum:1)
    else
      MKVMRGARGS+=(--default-track-flag $strnum:0)
    fi
  done <<<"$MKV_AUDIO"
  while read -r x x lng cmt deaf x; do
    [ -n "$lng" ] || continue
    strnum=$((strnum+1))
    # the hearing-impaired column of THIS line must be tested ($deaf); the
    # audio loop's $blind is empty once that loop has ended, which made the
    # test below impossible to satisfy
    if [ "$default$cmt$deaf" == "falsefalse" ] && grep -qiF ",${lng,,}," <<<",$ALL_LANGS,"; then
      default=found
      MKVMRGARGS+=(--default-track-flag $strnum:1)
    else
      MKVMRGARGS+=(--default-track-flag $strnum:0)
    fi
  done <<<"$MKV_SUB"
}
set_default_track_flags
# Walk through the ripped chapters in the order of their first appearance in $BUILD/.titlelist:
#   - add each chapter file to the bracketed file group of the mkvmerge command line
#   - TIMESTAMPS[title.chapter]   : "<start> <stop>" position of the chapter in the merged file;
#     ref accumulates the chapter durations (container duration divided by timestamp scale)
#   - CHAPTERS_IDS[title.chapter] : title and chapter numbers followed by the first 8 characters of the
#     decimal form of the segment UID
#   - UIDS[title.chapter]         : segment UID of the chapter file
ref=0
MKVMRGARGS+=('--no-chapters' '--no-global-tags' '[')
while read -r tnum; do
for chnum in ${CHAPTERS[$tnum]}; do if [ -f "$BUILD/t.$tnum.ch.$chnum.mkv" ]; then
[ -z "${SUBST[$tnum.$chnum]}" ] || continue
MKVMRGARGS+=("$BUILD/t.$tnum.ch.$chnum.mkv")
# compute milliseconds
start=$ref
stop=$(bc -lq <<<"$ref+$(jq -r '.container.properties | .duration/.timestamp_scale' <"$BUILD/t.$tnum.ch.$chnum.json")")
ref=$stop
TIMESTAMPS[$tnum.$chnum]="$start $stop"
CHAPTERS_IDS[$tnum.$chnum]="$tnum$chnum$(jq -r .container.properties.segment_uid <"$BUILD/t.$tnum.ch.$chnum.json" \
| tr '[a-f]' '[A-F]' | bc -q <<<"ibase=16;$(cat)" | head -c 8)"
UIDS[$tnum.$chnum]="$(jq -r .container.properties.segment_uid <"$BUILD/t.$tnum.ch.$chnum.json")"
fi; done
done < <(cut -d$'\t' -f2 <"$BUILD/.titlelist" | tr ' ' '\n' | awk '{if($0 in seen)next};{print;seen[$0]=1}')
# a duplicate chapter reuses the timestamps and segment UID of the chapter that finally replaces it
# (substitution chains are followed to their end), under a chapter id of its own
for substid in "${!SUBST[@]}"; do
tch=$substid
while [ -n "${SUBST[$tch]}" ]; do tch=${SUBST[$tch]}; done
TIMESTAMPS[$substid]=${TIMESTAMPS[$tch]}
CHAPTERS_IDS[$substid]="${substid//.}$(jq -r .container.properties.segment_uid <"$BUILD/t.${tch//./.ch.}.json" \
| tr '[a-f]' '[A-F]' | bc -q <<<"ibase=16;$(cat)" | head -c 8)"
UIDS[$substid]="${UIDS[$tch]}"
done
MKVMRGARGS+=(']')
unset ref
# generate MKV editions XML file
# formatTimestamp MILLISECONDS
# Prints a position given in milliseconds as "hh:mm:ss.nnn".
# Any fractional part of the argument is dropped, and an empty or
# fraction-only argument (e.g. ".5") counts as 0. Values below one second are
# supported (they have fewer than four digits, so they cannot be split into a
# "seconds" prefix and a three-digit "milliseconds" suffix).
function formatTimestamp() {
  local ms=${1%%.*}      # integer part only
  ms=$((10#${ms:-0}))    # force base 10, so that leading zeros are harmless
  printf '%02d:%02d:%02d.%03d\n' \
    $((ms/3600000)) $((ms/60000%60)) $((ms/1000%60)) $((ms%1000))
}
# $BUILD/mkv-editions.xml describes one ordered edition per line of $BUILD/.titlelist, each made of one
# chapter atom per ripped chapter of its titles; the file is reused as-is when already present.
if ! [ -f "$BUILD/mkv-editions.xml" ]; then
{
cat <<ENDOFXML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Chapters SYSTEM "matroskachapters.dtd">
<Chapters>
ENDOFXML
# only the first edition is flagged as the default one; editions are numbered from 1 in their name
edflagdefault=1
titlenum=1
while IFS=$'\t' read -r label titlelist; do
cat <<ENDOFXML
<EditionEntry>
<EditionFlagHidden>0</EditionFlagHidden>
<EditionFlagDefault>$edflagdefault</EditionFlagDefault>
<EditionFlagOrdered>1</EditionFlagOrdered>
<EditionDisplay>
<EditionString>$(printf '%03d%s' $titlenum "$label")</EditionString>
</EditionDisplay>
ENDOFXML
chflaghidden=0
for tnum in $titlelist; do
for chnum in ${CHAPTERS[$tnum]}; do if [ -f "$BUILD/t.$tnum.ch.$chnum.mkv" ]; then
cat <<ENDOFXML
<ChapterAtom>
<ChapterUID>${CHAPTERS_IDS[$tnum.$chnum]}</ChapterUID>
<ChapterTimeStart>$(formatTimestamp ${TIMESTAMPS[$tnum.$chnum]% *})</ChapterTimeStart>
<ChapterTimeEnd>$(formatTimestamp ${TIMESTAMPS[$tnum.$chnum]#* })</ChapterTimeEnd>
<ChapterFlagHidden>$chflaghidden</ChapterFlagHidden>
<ChapterFlagEnabled>1</ChapterFlagEnabled>
<!-- ChapterSegmentUID format="hex">${UIDS[$tnum.$chnum]}</ChapterSegmentUID -->
<ChapterDisplay>
<ChapterString>$tnum.$chnum (A${USED_A[$tnum]:- ∅} - S${USED_S[$tnum]:- ∅}) — ${CHNAMES[$tnum.$chnum]}</ChapterString>
</ChapterDisplay>
</ChapterAtom>
ENDOFXML
# in an edition made of several titles, only the first listed chapter of each title stays visible
if [ "${titlelist/ }" != "$titlelist" ]; then
chflaghidden=1
fi
fi; done
chflaghidden=0
done
cat <<ENDOFXML
</EditionEntry>
ENDOFXML
edflagdefault=0
titlenum=$((titlenum+1))
done <"$BUILD/.titlelist"
cat <<ENDOFXML
</Chapters>
ENDOFXML
} >"$BUILD/mkv-editions.xml"
fi
# merge all chapters of all titles
# (an existing target file is never overwritten)
if ! [ -f "$TARGET" ]; then
log_and_run 'Build final MKV file' \
mkvmerge --disable-track-statistics-tags --append-mode file --chapters "$BUILD/mkv-editions.xml" -o "$TARGET" "${MKVMRGARGS[@]}"
fi
# $? is the status of the `if` statement above: that of mkvmerge when it ran, 0 when the target already existed.
# The temp directory is removed only on success, and only when it was not supplied by the user (KEEP_ALL).
if [ $? -eq 0 ] && [ -z "$KEEP_ALL" ]; then
rm -rf "$BUILD"
fi
# FIXME: HOW TO DETECT USELESS SUBTITLES? E.G.
# 3|fra,VOBSUB:bitmap,false,false,false Francais (Wide Screen) [VOBSUB] → Normal
# 5|fra,VOBSUB:bitmap,false,false,false Francais (Wide Screen) [VOBSUB] → ??
# 6|fra,VOBSUB:bitmap,false,false,false Francais (Wide Screen) [VOBSUB] → Commentary
# ⇓
# (about 4min for full movie for each subtitle track)
# $ HandBrakeCLI -t 1 -e x265_10bit --encoder-profile main10 -q 1 --vfr -X 128 -Y 96 -a none -s 3 --subtitle-burned=none -i .iso -o sub3.mkv
# $ HandBrakeCLI -t 1 -e x265_10bit --encoder-profile main10 -q 1 --vfr -X 128 -Y 96 -a none -s 5 --subtitle-burned=none -i .iso -o sub5.mkv
# $ HandBrakeCLI -t 1 -e x265_10bit --encoder-profile main10 -q 1 --vfr -X 128 -Y 96 -a none -s 6 --subtitle-burned=none -i .iso -o sub6.mkv
# (instantaneous)
# $ mkvextract sub3.mkv tracks --raw 1:sub3.sub
# $ mkvextract sub5.mkv tracks --raw 1:sub5.sub
# $ mkvextract sub6.mkv tracks --raw 1:sub6.sub
# $ ls -l sub*
# -rw-r--r-- 1 yves yves 287386078 8 mars 18:54 sub3.mkv
# -rw-r--r-- 1 yves yves 1058252 8 mars 19:35 sub3.sub
# -rw-r--r-- 1 yves yves 286304465 8 mars 19:07 sub5.mkv
# -rw-r--r-- 1 yves yves 2268 8 mars 19:35 sub5.sub
# -rw-r--r-- 1 yves yves 289029155 8 mars 19:12 sub6.mkv
# -rw-r--r-- 1 yves yves 2684952 8 mars 19:35 sub6.sub