import type { RecordRTCPromisesHandler } from "recordrtc"
import hark from "hark"

export default () => {
	// NOTE(review): `ref` is not imported in this file — presumably a framework
	// auto-import (e.g. Vue/Nuxt); confirm against the build setup.

	// RecordRTC promise-based recorder; assigned by init()
	const recorder = ref<RecordRTCPromisesHandler>()
	// Reset to an empty Blob by start(); nothing else in this file writes to it
	const audioBlob = ref<Blob>()
	// Microphone stream set by init(); reset to undefined by stop()
	const stream = ref<MediaStream>()
	// hark voice-activity detector attached to the stream in init()
	const speechEvents = ref<hark.Harker>()
	// Optional consumer callback, invoked when hark emits "speaking"
	const onSpeaking = ref<Function>()
	// Optional consumer callback, invoked when hark emits "stopped_speaking"
	const onStoppedSpeaking = ref<Function>()
	// Optional consumer callback, invoked with a recorded chunk — only for chunks during which "speaking" fired
	const onDataAvailable = ref<(blob: Blob) => void>()
	// Same trigger as onDataAvailable, but receives the chunk decoded to an AudioBuffer
	const onAudioBufferAvailable = ref<(audioBuffer: AudioBuffer) => void>()

	// Most recent chunk delivered by the recorder, kept so it can accompany the next chunk
	const prevDataChunk = ref<Blob>()
	// Set to true by the "speaking" event; reset to false when a chunk is handled
	const isSpokeInChunk = ref<boolean>()
	// Optional consumer callback, invoked with [previous chunk (if any), current chunk] when "speaking" fired during the current chunk
	const onActiveDataAvailable = ref<(blob: Blob[]) => void>()
	// Same trigger as onActiveDataAvailable, but receives each chunk decoded to an AudioBuffer
	const onActiveAudioBufferAvailable = ref<(audioBuffer: AudioBuffer[]) => void>()

	/**
	 * Requests microphone access, attaches hark voice-activity detection to the
	 * stream and creates the RecordRTC recorder (mono WAV, 16 kHz).
	 *
	 * Chunk state left over from an earlier session (last chunk, speech flag) is
	 * cleared so the first chunk of a new session is never paired with stale audio.
	 *
	 * @param timeSlice Forwarded to RecordRTC as `timeSlice`. Per the RecordRTC
	 *   documentation this is the chunk interval in milliseconds, and
	 *   `ondataavailable` is only invoked when it is set.
	 * @returns `true` once the recorder is ready, `false` if no stream was obtained.
	 * @throws Rejections from `getUserMedia` (e.g. permission denied) or from the
	 *   dynamic `recordrtc` import propagate to the caller.
	 */
	const init = async (timeSlice?: number) => {
		// Single source of truth for the recording and decoding sample rate
		const sampleRate = 16000

		const { RecordRTCPromisesHandler, StereoAudioRecorder } = await import("recordrtc")
		// Keep the captured stream itself rather than a clone of it: cloning left the
		// original tracks live with no remaining reference through which to stop them,
		// so the only live tracks are now the ones reachable through `stream`.
		stream.value = await navigator.mediaDevices.getUserMedia({
			video: false,
			audio: true
		})
		if (!stream.value) return false

		// Do not carry chunk state over from a previous recording session
		prevDataChunk.value = undefined
		isSpokeInChunk.value = false

		// voice activity detection
		speechEvents.value = hark(stream.value, {
			play: false,
			interval: 150,
			smoothing: 0.1,
			threshold: -60
		})
		speechEvents.value.on("speaking", () => {
			onSpeaking.value?.()
			// Flag the chunk currently being recorded as containing speech
			isSpokeInChunk.value = true
		})
		speechEvents.value.on("stopped_speaking", () => {
			onStoppedSpeaking.value?.()
		})
		recorder.value = new RecordRTCPromisesHandler(stream.value, {
			type: "audio",
			mimeType: "audio/wav",
			numberOfAudioChannels: 1,
			recorderType: StereoAudioRecorder,
			bufferSize: 2048,
			timeSlice,
			desiredSampRate: sampleRate,
			ondataavailable: async (blob) => {
				// Snapshot and reset the per-chunk state synchronously, before any await.
				// Decoding is asynchronous; resetting afterwards could erase a "speaking"
				// flag raised for the next chunk, or overwrite a newer previous-chunk value.
				const spokeInChunk = isSpokeInChunk.value
				const previousChunk = prevDataChunk.value
				prevDataChunk.value = blob
				isSpokeInChunk.value = false

				// Silent chunks are only remembered, never reported
				if (!spokeInChunk) return

				onDataAvailable.value?.(blob)
				onActiveDataAvailable.value?.(previousChunk ? [previousChunk, blob] : [blob])

				const notifyBuffer = onAudioBufferAvailable.value
				const notifyActiveBuffers = onActiveAudioBufferAvailable.value
				if (!notifyBuffer && !notifyActiveBuffers) return

				// Decode the current chunk once and share the result between both callbacks
				const currentBuffer = await convertBlobToAudioBuffer(blob, sampleRate)
				notifyBuffer?.(currentBuffer)

				if (notifyActiveBuffers) {
					const previousBuffer = previousChunk
						? await convertBlobToAudioBuffer(previousChunk, sampleRate)
						: undefined
					notifyActiveBuffers(previousBuffer ? [previousBuffer, currentBuffer] : [currentBuffer])
				}
			}
		})

		return true
	}

	/**
	 * Runs `init` and then begins recording.
	 *
	 * @param timeSlice Passed straight through to `init`.
	 * @returns `true` after recording has started, `false` when no recorder
	 *   exists once `init` has finished.
	 */
	const start = async (timeSlice?: number) => {
		await init(timeSlice)

		const activeRecorder = recorder.value
		if (!activeRecorder) {
			return false
		}

		await activeRecorder.startRecording()
		// A fresh, empty blob marks the beginning of a new recording
		audioBlob.value = new Blob()
		return true
	}

	/**
	 * Stops the recording, shuts down voice-activity detection and releases the
	 * microphone.
	 *
	 * Cleanup runs in a `finally` block so the detector and the microphone are
	 * released even when `stopRecording()` rejects.
	 *
	 * @returns Always `true` when it completes.
	 * @throws Re-throws a rejection from `stopRecording()` after cleanup has run.
	 */
	const stop = async () => {
		try {
			await recorder.value?.stopRecording()
		} finally {
			speechEvents.value?.stop()
			// Dropping the reference alone does not end the capture: each track must be
			// stopped explicitly, otherwise the microphone stays in use.
			stream.value?.getTracks().forEach((track) => track.stop())
			stream.value = undefined
		}
		return true
	}

	/**
	 * Fetches the recorded audio from the recorder.
	 *
	 * @returns The blob reported by the recorder, or `undefined` when no recorder
	 *   has been created yet.
	 */
	const getBlob = async () => {
		const activeRecorder = recorder.value
		if (!activeRecorder) {
			return undefined
		}
		return activeRecorder.getBlob()
	}

	/**
	 * Wraps the recorded audio in a `File` named "audio.wav".
	 *
	 * @returns A `File` carrying the blob's MIME type and the current time as its
	 *   last-modified stamp, or `null` when there is no recorded blob.
	 */
	const getFile = async () => {
		const recorded = await getBlob()
		if (!recorded) {
			return null
		}

		const fileOptions = {
			type: recorded.type,
			lastModified: Date.now()
		}
		return new File([recorded], "audio.wav", fileOptions)
	}

	/**
	 * Decodes an encoded audio blob into an `AudioBuffer`.
	 *
	 * The blob is read with a `FileReader` and decoded by a temporary
	 * `AudioContext` created with the requested sample rate (per the Web Audio
	 * specification, `decodeAudioData` resamples to the context's rate). The
	 * context is closed as soon as decoding settles, because every open context
	 * holds audio resources and this function is called once per chunk.
	 *
	 * @param blob Encoded audio data.
	 * @param sampleRate Sample rate passed to the `AudioContext` constructor.
	 * @returns A promise resolving to the decoded buffer. It rejects when the blob
	 *   cannot be read, the context cannot be created, or decoding fails.
	 */
	const convertBlobToAudioBuffer = (blob: Blob, sampleRate: number) => {
		return new Promise<AudioBuffer>((resolve, reject) => {
			const decode = (encoded: ArrayBuffer) => {
				let audioContext: AudioContext
				try {
					// Fall back to the prefixed constructor where the standard one is missing
					const AudioContextCtor: typeof AudioContext =
						window.AudioContext ||
						(window as unknown as { webkitAudioContext: typeof AudioContext }).webkitAudioContext
					audioContext = new AudioContextCtor({ sampleRate })
				} catch (err) {
					// e.g. no Web Audio support or an unsupported sample rate
					reject(err)
					return
				}

				const releaseContext = () => {
					// Best effort: a failure to close must not mask the decode outcome
					void audioContext.close().catch(() => undefined)
				}

				// Callback form kept for implementations whose decodeAudioData returns no promise
				audioContext.decodeAudioData(
					encoded,
					(audioBuffer) => {
						resolve(audioBuffer)
						releaseContext()
					},
					(err) => {
						reject(err)
						releaseContext()
					}
				)
			}

			const fileReader = new FileReader()
			fileReader.onload = () => {
				decode(fileReader.result as ArrayBuffer)
			}
			// Without this handler a failed read would leave the promise pending forever
			fileReader.onerror = () => {
				reject(fileReader.error ?? new Error("Failed to read audio blob"))
			}
			fileReader.readAsArrayBuffer(blob)
		})
	}

	// Public surface of the composable: reactive state, lifecycle functions and the
	// callback refs consumers assign to. `speechEvents`, `prevDataChunk`,
	// `isSpokeInChunk` and `convertBlobToAudioBuffer` are intentionally not part of
	// the returned object and stay internal.
	return {
		recorder,
		audioBlob,
		stream,
		init,
		start,
		stop,
		getBlob,
		getFile,
		onSpeaking,
		onStoppedSpeaking,
		onDataAvailable,
		onAudioBufferAvailable,
		onActiveDataAvailable,
		onActiveAudioBufferAvailable
	}
}
