#include "audiocaptureff.h"

#include <string>
#include <vector>

// Implementation of CaptureAudioFfmpeg: enumerates DirectShow audio input
// devices and captures one of them through FFmpeg's "dshow" demuxer,
// re-encoding the audio to AAC and writing it to "aac.aac".
//
// The code targets the legacy FFmpeg API (AVStream::codec,
// avcodec_decode_audio4, avcodec_encode_audio2).

#ifdef __MINGW32__
/**
 * Converts a wide string to a multibyte string using the user's default
 * locale.
 *
 * The locale that was active on entry is restored before returning.
 *
 * @param ws  Wide string to convert.
 * @return    The converted string, or an empty string when `ws` contains a
 *            character that cannot be represented in the target encoding.
 */
std::string WString2String(const std::wstring& ws)
{
    // setlocale(..., nullptr) only queries. Copy the answer, because the
    // returned buffer may be overwritten by the next setlocale() call.
    const char* active = setlocale(LC_ALL, nullptr);
    const std::string previousLocale = active ? active : "C";
    setlocale(LC_ALL, "");

    std::string result;
    const wchar_t* source = ws.c_str();
    const size_t needed = wcstombs(nullptr, source, 0);
    if (needed != static_cast<size_t>(-1)) {
        result.resize(needed);
        if (needed > 0)
            wcstombs(&result[0], source, needed);
    }

    setlocale(LC_ALL, previousLocale.c_str());
    return result;
}
#endif

/**
 * Enumerates the DirectShow devices registered in the audio *input* device
 * category (CLSID_AudioInputDeviceCategory).
 *
 * For each device the "Description" property is used as the name, falling
 * back to "FriendlyName" when no description is available.
 *
 * @return One MICInfo per device whose name could be read; empty when the
 *         enumerator cannot be created or the category has no devices.
 */
std::vector<CaptureAudioFfmpeg::MICInfo> CaptureAudioFfmpeg::EnumSpeakers()
{
    std::vector<MICInfo> devices;

    // Create the System Device Enumerator.
    ICreateDevEnum* deviceEnumerator = nullptr;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, nullptr, CLSCTX_INPROC_SERVER,
                                  IID_PPV_ARGS(&deviceEnumerator));
    if (FAILED(hr) || !deviceEnumerator)
        return devices;

    // Create an enumerator for the category.
    IEnumMoniker* monikers = nullptr;
    hr = deviceEnumerator->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &monikers, 0);
    deviceEnumerator->Release();

    // S_FALSE means the category is empty (no enumerator is returned).
    if (FAILED(hr) || hr == S_FALSE || !monikers)
        return devices;

    IMoniker* moniker = nullptr;
    while (monikers->Next(1, &moniker, nullptr) == S_OK) {
        IPropertyBag* properties = nullptr;
        if (SUCCEEDED(moniker->BindToStorage(nullptr, nullptr, IID_PPV_ARGS(&properties)))) {
            VARIANT value;
            VariantInit(&value);

            // Get description or friendly name.
            HRESULT readResult = properties->Read(L"Description", &value, nullptr);
            if (FAILED(readResult))
                readResult = properties->Read(L"FriendlyName", &value, nullptr);

            if (SUCCEEDED(readResult) && value.vt == VT_BSTR && value.bstrVal) {
                MICInfo info;
                info.name = value.bstrVal;
                devices.push_back(info);
            }

            // Always release whatever Read() may have stored in the variant.
            VariantClear(&value);
            properties->Release();
        }
        moniker->Release();
    }
    monikers->Release();

    return devices;
}

/**
 * Stores the requested sample rate and channel count.
 *
 * @param rate     Sample rate, stored in mSampleRate.
 * @param channel  Channel count, stored in mChanel.
 */
CaptureAudioFfmpeg::CaptureAudioFfmpeg(uint16_t rate, uint8_t channel)
{
    mSampleRate = rate;
    mChanel = channel;
}

/**
 * Converts a NUL-terminated wide string to a newly allocated UTF-8 string.
 *
 * @param w  NUL-terminated wide string.
 * @return   Buffer allocated with av_malloc() that the caller must release
 *           with av_free(), or nullptr when conversion or allocation fails.
 */
static char *dup_wchar_to_utf8(const wchar_t *w)
{
    // First call computes the required size, including the terminating NUL.
    const int length = WideCharToMultiByte(CP_UTF8, 0, w, -1, nullptr, 0, nullptr, nullptr);
    if (length <= 0)
        return nullptr;

    char *utf8 = static_cast<char *>(av_malloc(length));
    if (utf8 && WideCharToMultiByte(CP_UTF8, 0, w, -1, utf8, length, nullptr, nullptr) <= 0) {
        av_free(utf8);
        utf8 = nullptr;
    }
    return utf8;
}

/**
 * Opens `url` with the "dshow" input format, decodes its audio, passes it
 * through the filter graph built by initAudioFilters(), encodes it as AAC and
 * writes the result to "aac.aac".
 *
 * The call blocks until av_read_frame() reports end of stream or an error,
 * then drains the encoder, writes the trailer and releases every resource it
 * acquired (including the input context and the filter graph).
 *
 * @param url      Input passed to avformat_open_input() after UTF-8 conversion.
 * @param rate     Currently unused.
 * @param channel  Currently unused.
 * @return 0 on success, -1 when any setup step fails.
 */
int CaptureAudioFfmpeg::InitCapture(std::wstring url, uint16_t rate, uint8_t channel)
{
    // NOTE(review): the encoder below is hard-coded to 48 kHz stereo; these
    // parameters (and mSampleRate / mChanel) are not consulted - confirm intent.
    (void)rate;
    (void)channel;

    static const char *const kOutputPath = "aac.aac";

    // ---- Input -------------------------------------------------------------
    char *utf8Url = dup_wchar_to_utf8(url.c_str());
    if (!utf8Url) {
        printf("failed convert input url\n");
        return -1;
    }
    const std::string fileAudioInput = utf8Url;
    av_free(utf8Url);

    AVInputFormat *imft = av_find_input_format("dshow");
    AVDictionary *format_opts = nullptr;
    av_dict_set_int(&format_opts, "audio_buffer_size", 20, 0);
    const int openResult = avformat_open_input(&mInfmt_ctx, fileAudioInput.c_str(), imft, &format_opts);
    // Options the demuxer did not consume are left in the dictionary.
    av_dict_free(&format_opts);
    if (openResult < 0) {
        printf("failed input file\n");
        return -1;
    }

    // Everything acquired from here on is released by releaseAll().
    AVCodecContext *codec_ctx = nullptr;
    AVFormatContext *outfmt_ctx = nullptr;
    AVFrame *pSrcAudioFrame = nullptr;
    bool filtersReady = false;

    auto releaseAll = [&]() {
        av_frame_free(&pSrcAudioFrame);
        if (filtersReady) {
            // The buffer source/sink contexts are owned by the graph.
            avfilter_graph_free(&mFilterGraph);
            mBuffersrcCtx = nullptr;
            mBuffersinkCtx = nullptr;
            filtersReady = false;
        }
        avcodec_free_context(&codec_ctx);
        if (outfmt_ctx) {
            if (outfmt_ctx->pb)
                avio_closep(&outfmt_ctx->pb);
            // Output contexts are freed with avformat_free_context();
            // avformat_close_input() is for demuxers only.
            avformat_free_context(outfmt_ctx);
            outfmt_ctx = nullptr;
        }
        avformat_close_input(&mInfmt_ctx);
    };
    auto fail = [&](const char *message) -> int {
        printf("%s", message);
        releaseAll();
        return -1;
    };

    if (avformat_find_stream_info(mInfmt_ctx, nullptr) < 0)
        return fail("failed find stream info\n");

    // av_find_best_stream() reports failure with a negative AVERROR code.
    const int audio_index = av_find_best_stream(mInfmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
    if (audio_index < 0)
        return fail("failed find best stream\n");

    // ---- Decoder -----------------------------------------------------------
    // NOTE(review): the decoder, the filter graph and DecodeAudio() all use
    // streams[0] while packets are selected by audio_index; these only agree
    // when the audio stream is the first stream - confirm for the inputs used.
    AVCodecContext *decoderContext = mInfmt_ctx->streams[0]->codec;
    const AVCodec *decodec = avcodec_find_decoder(decoderContext->codec_id);
    if (!decodec)
        return fail("failed find decoder\n");
    if (avcodec_open2(decoderContext, decodec, nullptr) < 0)
        return fail("failed open decoder\n");

    // ---- Resampling / format-conversion filter graph ------------------------
    if (initAudioFilters() < 0)
        return fail("failed init audio filters\n");
    filtersReady = true;

    // ---- Encoder -----------------------------------------------------------
    // (libmp3lame via avcodec_find_encoder_by_name() was a previous choice.)
    const AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec)
        return fail("failed find encoder\n");

    codec_ctx = avcodec_alloc_context3(codec);
    if (!codec_ctx)
        return fail("failed alloc encoder context\n");

    codec_ctx->codec = codec;
    codec_ctx->sample_rate = 48000;
    codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
    codec_ctx->channels = 2;
    codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    codec_ctx->codec_tag = 0;
    codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    if (avcodec_open2(codec_ctx, codec, nullptr) < 0)
        return fail("failed open coder\n");

    // ---- Output file -------------------------------------------------------
    if (avformat_alloc_output_context2(&outfmt_ctx, nullptr, nullptr, kOutputPath) < 0)
        return fail("failed alloc outputcontext\n");

    AVStream *out_stream = avformat_new_stream(outfmt_ctx, codec_ctx->codec);
    if (!out_stream)
        return fail("failed new stream\n");

    if (avcodec_copy_context(out_stream->codec, codec_ctx) < 0)
        return fail("failed copy codec context\n");

    // (An RTMP URL such as "rtmp://localhost/testlive" was a previous target.)
    if (avio_open(&outfmt_ctx->pb, kOutputPath, AVIO_FLAG_WRITE) < 0)
        return fail("failed to open outfile\n");

    if (avformat_write_header(outfmt_ctx, nullptr) < 0)
        return fail("failed write header\n");

    // ---- Capture loop ------------------------------------------------------
    pSrcAudioFrame = av_frame_alloc();
    if (!pSrcAudioFrame)
        return fail("failed alloc frame\n");

    AVPacket packet;
    av_init_packet(&packet);
    packet.data = nullptr;
    packet.size = 0;

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = nullptr;
    pkt.size = 0;

    // Stamps the encoded packet in `pkt`, hands it to the muxer and resets it.
    auto writeEncodedPacket = [&]() {
        AVStream *inputStream = mInfmt_ctx->streams[pkt.stream_index];
        AVStream *outputStream = outfmt_ctx->streams[pkt.stream_index];

        // 64-bit arithmetic: the product overflows a 32-bit int after a few
        // frames.
        // NOTE(review): the value is computed from the output stream's time
        // base and then rescaled from the *input* time base; kept as in the
        // original - confirm this is the intended timestamp.
        const int64_t streamTimeBase = outputStream->time_base.den;
        const int64_t codecTimeBase = outputStream->codec->time_base.den;
        pkt.pts = pkt.dts = (1024 * streamTimeBase * mAudioCount) / codecTimeBase;
        mAudioCount++;
        av_packet_rescale_ts(&pkt, inputStream->time_base, outputStream->time_base);

        av_interleaved_write_frame(outfmt_ctx, &pkt);
        av_packet_unref(&pkt);
    };

    int loop = 1;
    int delayedFrame = 0;
    int got_frame = 0;

    for (;;) {
        const int readResult = av_read_frame(mInfmt_ctx, &packet);
        if (readResult == AVERROR(EAGAIN))
            continue;  // no data available yet
        if (readResult < 0)
            break;     // end of stream or device error
        loop++;

        if (packet.stream_index == audio_index) {
            AVFrame *filterFrame = DecodeAudio(&packet, pSrcAudioFrame);
            if (filterFrame) {
                const int encodeResult = avcodec_encode_audio2(codec_ctx, &pkt, filterFrame, &got_frame);
                // DecodeAudio() allocates this frame; we own it.
                av_frame_free(&filterFrame);

                if (encodeResult >= 0 && got_frame) {
                    writeEncodedPacket();
                    printf("output frame %3d\n", loop - delayedFrame);
                } else {
                    delayedFrame++;
                    av_packet_unref(&pkt);
                    printf("no output frame\n");
                }
            }
        }
        av_packet_unref(&packet);
    }

    // ---- Drain the encoder -------------------------------------------------
    // Done on codec_ctx directly: the codec context attached to the output
    // stream is only a copy and was never opened, so it cannot be flushed.
    if (codec->capabilities & AV_CODEC_CAP_DELAY) {
        for (;;) {
            got_frame = 0;
            if (avcodec_encode_audio2(codec_ctx, &pkt, nullptr, &got_frame) < 0 || !got_frame) {
                av_packet_unref(&pkt);
                break;
            }
            printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", pkt.size);
            writeEncodedPacket();
        }
    }

    av_write_trailer(outfmt_ctx);
    releaseAll();
    return 0;
}

/**
 * Builds the filter graph "abuffer -> anull -> abuffersink" for the decoder
 * of input stream 0 and stores it in mFilterGraph / mBuffersrcCtx /
 * mBuffersinkCtx.
 *
 * The sink is constrained to planar float samples with the decoder's channel
 * layout and sample rate, and is asked to deliver frames of 1024 samples.
 *
 * @return 0 on success, a negative AVERROR code on failure. On failure the
 *         graph is freed and the three members are reset to null.
 */
int CaptureAudioFfmpeg::initAudioFilters()
{
    // Assigned first so that the failure path below may always free it.
    mFilterGraph = avfilter_graph_alloc();
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();

    // Single exit point: the in/out lists are always released, the graph only
    // when setup failed.
    auto finish = [&](int code) -> int {
        avfilter_inout_free(&inputs);
        avfilter_inout_free(&outputs);
        if (code < 0) {
            avfilter_graph_free(&mFilterGraph);
            mBuffersrcCtx = nullptr;
            mBuffersinkCtx = nullptr;
        }
        return code;
    };

    if (!mFilterGraph || !outputs || !inputs)
        return finish(AVERROR(ENOMEM));

    const AVFilter *abuffersrc = avfilter_get_by_name("abuffer");
    const AVFilter *abuffersink = avfilter_get_by_name("abuffersink");
    if (!abuffersrc || !abuffersink) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find abuffer/abuffersink filter\n");
        return finish(AVERROR_FILTER_NOT_FOUND);
    }

    AVCodecContext *audioDecoderContext = mInfmt_ctx->streams[0]->codec;
    if (!audioDecoderContext->channel_layout)
        audioDecoderContext->channel_layout =
            av_get_default_channel_layout(audioDecoderContext->channels);

    // av_opt_set_int_list() scans each list up to the terminator, so every
    // list must end with -1 / AV_SAMPLE_FMT_NONE. The layout and rate lists
    // hold per-call values and therefore must not be static.
    static const enum AVSampleFormat out_sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
    const int64_t out_channel_layouts[] = { static_cast<int64_t>(audioDecoderContext->channel_layout), -1 };
    const int out_sample_rates[] = { audioDecoderContext->sample_rate, -1 };

    const AVRational time_base = mInfmt_ctx->streams[0]->time_base;
    mFilterGraph->nb_threads = 1;

    char args[512];
    sprintf_s(args, sizeof(args),
        "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
        time_base.num, time_base.den, audioDecoderContext->sample_rate,
        av_get_sample_fmt_name(audioDecoderContext->sample_fmt),
        audioDecoderContext->channel_layout);

    /* buffer audio source: decoded frames are pushed in here. */
    int ret = avfilter_graph_create_filter(&mBuffersrcCtx, abuffersrc, "in", args, NULL, mFilterGraph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer source\n");
        return finish(ret);
    }

    /* buffer audio sink: to terminate the filter chain. */
    ret = avfilter_graph_create_filter(&mBuffersinkCtx, abuffersink, "out", NULL, NULL, mFilterGraph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create audio buffer sink\n");
        return finish(ret);
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
        return finish(ret);
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
        return finish(ret);
    }

    ret = av_opt_set_int_list(mBuffersinkCtx, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
        return finish(ret);
    }

    /* Endpoints for the filter graph. */
    outputs->name = av_strdup("in");
    outputs->filter_ctx = mBuffersrcCtx;
    outputs->pad_idx = 0;
    outputs->next = NULL;

    inputs->name = av_strdup("out");
    inputs->filter_ctx = mBuffersinkCtx;
    inputs->pad_idx = 0;
    inputs->next = NULL;

    ret = avfilter_graph_parse_ptr(mFilterGraph, "anull", &inputs, &outputs, nullptr);
    if (ret < 0)
        return finish(ret);

    ret = avfilter_graph_config(mFilterGraph, NULL);
    if (ret < 0)
        return finish(ret);

    av_buffersink_set_frame_size(mBuffersinkCtx, 1024);
    return finish(0);
}

/**
 * Drains the encoder attached to `fmt_ctx->streams[stream_index]` and writes
 * every remaining packet with av_write_frame().
 *
 * Nothing is done (and 0 is returned) when the encoder does not have the
 * AV_CODEC_CAP_DELAY capability or its context has not been opened.
 *
 * @param fmt_ctx       Output context that owns the stream.
 * @param stream_index  Index of the stream whose encoder is drained.
 * @return 0 on success, a negative AVERROR code on failure.
 */
int CaptureAudioFfmpeg::flush_encoder(AVFormatContext *fmt_ctx, unsigned int stream_index)
{
    if (!fmt_ctx || stream_index >= fmt_ctx->nb_streams)
        return AVERROR(EINVAL);

    AVCodecContext *encoder = fmt_ctx->streams[stream_index]->codec;
    if (!encoder || !encoder->codec || !(encoder->codec->capabilities & AV_CODEC_CAP_DELAY))
        return 0;
    // Encoding on a context that was never opened is invalid.
    if (!avcodec_is_open(encoder))
        return 0;

    int ret = 0;
    for (;;) {
        AVPacket enc_pkt;
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;

        int got_frame = 0;
        ret = avcodec_encode_audio2(encoder, &enc_pkt, NULL, &got_frame);
        if (ret < 0)
            break;
        if (!got_frame) {
            ret = 0;
            break;
        }
        printf("Flush Encoder: Succeed to encode 1 frame!\tsize:%5d\n", enc_pkt.size);

        /* mux encoded frame */
        enc_pkt.stream_index = static_cast<int>(stream_index);
        ret = av_write_frame(fmt_ctx, &enc_pkt);
        // av_write_frame() leaves ownership of the packet with the caller.
        av_packet_unref(&enc_pkt);
        if (ret < 0)
            break;
    }
    return ret;
}

/**
 * Decodes `packet` with the decoder of input stream 0, pushes the decoded
 * frame into the filter graph and pulls one filtered frame from the sink.
 *
 * @param packet          Packet read from the input.
 * @param pSrcAudioFrame  Caller-owned scratch frame that receives the decoded
 *                        audio.
 * @return A newly allocated filtered frame that the caller must release with
 *         av_frame_free(), or nullptr when decoding produced no frame, the
 *         frame could not be queued, or the sink has no frame available.
 */
AVFrame *CaptureAudioFfmpeg::DecodeAudio(AVPacket *packet, AVFrame *pSrcAudioFrame)
{
    AVCodecContext *codecContext = mInfmt_ctx->streams[0]->codec;

    int gotFrame = 0;
    const int length = avcodec_decode_audio4(codecContext, pSrcAudioFrame, &gotFrame, packet);
    if (length < 0 || gotFrame == 0)
        return nullptr;

    if (av_buffersrc_add_frame_flags(mBuffersrcCtx, pSrcAudioFrame, AV_BUFFERSRC_FLAG_PUSH) < 0) {
        av_log(NULL, AV_LOG_ERROR, "buffe src add frame error!\n");
        return nullptr;
    }

    AVFrame *filtFrame = av_frame_alloc();
    if (!filtFrame)
        return nullptr;

    // NO_REQUEST: only return a frame that is already buffered in the sink.
    if (av_buffersink_get_frame_flags(mBuffersinkCtx, filtFrame, AV_BUFFERSINK_FLAG_NO_REQUEST) < 0) {
        av_frame_free(&filtFrame);
        return nullptr;
    }
    return filtFrame;
}