树莓派语音识别

参考：

http://bbs.elecfans.com/jishu_543262_1_1.html

配置：

喇叭：3.5音频口

麦克风：摄像头自带麦克风（Usb）

1. 安装需要的软件

sudo apt-get install python-pip mplayer flac python2.7-dev libcurl4-gnutls-dev
sudo pip install requests pycurl

2. 配置麦克风和音箱

$ cat /proc/asound/cards

0 [ALSA           ]: bcm2835 - bcm2835 ALSA
                      bcm2835 ALSA
1 [Camera         ]: USB-Audio - USB 2.0 Camera
                      Sonix Technology Co., Ltd. USB 2.0 Camera at usb-3f980000.usb-1.2, high speed
                      
$ cat /proc/asound/modules
 0 snd_bcm2835
 1 snd_usb_audio

第一个命令查看声卡是否存在

第二个命令应该是看声卡的驱动的

调整输入输出音频工具alsamixer：

sudo alsamixer

3. 播放声音

aplay make.wav

结果声音没有播放

网上查了，

By default output will be automatic (hdmi if hdmi supports audio, otherwise analogue). You can force it with:

amixer cset numid=3 <n>
where n is 0=auto, 1=headphones, 2=hdmi.

If you have pulseaudio installed you need to also specify the card number:

amixer -c 0 cset numid=3 <n>

http://elinux.org/R-Pi_Troubleshooting#Sound_does_not_work_at_all.2C_or_in_some_applications

所以运行

amixer cset numid=3 1
就可以听到3.5输出了

4. 麦克风测试

sudo arecord --duration=10 --device=plughw:1,0 --format=cd aaa.wav

指令中的 '--device=plughw:1,0' 选项用来指定录音设备（这里是 USB 摄像头自带的麦克风，即声卡 1），而 Raspberry Pi 的内置音效设备是 'plughw:0,0'

使用百度api进行语音播放：

5. 使用百度API进行语音识别和播放

http://yuyin.baidu.com/docs/tts/135

测试获取token：

import requests
import json
# Smoke test: request an access token from the Baidu OAuth endpoint and print it.
url = 'https://openapi.baidu.com/oauth/2.0/token'

# Form fields of the client-credentials grant.
# NOTE(review): the key/secret are hard-coded here; avoid publishing real
# credentials and consider loading them from the environment instead.
payload = {'grant_type':'client_credentials',
           'client_id':'ZEzwlE2gipnqoE3jqiCKN7SN',
           'client_secret':'KOyWkyE57qWnDnFoc5afwIk0zBMw2X1p'}

r = requests.post(url,data=payload)

# Single-argument print(...) behaves the same on Python 2 and parses on Python 3.
print(json.loads(r.text)['access_token'])

测试ok后，开始正式写：

https://github.com/xuqi1987/10.voice

大致思路：
将请求的token保存到文件中，然后读取文件中的accesstoken,判断token的日期是否有效，无效重新请求token。

代码：
baidu.py（定义 API 类；主程序通过 from baidu import API 导入它）

# -*- coding:utf8 -*-
import requests
import os
import json
import time
import uuid
import base64

class API(object):
    """Small client for the Baidu speech REST endpoints used by this project.

    Provides access-token retrieval with an on-disk cache (``get_token`` /
    ``req_token``), text-to-speech (``txt2audio``), recognition of a short
    microphone recording (``audio2txt``) and mp3 playback (``play``).
    The token cache and all audio files live in the current working directory.
    """

    # File holding the cached token JSON (relative to the working directory).
    TOKEN_FILE = 'token'

    # NOTE(review): credentials are hard-coded; avoid publishing real keys and
    # consider loading them from the environment or a config file.
    API_KEY = 'ZEzwlE2gipnqoE3jqiCKN7SN'
    SECRET_KEY = 'KOyWkyE57qWnDnFoc5afwIk0zBMw2X1p'

    def __init__(self):
        """Synthesize the canned prompts and play the greeting.

        The ``error`` prompt is generated first so that ``error.mp3`` already
        exists if synthesizing the greeting fails and ``txt2audio`` falls back
        to playing it.
        """
        prompts = (
            ('error', '识别错误'),
            ('hello', '欢迎使用语音播放'),
        )
        for name, sentence in prompts:
            self.txt2audio(sentence, name)
            if name == 'hello':
                self.play(name)

    @staticmethod
    def _cuid():
        """Return the client id sent to Baidu: ``uuid.getnode()`` as a hex string."""
        return uuid.UUID(int=uuid.getnode()).hex

    def get_token(self):
        """Return an access token, reusing the cached one while it is still valid.

        The cache is ``TOKEN_FILE``; a missing, unparsable, incomplete or
        expired cache triggers a fresh request via ``req_token``.

        Returns:
            The ``access_token`` value of the token JSON.
        """
        cached = None
        if os.path.exists(self.TOKEN_FILE):
            with open(self.TOKEN_FILE, 'r') as fh:
                raw = fh.read()
            try:
                cached = json.loads(raw)
            except ValueError:
                # Corrupt cache file: fall through and request a new token.
                cached = None

        usable = (
            isinstance(cached, dict)
            and 'access_token' in cached
            and 'expires' in cached
            and not cached['expires'] < time.time()
        )
        if usable:
            print("load cache token")
            return cached['access_token']

        return json.loads(self.req_token())['access_token']

    def req_token(self):
        """Request a new token, cache it in ``TOKEN_FILE`` and return it.

        An absolute ``expires`` timestamp (``expires_in`` + current time) is
        added to the response before it is stored, so later runs can check
        validity without knowing when the token was issued.

        Returns:
            The token document as a JSON string (including ``expires``).

        Raises:
            KeyError: if the response carries no ``expires_in`` field; nothing
                is written to the cache in that case.
        """
        print("request new token")
        url = 'https://openapi.baidu.com/oauth/2.0/token'
        payload = {'grant_type': 'client_credentials',
                   'client_id': self.API_KEY,
                   'client_secret': self.SECRET_KEY}

        r = requests.post(url, data=payload)
        jtoken = json.loads(r.text)
        jtoken['expires'] = jtoken['expires_in'] + time.time()
        token = json.dumps(jtoken)

        with open(self.TOKEN_FILE, 'w') as fh:
            fh.write(token)
        return token

    def txt2audio(self, txt, audio='tmp'):
        """Synthesize *txt* and save the result as ``<audio>.mp3``.

        Args:
            txt: text to be spoken.
            audio: output file name without the ``.mp3`` extension.

        If the service does not answer with ``audio/mp3`` the ``error`` prompt
        is played and the response body is printed; no file is written.
        """
        access_token = self.get_token()
        url = "http://tsn.baidu.com/text2audio"
        payload = {
            'tex': txt,
            'lan': 'zh',
            'tok': access_token,
            'ctp': 1,
            'cuid': self._cuid(),
            # presumably speed / pitch / volume / voice — confirm against the
            # Baidu TTS parameter reference
            'spd': 5,
            'pit': 8,
            'vol': 9,
            'per': 1,
        }

        r = requests.post(url, payload)

        if r.headers.get('Content-Type') == 'audio/mp3':
            # The payload is binary mp3 data, so the file must be opened in 'wb'.
            with open(audio + '.mp3', 'wb') as fh:
                fh.write(r.content)
        else:
            self.play('error')
            print(r.text)

    def audio2txt(self, file='rec'):
        """Prompt the user, record two seconds of audio and return the recognized text.

        Args:
            file: recording file name without the ``.wav`` extension.

        Returns:
            The part of the first recognition result before the first ``,``,
            or ``""`` when the response contains no (or an empty) ``result``.
        """
        access_token = self.get_token()
        url = "http://vop.baidu.com/server_api"

        # Speak the prompt before starting the recording.
        self.txt2audio('请说:')
        self.play('tmp')

        os.system('arecord --duration=2 -c 1 --device=plughw:1,0 -f S16_LE -t wav -v  --rate=16000 ' + file + '.wav')

        # Binary mode: the WAV bytes are base64-encoded verbatim.
        with open(file + '.wav', 'rb') as fh:
            sound = fh.read()

        payload = {
            'format': 'wav',
            'rate': 16000,
            'channel': 1,
            'cuid': self._cuid(),
            'token': access_token,
            'lan': 'zh',
            # b64encode returns bytes on Python 3; decode so json.dumps accepts it.
            'speech': base64.b64encode(sound).decode('ascii'),
            'len': len(sound),
        }
        header = {'Content-Type': 'application/json'}

        r = requests.post(url, data=json.dumps(payload), headers=header)
        print(r.text)

        results = json.loads(r.text).get('result')
        if results:
            return results[0].split(',')[0]
        return ""

    def play(self, file='tmp'):
        """Play ``<file>.mp3`` with ``mpg321`` (``aplay`` cannot play mp3)."""
        os.system("mpg321 " + file + ".mp3")

app.py（主程序，通过 from baidu import API 使用上面的 API 类）

# -*- coding:utf8 -*-
from baidu import API
import RPi.GPIO as GPIO
import time
import requests
import uuid
import json
# Number of the GPIO input that the main loop polls to start a recognition round.
pin = 13

# Use BCM channel numbering and configure the pin as an input.
GPIO.setmode(GPIO.BCM)
GPIO.setup(pin, GPIO.IN)

# Speech client imported from the baidu module.
baidu = API()
# Manual smoke test, left disabled:
# baidu.txt2audio("你好")
# baidu.play('tmp')


def get_tuling(text):
    """Post *text* to the Tuling chatbot endpoint and return the decoded JSON reply."""
    endpoint = 'http://www.tuling123.com/openapi/api'
    # The user id is uuid.getnode() rendered as a hex string.
    user_id = str(uuid.UUID(int=uuid.getnode()).hex)
    form = {
        'key': 'fa78fe2fbb85c914c7126d42bc7c3ebb',
        'info': text,
        'userid': user_id,
    }
    response = requests.post(endpoint, data=form)
    return json.loads(response.text)

# Main loop: wait for the trigger input to go LOW and return, then run one
# listen -> chatbot -> speak round.
try:
    while True:

        change = False

        # Stay here for as long as the input reads LOW; the round below
        # therefore starts once the input has left the LOW state.
        while GPIO.input(pin) == GPIO.LOW:
            change = True
            time.sleep(0.005)

        if change:
            # Alternative spoken prompt, disabled:
            #baidu.txt2audio('我听着,请说指令:')
            text = baidu.audio2txt()
            print(text)
            if text:
                rep = get_tuling(text)
                rep = rep['text']
                print(rep)
            else:
                # Nothing was recognized: answer with a fixed apology.
                rep = "我没有听清"
            baidu.txt2audio(rep)
            baidu.play()

        # NOTE(review): the input is only sampled once per second here, so a
        # LOW pulse shorter than this interval can be missed.
        time.sleep(1)
finally:
    # Release the GPIO channels when the script exits (e.g. on Ctrl-C).
    GPIO.cleanup()

遇到坑：

mp3格式不能用aplay播放，这里通过mpg321播放（需要先安装：sudo apt-get install mpg321）
读取token文件时，file.readlines() 返回的是一个列表，所以需要先取第一个元素（token = file.readlines()[0]）再用json解析。