#!/usr/bin/env python3
創新互聯建站服務項目包括安平網站建設、安平網站制作、安平網頁制作以及安平網絡營銷策劃等。多年來,我們專注于互聯網行業,利用自身積累的技術優勢、行業經驗、深度合作伙伴關系等,向廣大中小型企業、政府機構等提供互聯網行業的解決方案,安平網站推廣取得了明顯的社會效益與經濟效益。目前,我們服務的客戶以成都為中心已經輻射到安平省份的部分城市,未來相信會繼續擴大服務區域并繼續獲得客戶的支持與信任!
# -*- coding: utf-8 -*-
import os
import random
from collections import Counter
# The input file is assumed to be named aa.txt and to live in the current
# working directory.
filename = 'aa.txt'
dirname = os.getcwd()
f_n = os.path.join(dirname, filename)

# Disabled helper for testing this script: it generates 20 lines of data,
# each holding a random number (1-20) of integers, each drawn from 1-20.
# It is kept inside a string literal so that it never runs.
'''
test=''
for i in range(20):
    for j in range(random.randint(1,20)):
        test+=str(random.randint(1,20))+' '
    test+='\n'
with open(f_n,'w') as wf:
    wf.write(test)
'''

with open(f_n) as f:
    s = f.readlines()

# Flatten every line into one list of tokens. str.split() with no argument
# splits on any run of whitespace and drops leading/trailing whitespace, so
# blank lines and repeated spaces do not produce empty-string "words"
# (which split(' ') would).
words = []
for line in s:
    words.extend(line.split())
def geshi(a, b, c):
    """Format one output row from three values.

    Each value is converted with ``str`` and padded by ``alignment``: the
    first and last fields use its default width of 8 columns, the middle
    field is padded to 18 columns.

    Returns:
        The three padded fields concatenated, terminated by a newline.
    """
    return alignment(str(a)) + alignment(str(b), 18) + alignment(str(c)) + '\n'
# Alignment of mixed Chinese/English text (the reference link that the
# original comment pointed to was lost). str.format pads by character
# count, so columns drift when Chinese characters are mixed with digits or
# Latin letters; this helper pads by encoded width instead.
def alignment(str1, space=8, align='left'):
    """Pad *str1* with spaces so it occupies *space* columns.

    Width is measured as the length of the GBK encoding of *str1*: ASCII
    characters count as 1 and Chinese characters count as 2.

    Args:
        str1: Text to pad.
        space: Target width in columns. Text that is already at least this
            wide is returned without padding (it is never truncated).
        align: 'left', 'right' or 'center' (several spellings accepted).
            Any other value returns *str1* unchanged.

    Returns:
        The padded string.
    """
    # GBK is a superset of GB2312 that also covers traditional characters;
    # errors='replace' counts anything still unencodable as one column
    # instead of raising UnicodeEncodeError.
    length = len(str1.encode('gbk', errors='replace'))
    space = space - length if space >= length else 0
    if align in ['left', 'l', 'L', 'Left', 'LEFT']:
        str1 = str1 + ' ' * space
    elif align in ['right', 'r', 'R', 'Right', 'RIGHT']:
        str1 = ' ' * space + str1
    elif align in ['center', 'c', 'C', 'Center', 'CENTER', 'centre']:
        # When the padding is odd, the extra space goes on the right.
        str1 = ' ' * (space // 2) + str1 + ' ' * (space - space // 2)
    return str1
# Header row of the report: sequence number, word, frequency.
w_s = geshi('序號', '詞', '頻率')

# (word, frequency) tuples sorted on two keys at once: frequency descending
# (hence the negation), then word ascending. Counter tallies every word in
# a single pass instead of calling words.count() once per distinct word.
wordcount = sorted(Counter(words).items(), key=lambda l: (-l[1], l[0]))

# One row per word: rank (8 columns), word (18 columns), frequency
# (8 columns) and a newline. The rank is the 1-based position in the
# sorted list.
w_s += ''.join(
    geshi(rank, w, c) for rank, (w, c) in enumerate(wordcount, start=1)
)

# Write the statistics to ar.txt next to the input file.
writefile = 'ar.txt'
w_n = os.path.join(dirname, writefile)
with open(w_n, 'w') as wf:
    wf.write(w_s)
import re
from collections import Counter

try:
    # Python 2: bind the lazy itertools.imap to the name ``map``.
    from itertools import imap as map
except ImportError:
    # Python 3: itertools.imap was removed and the built-in map is already
    # lazy, so there is nothing to rebind.
    pass
def parserwords(sentence):
    """Return the adjacent word pairs (bigrams) found in *sentence*.

    The sentence is lower-cased and split into ``\\w+`` tokens; every token
    is paired with the token that follows it.

    Returns:
        A list of ``(previous_word, word)`` tuples in order of appearance.
        The list is empty when the sentence has fewer than two words.
    """
    tokens = re.findall(r'\w+', sentence.lower())
    # zip stops at the shorter sequence, which yields exactly the
    # consecutive pairs.
    return list(zip(tokens, tokens[1:]))
context = """
Do you hear the people sing, singing a song of angry men. 
It is the music of a people, who will not be slaves again, 
when the beating of your heart echoes the beating of the drums. 
There is a life about to start when tomorrow comes.
"""

# Split the text into clauses at commas and full stops so that no word pair
# spans a clause boundary, then collect the pairs of every clause.
words = []
for sentence in map(parserwords, re.split(r'[,.]', context.lower())):
    words.extend(sentence)

# How often each word occurs as the first element of a pair, and how often
# each complete pair occurs.
prefixcounter = Counter(word[0] for word in words)
counter = Counter(words)

# meter[(pre, post)] is the share of pairs starting with `pre` that
# continue with `post`.
meter = {}
for pre, post in counter:
    meter[(pre, post)] = 1. * counter[(pre, post)] / prefixcounter[pre]

# Highest ratio first; ties are broken by the pair itself in ascending
# order. A key function replaces the cmp= argument, which Python 3 removed.
result = sorted(meter.items(), key=lambda item: (-item[1], item[0]))
print(result[:5])
使用結巴分詞,統計頻率可以使用Counter,即from collections import Counter
def statistics(astr):
    """Return the distinct tab-separated fields of *astr*, in first-seen order.

    Newline characters are removed from the last field before duplicates
    are dropped, so a trailing field such as ``"a\\n"`` is recognised as a
    repeat of an earlier ``"a"``.

    Args:
        astr: One line of text whose fields are separated by tabs.

    Returns:
        A list of unique field strings. An empty input yields ``['']``.
    """
    fields = astr.split("\t")
    # str.split with an explicit separator always returns at least one
    # element, so indexing the last field is safe.
    fields[-1] = fields[-1].replace("\n", "")
    # dict.fromkeys keeps the first occurrence of every key in order.
    return list(dict.fromkeys(fields))
if __name__ == "__main__":
    # code_doc maps each field to the number of times statistics() returned
    # it, accumulated over every line of test_data.txt.
    code_doc = {}
    with open("test_data.txt", "r", encoding='utf-8') as fs:
        for ln in fs:
            for t in statistics(ln):
                code_doc[t] = code_doc.get(t, 0) + 1
    # Print "<field> <count>" in the order the fields were first seen.
    for keys in code_doc:
        print(keys + ' ' + str(code_doc[keys]))
# Save a text to words.txt first; example content:
# stu ml ds ml stu stuee zkz wxj Zkz Wxj
File = "words.txt"
number_list = []
with open(File) as f:
    for line in f:
        number_list.extend(line.split())

# Record the index of the first occurrence and the total count of every
# word in a single pass, instead of calling list.index() and list.count()
# once per distinct word.
first_seen = {}
counts = {}
for position, item in enumerate(number_list):
    first_seen.setdefault(item, position)
    counts[item] = counts.get(item, 0) + 1

# Q1.txt is opened once, in append mode, rather than once per word. Words
# are reported in order of first appearance, which makes the output
# deterministic. Records are written back to back with no separator.
with open('Q1.txt', 'a') as F:
    for item in first_seen:
        L = [item, first_seen[item], counts[item]]
        print(L)  # word, index of first occurrence, frequency
        F.write(str(L))
#! python3
# -*- coding: utf-8 -*-
import codecs
import os
from collections import Counter

import jieba
def get_words(txt):
    """Print a frequency chart of the 100 most common words in *txt*.

    The text is segmented with ``jieba.cut``. Segments of a single
    character and the segment ``'\\r\\n'`` are not counted.

    Each output line holds the word (left-padded with two spaces for every
    character it is shorter than five), a bar of one ``*`` per three
    occurrences, and the exact count.

    Args:
        txt: The text to analyse.
    """
    seg_list = jieba.cut(txt)
    c = Counter(x for x in seg_list if len(x) > 1 and x != '\r\n')
    print('常用詞頻度統計結果')
    for k, v in c.most_common(100):
        print('%s%s %s  %d' % ('  ' * (5 - len(k)), k, '*' * (v // 3), v))
if __name__ == '__main__':
    # Decode 19d.txt as UTF-8 and chart its word frequencies.
    # NOTE(review): codecs.open does not translate line endings, which is
    # presumably why get_words() skips '\r\n' segments; confirm before
    # switching to the built-in open().
    with codecs.open('19d.txt', 'r', 'utf8') as f:
        txt = f.read()
    get_words(txt)