# 前言 (Preface)
# Convert data to a Tensor data type.
# NOTE(review): the two lines below look like illustrative snippets rather than
# working code -- `my_np_array`, `py_list` and `torch` are not defined anywhere
# in the visible part of this file, and `tf` is only imported further down.
# Confirm whether they are meant to be executed.
tf.convert_to_tensor(my_np_array, dtype=tf.float32)
torch.FloatTensor(py_list)
import pandas as pd
def file_process(file_path):
    """Read a space-delimited ``<label> <text>`` file into two parallel lists.

    The label is everything before the first space on a line; the text is
    the remainder of the line (it may itself contain spaces). Blank lines are
    skipped and the trailing line terminator is not included in the text.

    Args:
        file_path: Path of a UTF-8 encoded text file.

    Returns:
        A ``(labels, texts)`` tuple of equally long lists of strings.

    Raises:
        ValueError: If a non-blank line contains no space separator.
    """
    labels = []
    texts = []
    with open(file_path, 'r', encoding='utf_8') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                # Blank (or whitespace-only) lines carry no sample.
                continue
            # Split only on the FIRST space so multi-word texts stay intact;
            # a plain split(' ') would keep just the first word of the text.
            label, separator, text = line.partition(' ')
            if not separator:
                raise ValueError(
                    f"{file_path}: line {line_no} has no space between "
                    f"label and text: {line!r}"
                )
            labels.append(label)
            texts.append(text)
    return labels, texts
def describe_file(file_path):
    """Load a label/text file into a two-column pandas DataFrame.

    Args:
        file_path: Path forwarded unchanged to ``file_process``.

    Returns:
        A DataFrame with a ``label`` column and a ``text`` column, one row
        per entry returned by ``file_process``.
    """
    labels, texts = file_process(file_path)
    # The pandas class is spelled ``DataFrame``; ``pd.Dataframe`` raises
    # AttributeError at call time.
    return pd.DataFrame({'label': labels, 'text': texts})
test_df=describe_file(path)
print(test_df)
#encoding part
!pip install transformers
from transformers import BertTokenizer, TFBertModel
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = TFBertModel.from_pretrained("bert-base-uncased")
test_text=[text for text in test_df['text']]
encoded_input = tokenizer(test_text,padding=True ,return_tensors='tf')#tokenizer 处理(list(str)或者str type)
output = model(encoded_input)
import tensorflow as tf
y_test=[int(label) for label in test_df['label']]
y_train=output['pooler_output']
y_test=tf.convert_to_tensor(y_test, dtype=tf.float32)
# y_test=torch.FloatTensor(y_test)
print(output['pooler_output'].shape,len(y_test))
print(type(y_test),type(y_train))
#model part
from keras.models import Sequential,Model
from keras.layers import LSTM, Dense, Embedding, Dropout,Input
from tensorflow.keras.optimizers import Adam
def build_classifier_model():
    """Build a small dense classification head for 768-dimensional inputs.

    Returns:
        An uncompiled Keras ``Model`` that maps a ``(768,)`` input through a
        4-unit ReLU layer to a single sigmoid output unit.
    """
    x_input = Input(shape=(768,))
    x_out = Dense(4, activation='relu')(x_input)
    # Softmax over a single unit always evaluates to 1.0, so the model could
    # never learn anything; a sigmoid yields a usable probability instead.
    x_out = Dense(1, activation='sigmoid')(x_out)
    return Model(x_input, x_out)


classifier_model = build_classifier_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line.
classifier_model.summary()
# A single sigmoid output with one scalar label per sample pairs with binary
# cross-entropy. NOTE(review): this assumes the labels are 0/1 -- confirm
# against the data file; more classes need a wider output layer and a
# (sparse) categorical loss.
classifier_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
# `y_train` are the input features and `y_test` the labels (see where they
# are assigned).
classifier_model.fit(y_train, y_test, epochs=2)