EasyNLP带你玩转CLIP图文检索 - 知乎。作者：熊兮、章捷、岑鸣、临在。导读：随着自媒体的不断发展，多种模态数据（例如图像、文本、语音、视频等）不断增长，创造了互联网上丰富多彩的世界。为了准确建模用户的多模态内容，跨模态检索是跨模态理解的重要任务。原文链接：https://zhuanlan.zhihu.com/p/528476134
initialize_easynlp()-train_dataset CLIPDataset(pretrained_model_name_or_pathget_pretrain_model_path(alibaba-pai/clip_chinese_roberta_base_vit_base),data_fileMUGE_MR_train_base64_part.tsv,max_seq_length32,input_schematext:str:1,image:str:1,first_sequencetext,second_sequenceimage,is_trainingTrue)
valid_dataset CLIPDataset()model get_application_model(app_nameclip,...)
- easynlp.appzoo.api.ModelMapping-CLIPApp
- easynlp.appzoo.clip.model.py-CLIPApp
- CHINESE_CLIP-
- self.visual VisualTransformer()
- self.bert BertModel()trainer Trainer(model,train_dataset,user_defined_parameters, evaluatorget_application_evaluator(app_nameclip,valid_datasetvalid_dataset,user_defined_parametersuser_defined_parameters,eval_batch_size32))trainer.train()
- for _epoch in range(self._first_epoch,int(args.epoch_num)):for _step,batch in enumerate(self._train_loader): label_ids batch.pop()forward_outputs self._model(batch)loss_dict self.model_module.compute_loss(forward_outputs,label_ids)_loss loss_dict(loss)_loss.backward()model get_application_model_evaluation()
evaluator get_application_evaluator()
evaluator.evaluate(model)
数据处理
import os
import base64
import multiprocessing
try:
    from tqdm import tqdm
except ImportError:  # graceful fallback: run without a progress bar
    class tqdm:  # minimal stand-in matching the usage below
        def __init__(self, total=None, desc=None):
            pass

        def __enter__(self):
            return self

        def __exit__(self, *exc):
            return False

        def update(self, n=1):
            pass


# Output TSV path (was hard-coded inside generate_tsv in the original script).
DEFAULT_OUTPUT_TSV = ("/home/image_team/image_team_docker_home/lgd/e_commerce_sd/"
                      "data/vcg_furnitures_text_image/vcg_furnitures_train.tsv")


def process_image(image_path):
    """Return ``(description, base64_data)`` for one image file.

    The (Chinese) description is the file name without its extension; the raw
    image bytes are Base64-encoded and decoded to a UTF-8 str so the pair can
    be written as one TSV row.
    """
    image_name = os.path.basename(image_path)
    description = os.path.splitext(image_name)[0]
    with open(image_path, "rb") as f:
        image_data = f.read()
    base64_data = base64.b64encode(image_data).decode("utf-8")
    return description, base64_data


def generate_tsv(directory, output_path=DEFAULT_OUTPUT_TSV):
    """Encode every ``.jpg``/``.png`` in *directory* into a description\\tbase64 TSV.

    Images are processed in parallel with a multiprocessing pool (order of
    rows is therefore nondeterministic, as with the original
    ``imap_unordered``). *output_path* is a backward-compatible parameter that
    defaults to the path the original script hard-coded.
    """
    image_paths = [os.path.join(directory, filename)
                   for filename in os.listdir(directory)
                   if filename.endswith((".jpg", ".png"))]
    results = []
    with multiprocessing.Pool() as pool, \
            tqdm(total=len(image_paths), desc="Processing Images") as pbar:
        for result in pool.imap_unordered(process_image, image_paths):
            results.append(result)
            pbar.update(1)
    with open(output_path, "w", encoding="utf-8") as f:
        for description, base64_data in results:
            f.write(f"{description}\t{base64_data}\n")


if __name__ == "__main__":
    target_directory = ("/home/image_team/image_team_docker_home/lgd/e_commerce_sd/"
                        "data/vcg_furnitures_text_image/vcg_furnitures_train/"
                        "img_download/")
    generate_tsv(target_directory)
# --- Training code (训练代码) follows below ---
import torch.cuda
from easynlp.appzoo import CLIPDataset
from easynlp.appzoo import get_application_predictor, get_application_model, \
    get_application_evaluator, get_application_model_for_evaluation
from easynlp.core import Trainer, PredictorManager
from easynlp.utils import initialize_easynlp, get_args, get_pretrain_model_path
from easynlp.utils.global_vars import parse_user_defined_parameters


def main():
    """Train, evaluate and (optionally) predict with a Chinese CLIP model.

    Reads its configuration from the module-level globals ``args``,
    ``user_defined_parameters`` and ``test`` set in the ``__main__`` block.
    Pretrained weights are cached under /root/.easynlp/modelzoo.
    """
    train_dataset = CLIPDataset(
        # get_pretrain_model_path resolves the model name
        # ("alibaba-pai/clip_chinese_roberta_base_vit_base") to a local path,
        # downloading the model automatically on first use.
        pretrained_model_name_or_path=get_pretrain_model_path(
            args.pretrained_model_name_or_path),
        data_file=args.tables.split(",")[0],
        max_seq_length=args.sequence_length,
        input_schema=args.input_schema,
        first_sequence=args.first_sequence,
        second_sequence=args.second_sequence,
        is_training=True)
    valid_dataset = CLIPDataset(
        pretrained_model_name_or_path=get_pretrain_model_path(
            args.pretrained_model_name_or_path),
        data_file=args.tables.split(",")[-1],  # e.g. data/pai/MUGE_MR_valid_base64_part.tsv
        max_seq_length=args.sequence_length,   # max text length: longer is truncated, shorter padded
        input_schema=args.input_schema,        # comma-separated spec, one item per \t-separated column
        first_sequence=args.first_sequence,    # which schema fields are the first/second inputs
        second_sequence=args.second_sequence,
        is_training=False)                     # True for train_dataset, False for valid_dataset

    model = get_application_model(
        app_name=args.app_name,  # task name; "clip" here
        pretrained_model_name_or_path=get_pretrain_model_path(
            args.pretrained_model_name_or_path),
        user_defined_parameters=user_defined_parameters)

    trainer = Trainer(
        model=model,
        train_dataset=train_dataset,
        user_defined_parameters=user_defined_parameters,
        evaluator=get_application_evaluator(
            app_name=args.app_name,
            valid_dataset=valid_dataset,
            user_defined_parameters=user_defined_parameters,
            eval_batch_size=32))
    trainer.train()

    # Evaluation: reload the best checkpoint written during training.
    model = get_application_model_for_evaluation(
        app_name=args.app_name,
        pretrained_model_name_or_path=args.checkpoint_dir,
        user_defined_parameters=user_defined_parameters)
    evaluator = get_application_evaluator(
        app_name=args.app_name,
        valid_dataset=valid_dataset,
        user_defined_parameters=user_defined_parameters,
        eval_batch_size=32)
    model.to(torch.cuda.current_device())
    evaluator.evaluate(model=model)

    # Prediction: export text features for the test set (disabled by default).
    if test:
        predictor = get_application_predictor(
            app_name="clip",
            model_dir="./outputs/clip_model/",
            first_sequence="text",
            second_sequence="image",
            sequence_length=32,
            user_defined_parameters=user_defined_parameters)
        predictor_manager = PredictorManager(
            predictor=predictor,
            input_file="data/vcg_furnitures_text_image/vcg_furnitures_test.tsv",
            input_schema="text:str:1",
            output_file="text_feat.tsv",
            output_schema="text_feat",
            append_cols="text",
            batch_size=2)
        predictor_manager.run()


if __name__ == "__main__":
    initialize_easynlp()
    args = get_args()
    user_defined_parameters = parse_user_defined_parameters(
        "pretrain_model_name_or_path=alibaba-pai/clip_chinese_roberta_base_vit_base")
    args.checkpoint_dir = "./outputs/clip_model/"
    args.pretrained_model_name_or_path = "alibaba-pai/clip_chinese_roberta_base_vit_base"
    # args.n_gpu = 3
    # args.worker_gpu = "1,2,3"
    args.app_name = "clip"
    args.tables = ("data/pai/MUGE_MR_train_base64_part.tsv,"
                   "data/pai/MUGE_MR_valid_base64_part.tsv")
    # Alternative furniture dataset:
    # args.tables = ("data/vcg_furnitures_text_image/vcg_furnitures_train.tsv,"
    #                "data/vcg_furnitures_text_image/vcg_furnitures_test.tsv")
    args.input_schema = "text:str:1,image:str:1"
    args.first_sequence = "text"
    args.second_sequence = "image"
    args.learning_rate = 1e-4
    args.epoch_num = 1000
    args.random_seed = 42
    args.save_checkpoint_steps = 200
    args.sequence_length = 32
    # args.train_batch_size = 2
    args.micro_batch_size = 32
    test = False
    main()
# Launch (4-GPU example):
# python -m torch.distributed.launch --nproc_per_node 4 tools/train_pai_chinese_clip.py
说一点自己的想法：在我自己工作之初，我很喜欢去拆解一些框架（例如 openmm 系列），但其实大部分框架在训练过程上都是相似的，大可不必逐一拆解；在改动上也没有必要对其进行流程上的大改动。兼具百家之长、了解整体 pipeline，更加专注在 pipeline 实现和效果导向型的结果提交，会更加有效。