操作一个很简单的图
// Create a Person node that represents yourself and return it.
CREATE (you:Person)
SET you.name = "You"
RETURN you
// Record that you like the Neo4j database: look yourself up, create the
// Database node, then connect the two with a LIKE relationship.
MATCH (you:Person)
WHERE you.name = "You"
CREATE (neo:Database {name: "Neo4j"})
CREATE (you)-[like:LIKE]->(neo)
RETURN you, like, neo
// Create your friends: one new Person node per name, each reached from you
// through a FRIEND relationship.
MATCH (you:Person {name: "You"})
UNWIND ["Johan", "Rajesh", "Anna", "Julia", "Andrew"] AS friendName
CREATE (you)-[:FRIEND]->(:Person {name: friendName})
// Find your friends.
// The :Person label is stated explicitly, as in the other queries of this
// section, so the lookup is restricted to Person nodes instead of testing
// every node in the graph for a matching `name` property.
MATCH (you:Person {name: "You"})-[:FRIEND]->(yourFriends)
RETURN you, yourFriends
// Go one level deeper: create Amanda, a Neo4j expert who is a friend of your
// friend Anna and has worked with the Neo4j database.
MATCH (anna:Person {name: "Anna"}), (neo:Database {name: "Neo4j"})
CREATE (amanda:Person:Expert {name: "Amanda"})
CREATE (anna)-[:FRIEND]->(amanda)
CREATE (amanda)-[:WORKED_WITH]->(neo)
// Find the Database named "Neo4j", the experts who have worked with it, and
// the shortest path between you and each expert that uses at most 5 FRIEND
// relationships (followed in either direction).
// `you` carries the :Person label, like the other queries in this section,
// so the starting node is not looked up across every node in the graph.
MATCH (you:Person {name: "You"})
MATCH (expert)-[:WORKED_WITH]->(db:Database {name: "Neo4j"})
MATCH path = shortestPath((you)-[:FRIEND*..5]-(expert))
RETURN db, expert, path
更多操作
首先创建一下这个演员-电影关系图:
// Build the actor/movie graph from a CSV file.
// Original author's note: this CSV is incomplete; it is better to create the
// data by running Neo4j's bundled example code instead.
//
// Only rows whose `job` column is "ACTED_IN" are imported. MERGE keeps Movie
// and Person nodes unique by title / name; the numeric columns are converted
// with toInteger() (the older toInt() is deprecated and no longer available
// in Neo4j 4.0+). The query returns the number of processed rows.
LOAD CSV WITH HEADERS FROM "https://neo4j-contrib.github.io/developer-resources/cypher/movies_actors.csv" AS line
WITH line
WHERE line.job = "ACTED_IN"
MERGE (m:Movie {title: line.title})
  ON CREATE SET m.released = toInteger(line.released), m.tagline = line.tagline
MERGE (p:Person {name: line.name})
  ON CREATE SET p.born = toInteger(line.born)
// The `roles` column holds a ";"-separated list of role names.
MERGE (p)-[:ACTED_IN {roles: split(line.roles, ";")}]->(m)
RETURN count(*);
// Create a single Person node.
CREATE (me:Person)
SET me.name = "My Name"
// Return every node in the graph.
MATCH (anyNode)
RETURN anyNode AS n
// Delete a node together with every relationship attached to it.
// DETACH DELETE removes the node's relationships first and then the node, so
// no separate OPTIONAL MATCH over the relationships is needed.
MATCH (me:Person {name: "My Name"})
DETACH DELETE me
// Sorting: list people by birth year in ascending order, e.g. to find the
// oldest person (the earliest birth year comes first).
MATCH (person:Person)
RETURN
  person.name,
  person.born
ORDER BY person.born ASC
// SKIP and LIMIT: useful for paging and for reducing the size of the result.
// An explicit ORDER BY makes the pages deterministic; without it the rows
// that are skipped and returned can differ from one execution to the next.
MATCH (actor:Person)-[:ACTED_IN]->(movie:Movie)
RETURN actor.name AS Actor, movie.title AS Movie
ORDER BY Actor, Movie
SKIP 10 LIMIT 10
// DISTINCT: an actor who appeared in several movies matches once per movie,
// so duplicates are removed before the five earliest-born actors are returned.
MATCH (actor:Person)-[:ACTED_IN]->()
WITH DISTINCT actor
RETURN actor
ORDER BY actor.born ASC
LIMIT 5
// Filtering with WHERE.
MATCH (p:Person)
WHERE p.name = "Tom Hanks"
RETURN p AS person
// The query below has the same effect: the condition is written as an inline
// property map on the node pattern instead of a WHERE clause.
MATCH (p:Person {name: "Tom Hanks"})
RETURN p AS person
// Filtering with a comparison: actors who appeared in a movie together with
// Tom Hanks and are OLDER than him. A smaller `born` value means an earlier
// birth year, so `actor.born < tom.born` selects older actors (the previous
// comment described them as younger, which contradicted the condition).
MATCH (tom:Person {name: "Tom Hanks"})-[:ACTED_IN]->()<-[:ACTED_IN]-(actor:Person)
WHERE actor.born < tom.born
RETURN actor.name AS Name
// Actors who appeared in a movie together with Tom Hanks and are older than
// him, together with how many years older they are. `diff` is
// tom.born - actor.born, which is positive because the actor was born
// earlier. DISTINCT removes the duplicate rows produced when the same actor
// shares more than one movie with Tom Hanks.
MATCH (tom:Person {name: "Tom Hanks"})-[:ACTED_IN]->(movie:Movie),
      (movie)<-[:ACTED_IN]-(actor:Person) // a pattern may continue on the next line; the line break does not end the query
WHERE actor.born < tom.born
RETURN DISTINCT actor.name AS Name, (tom.born - actor.born) AS diff
// Add a relationship: Clint Eastwood DIRECTED the movie "Unforgiven".
MATCH (ce:Person {name: "Clint Eastwood"})
MATCH (un:Movie {title: "Unforgiven"})
CREATE (ce)-[:DIRECTED]->(un)
// Find the actors who have worked together with Gene Hackman, i.e. who acted
// in at least one of the things he acted in.
MATCH (gene:Person {name: "Gene Hackman"})-[:ACTED_IN]->()<-[:ACTED_IN]-(other:Person)
RETURN DISTINCT other
// Find the co-actors of Gene Hackman who have also directed something.
MATCH (gene:Person {name: "Gene Hackman"})-[:ACTED_IN]->()<-[:ACTED_IN]-(other:Person)
WHERE (other)-[:DIRECTED]->() // prefix the pattern with NOT to exclude the directors instead
RETURN DISTINCT other
小练习
1.找到所有的Tom Hanks出演的电影
// Every movie Tom Hanks acted in.
MATCH (tom:Person)-[:ACTED_IN]->(movie)
WHERE tom.name = 'Tom Hanks'
RETURN tom, movie
2.找到所有Tom Hanks 2000年以后演的电影
// Movies Tom Hanks acted in that were released after 2000.
MATCH (tom:Person {name: 'Tom Hanks'})-[:ACTED_IN]->(movie)
WHERE 2000 < movie.released
RETURN tom, movie
// Alternative: express every condition in the WHERE clause.
MATCH (tom:Person)-[:ACTED_IN]->(movie)
WHERE movie.released > 2000
  AND tom.name = "Tom Hanks"
RETURN tom, movie
3.查找Keanu Reeves扮演角色Neo的所有电影
// Movies in which Keanu Reeves played the role "Neo": the role names are
// stored in the `roles` list property of the ACTED_IN relationship.
MATCH (keanu:Person)-[acted:ACTED_IN]->(movie)
WHERE keanu.name = 'Keanu Reeves'
  AND 'Neo' IN acted.roles
RETURN keanu, movie
// Alternative that returns only the movie titles.
MATCH (keanu:Person {name: "Keanu Reeves"})-[acted:ACTED_IN]->(movie)
WHERE "Neo" IN acted.roles
RETURN movie.title
路径匹配
// Pattern fragments illustrating path syntax (not complete queries).
// A DIRECTED relationship pointing from the director to the movie:
(movie)<-[:DIRECTED]-(director)
// The same pattern written the other way round.
(director)-[:DIRECTED]->(movie)
// Variable-length path: any number of relationships, in either direction.
MATCH (node1)-[*]-(node2)
// Traverse relationships of any length, following their direction.
(a)-[*]->(b)
// Relationships at a given depth; `depth` appears to be a placeholder for a number.
(a)-[*depth]->(b)
// Between 1 and 4 relationships deep.
(a)-[*1..4]->(b)
// Exactly 3 KNOWS relationships deep.
(a)-[:KNOWS*3]->(b)
// Two or more relationships deep, where each relationship may be of type
// KNOWS or LIKES. The "*2.." length applies to the whole relationship
// pattern, not only to LIKES. Alternative types are separated by a plain
// "|"; the ":KNOWS|:LIKES" spelling is deprecated.
(a)-[:KNOWS|LIKES*2..]->(b)
路径命名作为Return返回值
// Bind the whole actor -> movie <- director path to `p` and return it.
MATCH p = (:Person)-[:ACTED_IN]->(:Movie)<-[:DIRECTED]-(:Person)
RETURN p
// Same path, but return only the list of nodes on it.
MATCH p = (:Person)-[:ACTED_IN]->(:Movie)<-[:DIRECTED]-(:Person)
RETURN nodes(p)
// Same path, but return only the list of relationships on it.
// relationships() is used instead of its deprecated alias rels(), which is
// no longer available in Neo4j 4.0+. Note that the result column is now
// named after the new function call.
MATCH p=(actor:Person)-[:ACTED_IN]->(movie:Movie)<-[:DIRECTED]-(director:Person)
RETURN relationships(p)
// Two named paths that share the same movie node: p1 is the acting side,
// p2 the directing side.
MATCH
  p1 = (:Person)-[:ACTED_IN]->(movie:Movie),
  p2 = (:Person)-[:DIRECTED]->(movie)
RETURN p1, p2
索引和标签
与其他数据库不同,Neo4j不使用索引来加快JOIN操作。但是,索引对于通过值、文本前缀或范围来找到查询的起点很有用。索引总是绑定到具体的"标签-属性"组合,因此需要针对特定的标签创建索引。
例如,如果您希望能够基于title属性更高效地搜索Movie节点,则可以运行以下Cypher命令:
// Create an index on the `title` property of nodes labelled Movie.
// NOTE(review): this looks like the legacy index syntax; newer Neo4j releases
// use `CREATE INDEX FOR (m:Movie) ON (m.title)` — confirm against the target version.
CREATE INDEX ON :Movie(title)
// Drop the index again.
DROP INDEX ON :Movie(title)
聚合
// count(x): number of movies each actor name has acted in, largest first.
MATCH (p:Person)-[:ACTED_IN]->(movie:Movie)
RETURN
  p.name,
  count(movie) AS num
ORDER BY num DESC
// collect(x): for each actor name, the list of titles of the movies acted in.
MATCH (person:Person)-[:ACTED_IN]->(movie:Movie)
RETURN
  person.name,
  collect(movie.title)
// For each actor name, the list of directors of the movies acted in.
MATCH (person:Person)-[:ACTED_IN]->(:Movie)<-[:DIRECTED]-(director:Person)
RETURN
  person.name,
  collect(director.name)
// A director the actor worked with several times appears several times in
// that list; DISTINCT inside collect() removes the repetitions.
MATCH (person:Person)-[:ACTED_IN]->(:Movie)<-[:DIRECTED]-(director:Person)
RETURN
  person.name,
  collect(DISTINCT director.name) AS directors
// The five busiest actors: actor names ranked by how many ACTED_IN targets
// they have, highest count first.
MATCH (actor:Person)-[:ACTED_IN]->(movie)
RETURN
  actor.name,
  count(movie)
ORDER BY count(movie) DESC
LIMIT 5
推荐引擎
// A more elaborate query: recommend people for Keanu Reeves to work with.
// Starting from Keanu, reach his co-actors and then their co-actors; keep
// those who are not Keanu himself and have not acted with him yet, and
// return the three names that are reached most often.
MATCH (keanu:Person {name: "Keanu Reeves"})-[:ACTED_IN]->()<-[:ACTED_IN]-(coworker:Person)-[:ACTED_IN]->()<-[:ACTED_IN]-(coworkerOfCoworker)
WHERE NOT ((keanu)-[:ACTED_IN]->()<-[:ACTED_IN]-(coworkerOfCoworker))
  AND coworkerOfCoworker <> keanu
RETURN
  coworkerOfCoworker.name,
  count(coworkerOfCoworker)
ORDER BY count(coworkerOfCoworker) DESC
LIMIT 3
导入数据
Cypher提供了一种优雅的内置方法将表格CSV数据导入图形结构。
ex:将一个下面这样的movies.csv导入图,实际上就是按行读取,将内容塞进创建语句的循环
id,title,country,year
1,Wall Street,USA,1987
2,The American President,USA,1995
3,The Shawshank Redemption,USA,1994
// Import movies.csv: one Movie node is created per CSV row.
// LOAD CSV yields every field as a string, so `id` is stored as a string
// while `year` is converted to an integer with toInteger() (the older
// toInt() is deprecated and no longer available in Neo4j 4.0+).
LOAD CSV WITH HEADERS
FROM "http://neo4j.com/docs/stable/csv/intro/movies.csv"
AS line
CREATE (movie:Movie { id:line.id, title:line.title, released:toInteger(line.year) })
persons.csv
id,name
1,Charlie Sheen
2,Oliver Stone
3,Michael Douglas
4,Martin Sheen
5,Morgan Freeman
// Import persons.csv: MERGE keeps one Person node per id, and the name is
// only set when the node is created by this statement.
LOAD CSV WITH HEADERS FROM "http://neo4j.com/docs/stable/csv/intro/persons.csv" AS row
MERGE (person:Person {id: row.id})
  ON CREATE SET person.name = row.name;
roles.csv
personId,movieId,role
1,1,Bud Fox
4,1,Carl Fox
3,1,Gordon Gekko
4,2,A.J. MacInerney
3,2,President Andrew Shepherd
5,3,Ellis Boyd 'Red' Redding
// Import roles.csv: look up the already imported movie and person by id and
// connect them with an ACTED_IN relationship whose `roles` property is a
// one-element list holding the role from the row.
LOAD CSV WITH HEADERS FROM "http://neo4j.com/docs/stable/csv/intro/roles.csv" AS row
MATCH (m:Movie {id: row.movieId}), (p:Person {id: row.personId})
CREATE (p)-[:ACTED_IN {roles: [row.role]}]->(m);
导入非正规化数据
movie_actor_roles.csv
title;released;actor;born;characters
Back to the Future;1985;Michael J. Fox;1961;Marty McFly
Back to the Future;1985;Christopher Lloyd;1938;Dr. Emmet Brown
// Import denormalized data: every row of the ";"-separated file carries the
// movie, the actor and the characters played.
// MERGE keeps movies, actors and the ACTED_IN relationship unique; the
// ON CREATE SET parts only run when the element is created by this statement.
// Numeric columns are converted with toInteger() (the older toInt() is
// deprecated and no longer available in Neo4j 4.0+).
LOAD CSV WITH HEADERS
FROM "http://neo4j.com/docs/stable/csv/intro/movie_actor_roles.csv"
AS line FIELDTERMINATOR ";"
MERGE (movie:Movie { title:line.title })
ON CREATE SET movie.released = toInteger(line.released)
MERGE (actor:Person { name:line.actor })
ON CREATE SET actor.born = toInteger(line.born)
MERGE (actor)-[r:ACTED_IN]->(movie)
// The `characters` column is split on "," into the list of roles.
ON CREATE SET r.roles = split(line.characters,",")
导入大型数据集
如果您导入更大量的数据(超过10000行),建议在LOAD CSV子句前面加上PERIODIC COMMIT提示。这样数据库会定期提交导入事务,避免庞大的事务状态占用过多内存。
更多内容请参考 官方文档
网友评论